// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
8
#include "xfs_shared.h"
9 10 11
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
L
Linus Torvalds 已提交
12 13
#include "xfs_mount.h"
#include "xfs_inode.h"
14 15
#include "xfs_acl.h"
#include "xfs_quota.h"
L
Linus Torvalds 已提交
16
#include "xfs_attr.h"
17
#include "xfs_trans.h"
C
Christoph Hellwig 已提交
18
#include "xfs_trace.h"
19
#include "xfs_icache.h"
D
Dave Chinner 已提交
20
#include "xfs_symlink.h"
21
#include "xfs_dir2.h"
22
#include "xfs_iomap.h"
L
Linus Torvalds 已提交
23 24

#include <linux/xattr.h>
25
#include <linux/posix_acl.h>
26
#include <linux/security.h>
27
#include <linux/iversion.h>
L
Linus Torvalds 已提交
28

29 30 31 32 33 34 35 36 37 38 39 40
/*
 * Directories have different lock order w.r.t. mmap_sem compared to regular
 * files. This is due to readdir potentially triggering page faults on a user
 * buffer inside filldir(), and this happens with the ilock on the directory
 * held. For regular files, the lock order is the other way around - the
 * mmap_sem is taken during the page fault, and then we lock the ilock to do
 * block mapping. Hence we need a different class for the directory ilock so
 * that lockdep can tell them apart.
 *
 * NOTE(review): the code that assigns these classes to inode ilocks is not
 * part of this excerpt - confirm against the inode setup path.
 */
/* lockdep class for the ilock of non-directory inodes */
static struct lock_class_key xfs_nondir_ilock_class;
/* lockdep class for the ilock of directory inodes */
static struct lock_class_key xfs_dir_ilock_class;

41 42 43 44 45
static int
xfs_initxattrs(
	struct inode		*inode,
	const struct xattr	*xattr_array,
	void			*fs_info)
46
{
47 48 49
	const struct xattr	*xattr;
	struct xfs_inode	*ip = XFS_I(inode);
	int			error = 0;
50 51

	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
D
Dave Chinner 已提交
52
		error = xfs_attr_set(ip, xattr->name, xattr->value,
53
				      xattr->value_len, ATTR_SECURE);
54 55 56 57 58 59
		if (error < 0)
			break;
	}
	return error;
}

60 61 62 63 64 65
/*
 * Hook in SELinux.  This is not quite correct yet, what we really need
 * here (as we do for default ACLs) is a mechanism by which creation of
 * these attrs can be journalled at inode creation time (along with the
 * inode, of course, such that log replay can't cause these to be lost).
 */
66

67
STATIC int
68
xfs_init_security(
69
	struct inode	*inode,
70 71
	struct inode	*dir,
	const struct qstr *qstr)
72
{
D
Dave Chinner 已提交
73
	return security_inode_init_security(inode, dir, qstr,
74
					     &xfs_initxattrs, NULL);
75 76
}

77 78
static void
xfs_dentry_to_name(
79 80 81 82 83 84 85 86 87 88
	struct xfs_name	*namep,
	struct dentry	*dentry)
{
	namep->name = dentry->d_name.name;
	namep->len = dentry->d_name.len;
	namep->type = XFS_DIR3_FT_UNKNOWN;
}

static int
xfs_dentry_mode_to_name(
89
	struct xfs_name	*namep,
90 91
	struct dentry	*dentry,
	int		mode)
92 93 94
{
	namep->name = dentry->d_name.name;
	namep->len = dentry->d_name.len;
95
	namep->type = xfs_mode_to_ftype(mode);
96 97 98 99 100

	if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN))
		return -EFSCORRUPTED;

	return 0;
101 102
}

103
STATIC void
104
xfs_cleanup_inode(
105
	struct inode	*dir,
106
	struct inode	*inode,
107
	struct dentry	*dentry)
108
{
109
	struct xfs_name	teardown;
110 111

	/* Oh, the horror.
112
	 * If we can't add the ACL or we fail in
113
	 * xfs_init_security we must back out.
114 115
	 * ENOSPC can hit here, among other things.
	 */
116
	xfs_dentry_to_name(&teardown, dentry);
117

118
	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
119 120
}

L
Linus Torvalds 已提交
121
STATIC int
122
xfs_generic_create(
L
Linus Torvalds 已提交
123 124
	struct inode	*dir,
	struct dentry	*dentry,
A
Al Viro 已提交
125
	umode_t		mode,
126 127
	dev_t		rdev,
	bool		tmpfile)	/* unnamed file */
L
Linus Torvalds 已提交
128
{
C
Christoph Hellwig 已提交
129
	struct inode	*inode;
130
	struct xfs_inode *ip = NULL;
131
	struct posix_acl *default_acl, *acl;
132
	struct xfs_name	name;
L
Linus Torvalds 已提交
133 134 135 136 137 138
	int		error;

	/*
	 * Irix uses Missed'em'V split, but doesn't want to see
	 * the upper 5 bits of (14bit) major.
	 */
139 140 141 142 143 144
	if (S_ISCHR(mode) || S_ISBLK(mode)) {
		if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
			return -EINVAL;
	} else {
		rdev = 0;
	}
L
Linus Torvalds 已提交
145

146 147 148
	error = posix_acl_create(dir, &mode, &default_acl, &acl);
	if (error)
		return error;
L
Linus Torvalds 已提交
149

150 151 152 153 154
	/* Verify mode is valid also for tmpfile case */
	error = xfs_dentry_mode_to_name(&name, dentry, mode);
	if (unlikely(error))
		goto out_free_acl;

155 156 157
	if (!tmpfile) {
		error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
	} else {
158
		error = xfs_create_tmpfile(XFS_I(dir), mode, &ip);
159
	}
C
Christoph Hellwig 已提交
160 161
	if (unlikely(error))
		goto out_free_acl;
162

163
	inode = VFS_I(ip);
164

165
	error = xfs_init_security(inode, dir, &dentry->d_name);
C
Christoph Hellwig 已提交
166 167 168
	if (unlikely(error))
		goto out_cleanup_inode;

169
#ifdef CONFIG_XFS_POSIX_ACL
C
Christoph Hellwig 已提交
170
	if (default_acl) {
171
		error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
172
		if (error)
C
Christoph Hellwig 已提交
173
			goto out_cleanup_inode;
L
Linus Torvalds 已提交
174
	}
175
	if (acl) {
176
		error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
177 178 179 180
		if (error)
			goto out_cleanup_inode;
	}
#endif
L
Linus Torvalds 已提交
181

182 183
	xfs_setup_iops(ip);

184 185 186 187 188 189 190 191 192 193
	if (tmpfile) {
		/*
		 * The VFS requires that any inode fed to d_tmpfile must have
		 * nlink == 1 so that it can decrement the nlink in d_tmpfile.
		 * However, we created the temp file with nlink == 0 because
		 * we're not allowed to put an inode with nlink > 0 on the
		 * unlinked list.  Therefore we have to set nlink to 1 so that
		 * d_tmpfile can immediately set it back to zero.
		 */
		set_nlink(inode, 1);
194
		d_tmpfile(dentry, inode);
195
	} else
196 197
		d_instantiate(dentry, inode);

198 199
	xfs_finish_inode_setup(ip);

200 201 202 203 204
 out_free_acl:
	if (default_acl)
		posix_acl_release(default_acl);
	if (acl)
		posix_acl_release(acl);
D
Dave Chinner 已提交
205
	return error;
C
Christoph Hellwig 已提交
206 207

 out_cleanup_inode:
208
	xfs_finish_inode_setup(ip);
209 210
	if (!tmpfile)
		xfs_cleanup_inode(dir, inode, dentry);
211
	xfs_irele(ip);
212
	goto out_free_acl;
L
Linus Torvalds 已提交
213 214
}

215 216 217 218 219 220 221 222 223 224
/*
 * Create a named inode of type/permissions @mode (with device number @rdev
 * for device nodes) for @dentry in @dir.  This is xfs_generic_create() with
 * the unnamed-file option switched off.
 */
STATIC int
xfs_vn_mknod(
	struct inode	*dir,
	struct dentry	*dentry,
	umode_t		mode,
	dev_t		rdev)
{
	const bool	unnamed = false;	/* entry gets a name in @dir */

	return xfs_generic_create(dir, dentry, mode, rdev, unnamed);
}

L
Linus Torvalds 已提交
225
STATIC int
226
xfs_vn_create(
L
Linus Torvalds 已提交
227 228
	struct inode	*dir,
	struct dentry	*dentry,
A
Al Viro 已提交
229
	umode_t		mode,
A
Al Viro 已提交
230
	bool		flags)
L
Linus Torvalds 已提交
231
{
232
	return xfs_vn_mknod(dir, dentry, mode, 0);
L
Linus Torvalds 已提交
233 234 235
}

STATIC int
236
xfs_vn_mkdir(
L
Linus Torvalds 已提交
237 238
	struct inode	*dir,
	struct dentry	*dentry,
239
	umode_t		mode)
L
Linus Torvalds 已提交
240
{
241
	return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
L
Linus Torvalds 已提交
242 243 244
}

STATIC struct dentry *
245
xfs_vn_lookup(
L
Linus Torvalds 已提交
246 247
	struct inode	*dir,
	struct dentry	*dentry,
A
Al Viro 已提交
248
	unsigned int flags)
L
Linus Torvalds 已提交
249
{
A
Al Viro 已提交
250
	struct inode *inode;
251
	struct xfs_inode *cip;
252
	struct xfs_name	name;
L
Linus Torvalds 已提交
253 254 255 256 257
	int		error;

	if (dentry->d_name.len >= MAXNAMELEN)
		return ERR_PTR(-ENAMETOOLONG);

258
	xfs_dentry_to_name(&name, dentry);
259
	error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
A
Al Viro 已提交
260 261 262 263 264 265 266
	if (likely(!error))
		inode = VFS_I(cip);
	else if (likely(error == -ENOENT))
		inode = NULL;
	else
		inode = ERR_PTR(error);
	return d_splice_alias(inode, dentry);
L
Linus Torvalds 已提交
267 268
}

269 270 271 272
STATIC struct dentry *
xfs_vn_ci_lookup(
	struct inode	*dir,
	struct dentry	*dentry,
A
Al Viro 已提交
273
	unsigned int flags)
274 275 276 277 278 279 280 281 282 283
{
	struct xfs_inode *ip;
	struct xfs_name	xname;
	struct xfs_name ci_name;
	struct qstr	dname;
	int		error;

	if (dentry->d_name.len >= MAXNAMELEN)
		return ERR_PTR(-ENAMETOOLONG);

284
	xfs_dentry_to_name(&xname, dentry);
285 286
	error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
	if (unlikely(error)) {
D
Dave Chinner 已提交
287 288
		if (unlikely(error != -ENOENT))
			return ERR_PTR(error);
289 290 291 292 293
		/*
		 * call d_add(dentry, NULL) here when d_drop_negative_children
		 * is called in xfs_vn_mknod (ie. allow negative dentries
		 * with CI filesystems).
		 */
294 295 296 297 298
		return NULL;
	}

	/* if exact match, just splice and exit */
	if (!ci_name.name)
299
		return d_splice_alias(VFS_I(ip), dentry);
300 301 302 303

	/* else case-insensitive match... */
	dname.name = ci_name.name;
	dname.len = ci_name.len;
304
	dentry = d_add_ci(dentry, VFS_I(ip), &dname);
305 306 307 308
	kmem_free(ci_name.name);
	return dentry;
}

L
Linus Torvalds 已提交
309
STATIC int
310
xfs_vn_link(
L
Linus Torvalds 已提交
311 312 313 314
	struct dentry	*old_dentry,
	struct inode	*dir,
	struct dentry	*dentry)
{
315
	struct inode	*inode = d_inode(old_dentry);
316
	struct xfs_name	name;
L
Linus Torvalds 已提交
317 318
	int		error;

319 320 321
	error = xfs_dentry_mode_to_name(&name, dentry, inode->i_mode);
	if (unlikely(error))
		return error;
L
Linus Torvalds 已提交
322

323
	error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
324
	if (unlikely(error))
D
Dave Chinner 已提交
325
		return error;
326

A
Al Viro 已提交
327
	ihold(inode);
328 329
	d_instantiate(dentry, inode);
	return 0;
L
Linus Torvalds 已提交
330 331 332
}

STATIC int
333
xfs_vn_unlink(
L
Linus Torvalds 已提交
334 335 336
	struct inode	*dir,
	struct dentry	*dentry)
{
337
	struct xfs_name	name;
L
Linus Torvalds 已提交
338 339
	int		error;

340
	xfs_dentry_to_name(&name, dentry);
L
Linus Torvalds 已提交
341

342
	error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry)));
343 344 345 346 347 348 349 350 351 352 353
	if (error)
		return error;

	/*
	 * With unlink, the VFS makes the dentry "negative": no inode,
	 * but still hashed. This is incompatible with case-insensitive
	 * mode, so invalidate (unhash) the dentry in CI-mode.
	 */
	if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
		d_invalidate(dentry);
	return 0;
L
Linus Torvalds 已提交
354 355 356
}

STATIC int
357
xfs_vn_symlink(
L
Linus Torvalds 已提交
358 359 360 361
	struct inode	*dir,
	struct dentry	*dentry,
	const char	*symname)
{
362 363
	struct inode	*inode;
	struct xfs_inode *cip = NULL;
364
	struct xfs_name	name;
L
Linus Torvalds 已提交
365
	int		error;
A
Al Viro 已提交
366
	umode_t		mode;
L
Linus Torvalds 已提交
367

368
	mode = S_IFLNK |
A
Al Viro 已提交
369
		(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
370 371 372
	error = xfs_dentry_mode_to_name(&name, dentry, mode);
	if (unlikely(error))
		goto out;
L
Linus Torvalds 已提交
373

C
Christoph Hellwig 已提交
374
	error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
375 376 377
	if (unlikely(error))
		goto out;

378
	inode = VFS_I(cip);
379

380
	error = xfs_init_security(inode, dir, &dentry->d_name);
381 382 383
	if (unlikely(error))
		goto out_cleanup_inode;

384 385
	xfs_setup_iops(cip);

386
	d_instantiate(dentry, inode);
387
	xfs_finish_inode_setup(cip);
388 389 390
	return 0;

 out_cleanup_inode:
391
	xfs_finish_inode_setup(cip);
392
	xfs_cleanup_inode(dir, inode, dentry);
393
	xfs_irele(cip);
394
 out:
D
Dave Chinner 已提交
395
	return error;
L
Linus Torvalds 已提交
396 397 398
}

STATIC int
399
xfs_vn_rename(
L
Linus Torvalds 已提交
400 401 402
	struct inode	*odir,
	struct dentry	*odentry,
	struct inode	*ndir,
403 404
	struct dentry	*ndentry,
	unsigned int	flags)
L
Linus Torvalds 已提交
405
{
406
	struct inode	*new_inode = d_inode(ndentry);
407
	int		omode = 0;
408
	int		error;
409 410
	struct xfs_name	oname;
	struct xfs_name	nname;
L
Linus Torvalds 已提交
411

D
Dave Chinner 已提交
412
	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
413 414
		return -EINVAL;

415 416
	/* if we are exchanging files, we need to set i_mode of both files */
	if (flags & RENAME_EXCHANGE)
417
		omode = d_inode(ndentry)->i_mode;
418

419 420 421 422 423 424 425 426
	error = xfs_dentry_mode_to_name(&oname, odentry, omode);
	if (omode && unlikely(error))
		return error;

	error = xfs_dentry_mode_to_name(&nname, ndentry,
					d_inode(odentry)->i_mode);
	if (unlikely(error))
		return error;
427

428
	return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)),
429
			  XFS_I(ndir), &nname,
430
			  new_inode ? XFS_I(new_inode) : NULL, flags);
L
Linus Torvalds 已提交
431 432 433 434 435 436 437
}

/*
 * Careful here - this function can get called recursively, so
 * we need to be very careful about how much stack we use.
 * The link buffer is kmalloc'ed for this reason.
 */
438
STATIC const char *
439
xfs_vn_get_link(
L
Linus Torvalds 已提交
440
	struct dentry		*dentry,
441
	struct inode		*inode,
442
	struct delayed_call	*done)
L
Linus Torvalds 已提交
443 444
{
	char			*link;
445
	int			error = -ENOMEM;
L
Linus Torvalds 已提交
446

447 448 449
	if (!dentry)
		return ERR_PTR(-ECHILD);

450
	link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL);
451 452
	if (!link)
		goto out_err;
L
Linus Torvalds 已提交
453

454
	error = xfs_readlink(XFS_I(d_inode(dentry)), link);
455 456
	if (unlikely(error))
		goto out_kfree;
L
Linus Torvalds 已提交
457

458 459
	set_delayed_call(done, kfree_link, link);
	return link;
460 461 462 463

 out_kfree:
	kfree(link);
 out_err:
464
	return ERR_PTR(error);
L
Linus Torvalds 已提交
465 466
}

467 468 469 470 471 472
STATIC const char *
xfs_vn_get_link_inline(
	struct dentry		*dentry,
	struct inode		*inode,
	struct delayed_call	*done)
{
473 474
	char			*link;

475
	ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE);
476 477 478 479 480 481 482 483 484

	/*
	 * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if
	 * if_data is junk.
	 */
	link = XFS_I(inode)->i_df.if_u1.if_data;
	if (!link)
		return ERR_PTR(-EFSCORRUPTED);
	return link;
485 486
}

487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522
/*
 * Pick the preferred I/O size, in bytes, to report for @ip.
 *
 * In order of precedence:
 *  - realtime inodes: the extent size hint, converted from filesystem
 *    blocks to bytes;
 *  - compatibility I/O size mode (XFS_MOUNT_COMPAT_IOSIZE): PAGE_SIZE, the
 *    basic unit of caching, so that user apps do not end up doing
 *    inefficient read/modify/write I/O;
 *  - a striped volume: the stripe width in bytes;
 *  - if it is not a stripe and a default buffered I/O size has been set
 *    (XFS_MOUNT_DFLT_IOSIZE): the larger of the read and write I/O sizes;
 *  - otherwise: PAGE_SIZE.
 */
static uint32_t
xfs_stat_blksize(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;

	if (XFS_IS_REALTIME_INODE(ip))
		return xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;

	if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)
		return PAGE_SIZE;

	if (mp->m_swidth)
		return mp->m_swidth << mp->m_sb.sb_blocklog;

	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
		return 1U << max(mp->m_readio_log, mp->m_writeio_log);

	return PAGE_SIZE;
}

L
Linus Torvalds 已提交
523
STATIC int
524
xfs_vn_getattr(
525 526 527 528
	const struct path	*path,
	struct kstat		*stat,
	u32			request_mask,
	unsigned int		query_flags)
L
Linus Torvalds 已提交
529
{
530
	struct inode		*inode = d_inode(path->dentry);
531 532 533
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;

C
Christoph Hellwig 已提交
534
	trace_xfs_getattr(ip);
535 536

	if (XFS_FORCED_SHUTDOWN(mp))
E
Eric Sandeen 已提交
537
		return -EIO;
538 539 540

	stat->size = XFS_ISIZE(ip);
	stat->dev = inode->i_sb->s_dev;
D
Dave Chinner 已提交
541
	stat->mode = inode->i_mode;
542
	stat->nlink = inode->i_nlink;
543 544
	stat->uid = inode->i_uid;
	stat->gid = inode->i_gid;
545 546
	stat->ino = ip->i_ino;
	stat->atime = inode->i_atime;
547 548
	stat->mtime = inode->i_mtime;
	stat->ctime = inode->i_ctime;
549 550 551
	stat->blocks =
		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);

552 553 554 555 556 557 558 559
	if (ip->i_d.di_version == 3) {
		if (request_mask & STATX_BTIME) {
			stat->result_mask |= STATX_BTIME;
			stat->btime.tv_sec = ip->i_d.di_crtime.t_sec;
			stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
		}
	}

560 561 562 563
	/*
	 * Note: If you add another clause to set an attribute flag, please
	 * update attributes_mask below.
	 */
564 565 566 567 568 569
	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
		stat->attributes |= STATX_ATTR_APPEND;
	if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP)
		stat->attributes |= STATX_ATTR_NODUMP;
570

571 572 573 574
	stat->attributes_mask |= (STATX_ATTR_IMMUTABLE |
				  STATX_ATTR_APPEND |
				  STATX_ATTR_NODUMP);

575 576 577 578
	switch (inode->i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		stat->blksize = BLKDEV_IOSIZE;
C
Christoph Hellwig 已提交
579
		stat->rdev = inode->i_rdev;
580 581
		break;
	default:
582
		stat->blksize = xfs_stat_blksize(ip);
583 584
		stat->rdev = 0;
		break;
585
	}
586 587

	return 0;
L
Linus Torvalds 已提交
588 589
}

590 591 592 593 594
static void
xfs_setattr_mode(
	struct xfs_inode	*ip,
	struct iattr		*iattr)
{
595 596
	struct inode		*inode = VFS_I(ip);
	umode_t			mode = iattr->ia_mode;
597 598 599 600 601 602 603

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	inode->i_mode &= S_IFMT;
	inode->i_mode |= mode & ~S_IFMT;
}

604
void
C
Christoph Hellwig 已提交
605 606 607 608 609 610 611 612
xfs_setattr_time(
	struct xfs_inode	*ip,
	struct iattr		*iattr)
{
	struct inode		*inode = VFS_I(ip);

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

613
	if (iattr->ia_valid & ATTR_ATIME)
C
Christoph Hellwig 已提交
614
		inode->i_atime = iattr->ia_atime;
615
	if (iattr->ia_valid & ATTR_CTIME)
C
Christoph Hellwig 已提交
616
		inode->i_ctime = iattr->ia_ctime;
617
	if (iattr->ia_valid & ATTR_MTIME)
C
Christoph Hellwig 已提交
618 619 620
		inode->i_mtime = iattr->ia_mtime;
}

621 622 623 624 625
static int
xfs_vn_change_ok(
	struct dentry	*dentry,
	struct iattr	*iattr)
{
626
	struct xfs_mount	*mp = XFS_I(d_inode(dentry))->i_mount;
627 628 629 630 631 632 633

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return -EROFS;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

634
	return setattr_prepare(dentry, iattr);
635 636 637 638 639 640
}

/*
 * Set non-size attributes of an inode.
 *
 * Caution: The caller of this function is responsible for calling
 * setattr_prepare() or otherwise verifying the change is fine.
 */
C
Christoph Hellwig 已提交
643 644 645 646 647 648 649 650 651 652 653
int
xfs_setattr_nonsize(
	struct xfs_inode	*ip,
	struct iattr		*iattr,
	int			flags)
{
	xfs_mount_t		*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	int			mask = iattr->ia_valid;
	xfs_trans_t		*tp;
	int			error;
654 655
	kuid_t			uid = GLOBAL_ROOT_UID, iuid = GLOBAL_ROOT_UID;
	kgid_t			gid = GLOBAL_ROOT_GID, igid = GLOBAL_ROOT_GID;
C
Christoph Hellwig 已提交
656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675
	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
	struct xfs_dquot	*olddquot1 = NULL, *olddquot2 = NULL;

	ASSERT((mask & ATTR_SIZE) == 0);

	/*
	 * If disk quotas is on, we make sure that the dquots do exist on disk,
	 * before we start any other transactions. Trying to do this later
	 * is messy. We don't care to take a readlock to look at the ids
	 * in inode here, because we can't hold it across the trans_reserve.
	 * If the IDs do change before we take the ilock, we're covered
	 * because the i_*dquot fields will get updated anyway.
	 */
	if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
		uint	qflags = 0;

		if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
			uid = iattr->ia_uid;
			qflags |= XFS_QMOPT_UQUOTA;
		} else {
676
			uid = inode->i_uid;
C
Christoph Hellwig 已提交
677 678 679 680 681
		}
		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
			gid = iattr->ia_gid;
			qflags |= XFS_QMOPT_GQUOTA;
		}  else {
682
			gid = inode->i_gid;
C
Christoph Hellwig 已提交
683 684 685 686 687 688 689 690 691
		}

		/*
		 * We take a reference when we initialize udqp and gdqp,
		 * so it is important that we never blindly double trip on
		 * the same variable. See xfs_create() for an example.
		 */
		ASSERT(udqp == NULL);
		ASSERT(gdqp == NULL);
692 693 694 695
		error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid),
					   xfs_kgid_to_gid(gid),
					   xfs_get_projid(ip),
					   qflags, &udqp, &gdqp, NULL);
C
Christoph Hellwig 已提交
696 697 698 699
		if (error)
			return error;
	}

700
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
C
Christoph Hellwig 已提交
701
	if (error)
702
		goto out_dqrele;
C
Christoph Hellwig 已提交
703 704

	xfs_ilock(ip, XFS_ILOCK_EXCL);
705
	xfs_trans_ijoin(tp, ip, 0);
C
Christoph Hellwig 已提交
706 707 708 709 710 711 712 713 714 715 716

	/*
	 * Change file ownership.  Must be the owner or privileged.
	 */
	if (mask & (ATTR_UID|ATTR_GID)) {
		/*
		 * These IDs could have changed since we last looked at them.
		 * But, we're assured that if the ownership did change
		 * while we didn't have the inode locked, inode's dquot(s)
		 * would have changed also.
		 */
717 718
		iuid = inode->i_uid;
		igid = inode->i_gid;
C
Christoph Hellwig 已提交
719 720 721 722 723 724 725 726
		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;

		/*
		 * Do a quota reservation only if uid/gid is actually
		 * going to change.
		 */
		if (XFS_IS_QUOTA_RUNNING(mp) &&
727 728
		    ((XFS_IS_UQUOTA_ON(mp) && !uid_eq(iuid, uid)) ||
		     (XFS_IS_GQUOTA_ON(mp) && !gid_eq(igid, gid)))) {
C
Christoph Hellwig 已提交
729 730
			ASSERT(tp);
			error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
731
						NULL, capable(CAP_FOWNER) ?
C
Christoph Hellwig 已提交
732 733
						XFS_QMOPT_FORCE_RES : 0);
			if (error)	/* out of quota */
734
				goto out_cancel;
C
Christoph Hellwig 已提交
735 736 737 738 739 740 741 742 743 744 745 746 747
		}
	}

	/*
	 * Change file ownership.  Must be the owner or privileged.
	 */
	if (mask & (ATTR_UID|ATTR_GID)) {
		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The set-user-ID and set-group-ID bits of a file will be
		 * cleared upon successful return from chown()
		 */
D
Dave Chinner 已提交
748
		if ((inode->i_mode & (S_ISUID|S_ISGID)) &&
C
Christoph Hellwig 已提交
749
		    !capable(CAP_FSETID))
D
Dave Chinner 已提交
750
			inode->i_mode &= ~(S_ISUID|S_ISGID);
C
Christoph Hellwig 已提交
751 752 753 754 755

		/*
		 * Change the ownerships and register quota modifications
		 * in the transaction.
		 */
756
		if (!uid_eq(iuid, uid)) {
C
Christoph Hellwig 已提交
757 758 759 760 761 762
			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
				ASSERT(mask & ATTR_UID);
				ASSERT(udqp);
				olddquot1 = xfs_qm_vop_chown(tp, ip,
							&ip->i_udquot, udqp);
			}
763
			ip->i_d.di_uid = xfs_kuid_to_uid(uid);
C
Christoph Hellwig 已提交
764 765
			inode->i_uid = uid;
		}
766
		if (!gid_eq(igid, gid)) {
C
Christoph Hellwig 已提交
767
			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
768 769
				ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) ||
				       !XFS_IS_PQUOTA_ON(mp));
C
Christoph Hellwig 已提交
770 771 772 773 774
				ASSERT(mask & ATTR_GID);
				ASSERT(gdqp);
				olddquot2 = xfs_qm_vop_chown(tp, ip,
							&ip->i_gdquot, gdqp);
			}
775
			ip->i_d.di_gid = xfs_kgid_to_gid(gid);
C
Christoph Hellwig 已提交
776 777 778 779
			inode->i_gid = gid;
		}
	}

780
	if (mask & ATTR_MODE)
781
		xfs_setattr_mode(ip, iattr);
C
Christoph Hellwig 已提交
782 783
	if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
		xfs_setattr_time(ip, iattr);
C
Christoph Hellwig 已提交
784 785 786

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

787
	XFS_STATS_INC(mp, xs_ig_attrchg);
C
Christoph Hellwig 已提交
788 789 790

	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);
791
	error = xfs_trans_commit(tp);
C
Christoph Hellwig 已提交
792 793 794 795 796 797 798 799 800 801 802 803

	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	/*
	 * Release any dquot(s) the inode had kept before chown.
	 */
	xfs_qm_dqrele(olddquot1);
	xfs_qm_dqrele(olddquot2);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	if (error)
E
Eric Sandeen 已提交
804
		return error;
C
Christoph Hellwig 已提交
805 806 807 808 809 810 811 812 813

	/*
	 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
	 * 	     update.  We could avoid this with linked transactions
	 * 	     and passing down the transaction pointer all the way
	 *	     to attr_set.  No previous user of the generic
	 * 	     Posix ACL code seems to care about this issue either.
	 */
	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
D
Dave Chinner 已提交
814
		error = posix_acl_chmod(inode, inode->i_mode);
C
Christoph Hellwig 已提交
815
		if (error)
E
Eric Sandeen 已提交
816
			return error;
C
Christoph Hellwig 已提交
817 818 819 820
	}

	return 0;

821
out_cancel:
822
	xfs_trans_cancel(tp);
823
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
824
out_dqrele:
C
Christoph Hellwig 已提交
825 826 827 828 829
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	return error;
}

830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845
/*
 * Validate and then apply a non-size attribute change to the inode behind
 * @dentry.  The change is only attempted when xfs_vn_change_ok() accepts
 * it; otherwise its error is returned unchanged.
 */
int
xfs_vn_setattr_nonsize(
	struct dentry		*dentry,
	struct iattr		*iattr)
{
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	int			error;

	trace_xfs_setattr(ip);

	error = xfs_vn_change_ok(dentry, iattr);
	if (!error)
		error = xfs_setattr_nonsize(ip, iattr, 0);

	return error;
}

C
Christoph Hellwig 已提交
846 847
/*
 * Truncate file.  Must have write permission and not be a directory.
 *
 * Caution: The caller of this function is responsible for calling
 * setattr_prepare() or otherwise verifying the change is fine.
 */
D
Darrick J. Wong 已提交
852
STATIC int
C
Christoph Hellwig 已提交
853 854
xfs_setattr_size(
	struct xfs_inode	*ip,
855
	struct iattr		*iattr)
C
Christoph Hellwig 已提交
856 857 858
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
859
	xfs_off_t		oldsize, newsize;
C
Christoph Hellwig 已提交
860 861
	struct xfs_trans	*tp;
	int			error;
862
	uint			lock_flags = 0;
863
	bool			did_zeroing = false;
C
Christoph Hellwig 已提交
864

865
	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
866
	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
D
Dave Chinner 已提交
867
	ASSERT(S_ISREG(inode->i_mode));
868 869
	ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
		ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
C
Christoph Hellwig 已提交
870

871
	oldsize = inode->i_size;
872 873
	newsize = iattr->ia_size;

C
Christoph Hellwig 已提交
874 875 876
	/*
	 * Short circuit the truncate case for zero length files.
	 */
877
	if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
878
		if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
879
			return 0;
880 881 882 883 884 885

		/*
		 * Use the regular setattr path to update the timestamps.
		 */
		iattr->ia_valid &= ~ATTR_SIZE;
		return xfs_setattr_nonsize(ip, iattr, 0);
C
Christoph Hellwig 已提交
886 887 888 889 890
	}

	/*
	 * Make sure that the dquots are attached to the inode.
	 */
891
	error = xfs_qm_dqattach(ip);
C
Christoph Hellwig 已提交
892
	if (error)
893
		return error;
C
Christoph Hellwig 已提交
894

895 896 897 898 899
	/*
	 * Wait for all direct I/O to complete.
	 */
	inode_dio_wait(inode);

C
Christoph Hellwig 已提交
900
	/*
901 902 903 904 905
	 * File data changes must be complete before we start the transaction to
	 * modify the inode.  This needs to be done before joining the inode to
	 * the transaction because the inode cannot be unlocked once it is a
	 * part of the transaction.
	 *
906 907 908
	 * Start with zeroing any data beyond EOF that we may expose on file
	 * extension, or zeroing out the rest of the block on a downward
	 * truncate.
C
Christoph Hellwig 已提交
909
	 */
910
	if (newsize > oldsize) {
C
Christoph Hellwig 已提交
911 912
		trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
		error = iomap_zero_range(inode, oldsize, newsize - oldsize,
913
				&did_zeroing, &xfs_buffered_write_iomap_ops);
914
	} else {
915
		error = iomap_truncate_page(inode, newsize, &did_zeroing,
916
				&xfs_buffered_write_iomap_ops);
C
Christoph Hellwig 已提交
917 918
	}

919 920 921
	if (error)
		return error;

922
	/*
923 924 925 926 927 928
	 * We've already locked out new page faults, so now we can safely remove
	 * pages from the page cache knowing they won't get refaulted until we
	 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
	 * complete. The truncate_setsize() call also cleans partial EOF page
	 * PTEs on extending truncates and hence ensures sub-page block size
	 * filesystems are correctly handled, too.
929
	 *
930 931 932
	 * We have to do all the page cache truncate work outside the
	 * transaction context as the "lock" order is page lock->log space
	 * reservation as defined by extent allocation in the writeback path.
933
	 * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
934 935 936 937
	 * having already truncated the in-memory version of the file (i.e. made
	 * user visible changes). There's not much we can do about this, except
	 * to hope that the caller sees ENOMEM and retries the truncate
	 * operation.
938 939 940 941
	 *
	 * And we update in-core i_size and truncate page cache beyond newsize
	 * before writeback the [di_size, newsize] range, so we're guaranteed
	 * not to write stale data past the new EOF on truncate down.
942 943
	 */
	truncate_setsize(inode, newsize);
C
Christoph Hellwig 已提交
944

945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960
	/*
	 * We are going to log the inode size change in this transaction so
	 * any previous writes that are beyond the on disk EOF and the new
	 * EOF that have not been written out need to be written here.  If we
	 * do not write the data out, we expose ourselves to the null files
	 * problem. Note that this includes any block zeroing we did above;
	 * otherwise those blocks may not be zeroed after a crash.
	 */
	if (did_zeroing ||
	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
						ip->i_d.di_size, newsize - 1);
		if (error)
			return error;
	}

961
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
C
Christoph Hellwig 已提交
962
	if (error)
963
		return error;
C
Christoph Hellwig 已提交
964 965 966

	lock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
967
	xfs_trans_ijoin(tp, ip, 0);
C
Christoph Hellwig 已提交
968 969 970 971 972 973 974 975 976 977 978

	/*
	 * Only change the c/mtime if we are changing the size or we are
	 * explicitly asked to change it.  This handles the semantic difference
	 * between truncate() and ftruncate() as implemented in the VFS.
	 *
	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
	 * special case where we need to update the times despite not having
	 * these flags set.  For all other operations the VFS set these flags
	 * explicitly if it wants a timestamp update.
	 */
979 980
	if (newsize != oldsize &&
	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
C
Christoph Hellwig 已提交
981
		iattr->ia_ctime = iattr->ia_mtime =
982
			current_time(inode);
983
		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
C
Christoph Hellwig 已提交
984 985
	}

986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002
	/*
	 * The first thing we do is set the size to new_size permanently on
	 * disk.  This way we don't have to worry about anyone ever being able
	 * to look at the data being freed even in the face of a crash.
	 * What we're getting around here is the case where we free a block, it
	 * is allocated to another file, it is written to, and then we crash.
	 * If the new data gets written to the file but the log buffers
	 * containing the free and reallocation don't, then we'd end up with
	 * garbage in the blocks being freed.  As long as we make the new size
	 * permanent before actually freeing any blocks it doesn't matter if
	 * they get written to.
	 */
	ip->i_d.di_size = newsize;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	if (newsize <= oldsize) {
		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
C
Christoph Hellwig 已提交
1003
		if (error)
1004
			goto out_trans_cancel;
C
Christoph Hellwig 已提交
1005 1006 1007 1008 1009 1010 1011 1012 1013

		/*
		 * Truncated "down", so we're removing references to old data
		 * here - if we delay flushing for a long time, we expose
		 * ourselves unduly to the notorious NULL files problem.  So,
		 * we mark this inode and flush it when the file is closed,
		 * and do not wait the usual (long) time for writeout.
		 */
		xfs_iflags_set(ip, XFS_ITRUNCATED);
1014 1015 1016

		/* A truncate down always removes post-EOF blocks. */
		xfs_inode_clear_eofblocks_tag(ip);
C
Christoph Hellwig 已提交
1017 1018
	}

1019
	if (iattr->ia_valid & ATTR_MODE)
1020
		xfs_setattr_mode(ip, iattr);
1021
	if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
C
Christoph Hellwig 已提交
1022
		xfs_setattr_time(ip, iattr);
C
Christoph Hellwig 已提交
1023 1024 1025

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

1026
	XFS_STATS_INC(mp, xs_ig_attrchg);
C
Christoph Hellwig 已提交
1027 1028 1029 1030

	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);

1031
	error = xfs_trans_commit(tp);
C
Christoph Hellwig 已提交
1032 1033 1034 1035 1036 1037
out_unlock:
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
	return error;

out_trans_cancel:
1038
	xfs_trans_cancel(tp);
C
Christoph Hellwig 已提交
1039 1040 1041
	goto out_unlock;
}

1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057
/*
 * Size-changing setattr: fire the setattr tracepoint, have
 * xfs_vn_change_ok() vet the request, and only if that succeeds pass it on
 * to xfs_setattr_size().
 *
 * Returns the non-zero result of xfs_vn_change_ok() if it rejects the
 * request, otherwise whatever xfs_setattr_size() returns.
 */
int
xfs_vn_setattr_size(
	struct dentry		*dentry,
	struct iattr		*iattr)
{
	struct inode		*inode = d_inode(dentry);
	struct xfs_inode	*ip = XFS_I(inode);
	int			error;

	trace_xfs_setattr(ip);

	error = xfs_vn_change_ok(dentry, iattr);
	if (!error)
		error = xfs_setattr_size(ip, iattr);

	return error;
}

L
Linus Torvalds 已提交
1058
STATIC int
1059
xfs_vn_setattr(
1060 1061
	struct dentry		*dentry,
	struct iattr		*iattr)
L
Linus Torvalds 已提交
1062
{
1063 1064 1065
	int			error;

	if (iattr->ia_valid & ATTR_SIZE) {
1066 1067
		struct inode		*inode = d_inode(dentry);
		struct xfs_inode	*ip = XFS_I(inode);
1068
		uint			iolock;
1069

1070 1071
		xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
		iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1072

1073
		error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
1074 1075
		if (error) {
			xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1076
			return error;
1077
		}
1078

1079
		error = xfs_vn_setattr_size(dentry, iattr);
1080
		xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
1081
	} else {
1082
		error = xfs_vn_setattr_nonsize(dentry, iattr);
1083 1084
	}

D
Dave Chinner 已提交
1085
	return error;
L
Linus Torvalds 已提交
1086 1087
}

C
Christoph Hellwig 已提交
1088 1089 1090
STATIC int
xfs_vn_update_time(
	struct inode		*inode,
1091
	struct timespec64	*now,
C
Christoph Hellwig 已提交
1092 1093 1094 1095
	int			flags)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
1096
	int			log_flags = XFS_ILOG_TIMESTAMP;
C
Christoph Hellwig 已提交
1097 1098 1099 1100 1101
	struct xfs_trans	*tp;
	int			error;

	trace_xfs_update_time(ip);

1102 1103 1104 1105 1106 1107 1108 1109 1110
	if (inode->i_sb->s_flags & SB_LAZYTIME) {
		if (!((flags & S_VERSION) &&
		      inode_maybe_inc_iversion(inode, false)))
			return generic_update_time(inode, now, flags);

		/* Capture the iversion update that just occurred */
		log_flags |= XFS_ILOG_CORE;
	}

1111 1112
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
	if (error)
D
Dave Chinner 已提交
1113
		return error;
C
Christoph Hellwig 已提交
1114 1115

	xfs_ilock(ip, XFS_ILOCK_EXCL);
1116
	if (flags & S_CTIME)
C
Christoph Hellwig 已提交
1117
		inode->i_ctime = *now;
1118
	if (flags & S_MTIME)
C
Christoph Hellwig 已提交
1119
		inode->i_mtime = *now;
1120
	if (flags & S_ATIME)
C
Christoph Hellwig 已提交
1121
		inode->i_atime = *now;
1122

C
Christoph Hellwig 已提交
1123
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1124
	xfs_trans_log_inode(tp, ip, log_flags);
1125
	return xfs_trans_commit(tp);
C
Christoph Hellwig 已提交
1126 1127
}

E
Eric Sandeen 已提交
1128 1129 1130 1131 1132 1133 1134 1135 1136
/*
 * ->fiemap method: run iomap_fiemap() over [@start, @start + @length) while
 * holding XFS_IOLOCK_SHARED.
 *
 * When the caller set FIEMAP_FLAG_XATTR, that flag is cleared from
 * @fieinfo and the mapping is done with xfs_xattr_iomap_ops; otherwise
 * xfs_read_iomap_ops is used.  Returns the result of iomap_fiemap().
 */
STATIC int
xfs_vn_fiemap(
	struct inode		*inode,
	struct fiemap_extent_info *fieinfo,
	u64			start,
	u64			length)
{
	struct xfs_inode	*ip = XFS_I(inode);
	const struct iomap_ops	*ops = &xfs_read_iomap_ops;
	int			error;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
		ops = &xfs_xattr_iomap_ops;
	}
	error = iomap_fiemap(inode, fieinfo, start, length, ops);

	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return error;
}

Z
Zhi Yong Wu 已提交
1151 1152 1153 1154 1155 1156
/*
 * ->tmpfile method: thin wrapper that forwards to xfs_generic_create().
 *
 * NOTE(review): the trailing "0, true" arguments are presumably the device
 * number and the "this is a tmpfile" flag - confirm against
 * xfs_generic_create().
 */
STATIC int
xfs_vn_tmpfile(
	struct inode	*dir,
	struct dentry	*dentry,
	umode_t		mode)
{
	int		error;

	error = xfs_generic_create(dir, dentry, mode, 0, true);
	return error;
}

1160
static const struct inode_operations xfs_inode_operations = {
1161
	.get_acl		= xfs_get_acl,
1162
	.set_acl		= xfs_set_acl,
1163 1164 1165
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.listxattr		= xfs_vn_listxattr,
E
Eric Sandeen 已提交
1166
	.fiemap			= xfs_vn_fiemap,
C
Christoph Hellwig 已提交
1167
	.update_time		= xfs_vn_update_time,
L
Linus Torvalds 已提交
1168 1169
};

1170
static const struct inode_operations xfs_dir_inode_operations = {
1171 1172 1173 1174 1175 1176
	.create			= xfs_vn_create,
	.lookup			= xfs_vn_lookup,
	.link			= xfs_vn_link,
	.unlink			= xfs_vn_unlink,
	.symlink		= xfs_vn_symlink,
	.mkdir			= xfs_vn_mkdir,
1177 1178 1179 1180 1181 1182 1183
	/*
	 * Yes, XFS uses the same method for rmdir and unlink.
	 *
	 * There are some subtile differences deeper in the code,
	 * but we use S_ISDIR to check for those.
	 */
	.rmdir			= xfs_vn_unlink,
1184
	.mknod			= xfs_vn_mknod,
1185
	.rename			= xfs_vn_rename,
1186
	.get_acl		= xfs_get_acl,
1187
	.set_acl		= xfs_set_acl,
1188 1189 1190
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.listxattr		= xfs_vn_listxattr,
C
Christoph Hellwig 已提交
1191
	.update_time		= xfs_vn_update_time,
Z
Zhi Yong Wu 已提交
1192
	.tmpfile		= xfs_vn_tmpfile,
L
Linus Torvalds 已提交
1193 1194
};

1195
static const struct inode_operations xfs_dir_ci_inode_operations = {
1196 1197 1198 1199 1200 1201
	.create			= xfs_vn_create,
	.lookup			= xfs_vn_ci_lookup,
	.link			= xfs_vn_link,
	.unlink			= xfs_vn_unlink,
	.symlink		= xfs_vn_symlink,
	.mkdir			= xfs_vn_mkdir,
1202 1203 1204 1205 1206 1207 1208
	/*
	 * Yes, XFS uses the same method for rmdir and unlink.
	 *
	 * There are some subtile differences deeper in the code,
	 * but we use S_ISDIR to check for those.
	 */
	.rmdir			= xfs_vn_unlink,
1209
	.mknod			= xfs_vn_mknod,
1210
	.rename			= xfs_vn_rename,
1211
	.get_acl		= xfs_get_acl,
1212
	.set_acl		= xfs_set_acl,
1213 1214 1215
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.listxattr		= xfs_vn_listxattr,
C
Christoph Hellwig 已提交
1216
	.update_time		= xfs_vn_update_time,
Z
Zhi Yong Wu 已提交
1217
	.tmpfile		= xfs_vn_tmpfile,
1218 1219
};

1220
static const struct inode_operations xfs_symlink_inode_operations = {
1221
	.get_link		= xfs_vn_get_link,
1222 1223 1224
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.listxattr		= xfs_vn_listxattr,
C
Christoph Hellwig 已提交
1225
	.update_time		= xfs_vn_update_time,
L
Linus Torvalds 已提交
1226
};
1227

1228 1229 1230 1231 1232 1233 1234 1235
/*
 * Inode operations installed by xfs_setup_iops() for symlinks whose data
 * fork has XFS_IFINLINE set; the link is read via xfs_vn_get_link_inline.
 */
static const struct inode_operations xfs_inline_symlink_inode_operations = {
	/* link target resolution */
	.get_link		= xfs_vn_get_link_inline,

	/* attributes and timestamps */
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.update_time		= xfs_vn_update_time,

	/* extended attributes */
	.listxattr		= xfs_vn_listxattr,
};

1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256
/* Figure out if this file actually supports DAX. */
static bool
xfs_inode_supports_dax(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;

	/* Only supported on non-reflinked files. */
	if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip))
		return false;

	/* DAX mount option or DAX iflag must be set. */
	if (!(mp->m_flags & XFS_MOUNT_DAX) &&
	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
		return false;

	/* Block size must match page size */
	if (mp->m_sb.sb_blocksize != PAGE_SIZE)
		return false;

	/* Device has to support DAX too. */
1257
	return xfs_inode_buftarg(ip)->bt_daxdev != NULL;
1258 1259
}

1260 1261 1262 1263 1264
STATIC void
xfs_diflags_to_iflags(
	struct inode		*inode,
	struct xfs_inode	*ip)
{
D
Dave Chinner 已提交
1265 1266 1267 1268 1269 1270
	uint16_t		flags = ip->i_d.di_flags;

	inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
			    S_NOATIME | S_DAX);

	if (flags & XFS_DIFLAG_IMMUTABLE)
1271
		inode->i_flags |= S_IMMUTABLE;
D
Dave Chinner 已提交
1272
	if (flags & XFS_DIFLAG_APPEND)
1273
		inode->i_flags |= S_APPEND;
D
Dave Chinner 已提交
1274
	if (flags & XFS_DIFLAG_SYNC)
1275
		inode->i_flags |= S_SYNC;
D
Dave Chinner 已提交
1276
	if (flags & XFS_DIFLAG_NOATIME)
1277
		inode->i_flags |= S_NOATIME;
1278
	if (xfs_inode_supports_dax(ip))
D
Dave Chinner 已提交
1279
		inode->i_flags |= S_DAX;
1280 1281 1282
}

/*
1283
 * Initialize the Linux inode.
1284
 *
1285 1286 1287 1288
 * When reading existing inodes from disk this is called directly from xfs_iget,
 * when creating a new inode it is called from xfs_ialloc after setting up the
 * inode. These callers have different criteria for clearing XFS_INEW, so leave
 * it up to the caller to deal with unlocking the inode appropriately.
1289 1290 1291 1292 1293
 */
void
xfs_setup_inode(
	struct xfs_inode	*ip)
{
1294
	struct inode		*inode = &ip->i_vnode;
1295
	gfp_t			gfp_mask;
1296 1297

	inode->i_ino = ip->i_ino;
C
Christoph Hellwig 已提交
1298
	inode->i_state = I_NEW;
1299 1300

	inode_sb_list_add(inode);
C
Christoph Hellwig 已提交
1301
	/* make the inode look hashed for the writeback code */
A
Al Viro 已提交
1302
	inode_fake_hash(inode);
1303

1304 1305
	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
1306 1307 1308 1309

	i_size_write(inode, ip->i_d.di_size);
	xfs_diflags_to_iflags(inode, ip);

1310
	if (S_ISDIR(inode->i_mode)) {
1311 1312 1313 1314 1315 1316 1317 1318
		/*
		 * We set the i_rwsem class here to avoid potential races with
		 * lockdep_annotate_inode_mutex_key() reinitialising the lock
		 * after a filehandle lookup has already found the inode in
		 * cache before it has been unlocked via unlock_new_inode().
		 */
		lockdep_set_class(&inode->i_rwsem,
				  &inode->i_sb->s_type->i_mutex_dir_key);
1319
		lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
1320
		ip->d_ops = ip->i_mount->m_dir_inode_ops;
1321 1322 1323
	} else {
		ip->d_ops = ip->i_mount->m_nondir_inode_ops;
		lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
1324 1325
	}

1326 1327 1328 1329 1330 1331 1332 1333
	/*
	 * Ensure all page cache allocations are done from GFP_NOFS context to
	 * prevent direct reclaim recursion back into the filesystem and blowing
	 * stacks or deadlocking.
	 */
	gfp_mask = mapping_gfp_mask(inode->i_mapping);
	mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));

1334 1335 1336 1337 1338 1339
	/*
	 * If there is no attribute fork no ACL can exist on this inode,
	 * and it can't have any file capabilities attached to it either.
	 */
	if (!XFS_IFORK_Q(ip)) {
		inode_has_no_xattr(inode);
1340
		cache_no_acl(inode);
1341
	}
1342
}
1343 1344 1345 1346 1347 1348 1349

void
xfs_setup_iops(
	struct xfs_inode	*ip)
{
	struct inode		*inode = &ip->i_vnode;

1350 1351 1352 1353
	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &xfs_inode_operations;
		inode->i_fop = &xfs_file_operations;
D
Dan Williams 已提交
1354 1355 1356 1357
		if (IS_DAX(inode))
			inode->i_mapping->a_ops = &xfs_dax_aops;
		else
			inode->i_mapping->a_ops = &xfs_address_space_operations;
1358 1359 1360 1361 1362 1363 1364 1365 1366
		break;
	case S_IFDIR:
		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
			inode->i_op = &xfs_dir_ci_inode_operations;
		else
			inode->i_op = &xfs_dir_inode_operations;
		inode->i_fop = &xfs_dir_file_operations;
		break;
	case S_IFLNK:
1367 1368 1369 1370
		if (ip->i_df.if_flags & XFS_IFINLINE)
			inode->i_op = &xfs_inline_symlink_inode_operations;
		else
			inode->i_op = &xfs_symlink_inode_operations;
1371 1372 1373 1374 1375 1376 1377
		break;
	default:
		inode->i_op = &xfs_inode_operations;
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
		break;
	}
}