xfs_inode_item.c 22.8 KB
Newer Older
D
Dave Chinner 已提交
1
// SPDX-License-Identifier: GPL-2.0
L
Linus Torvalds 已提交
2
/*
3 4
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
L
Linus Torvalds 已提交
5 6
 */
#include "xfs.h"
7
#include "xfs_fs.h"
8
#include "xfs_shared.h"
9
#include "xfs_format.h"
10 11
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
L
Linus Torvalds 已提交
12 13
#include "xfs_mount.h"
#include "xfs_inode.h"
14
#include "xfs_trans.h"
15
#include "xfs_inode_item.h"
C
Christoph Hellwig 已提交
16
#include "xfs_trace.h"
17
#include "xfs_trans_priv.h"
18
#include "xfs_buf_item.h"
C
Christoph Hellwig 已提交
19
#include "xfs_log.h"
20
#include "xfs_error.h"
L
Linus Torvalds 已提交
21

J
Jeff Layton 已提交
22
#include <linux/iversion.h>
L
Linus Torvalds 已提交
23 24 25

kmem_zone_t	*xfs_ili_zone;		/* inode log item zone */

26 27 28 29 30
static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_inode_log_item, ili_item);
}

31
STATIC void
32 33
xfs_inode_item_data_fork_size(
	struct xfs_inode_log_item *iip,
34 35
	int			*nvecs,
	int			*nbytes)
L
Linus Torvalds 已提交
36
{
37
	struct xfs_inode	*ip = iip->ili_inode;
38

39
	switch (ip->i_df.if_format) {
L
Linus Torvalds 已提交
40
	case XFS_DINODE_FMT_EXTENTS:
41
		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
42
		    ip->i_df.if_nextents > 0 &&
43 44 45 46 47
		    ip->i_df.if_bytes > 0) {
			/* worst case, doesn't subtract delalloc extents */
			*nbytes += XFS_IFORK_DSIZE(ip);
			*nvecs += 1;
		}
L
Linus Torvalds 已提交
48 49
		break;
	case XFS_DINODE_FMT_BTREE:
50
		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
51 52 53 54
		    ip->i_df.if_broot_bytes > 0) {
			*nbytes += ip->i_df.if_broot_bytes;
			*nvecs += 1;
		}
L
Linus Torvalds 已提交
55 56
		break;
	case XFS_DINODE_FMT_LOCAL:
57
		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
58 59 60 61
		    ip->i_df.if_bytes > 0) {
			*nbytes += roundup(ip->i_df.if_bytes, 4);
			*nvecs += 1;
		}
L
Linus Torvalds 已提交
62 63 64 65 66 67 68 69
		break;

	case XFS_DINODE_FMT_DEV:
		break;
	default:
		ASSERT(0);
		break;
	}
70
}
L
Linus Torvalds 已提交
71

72 73 74 75 76 77 78
STATIC void
xfs_inode_item_attr_fork_size(
	struct xfs_inode_log_item *iip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_inode	*ip = iip->ili_inode;
L
Linus Torvalds 已提交
79

80
	switch (ip->i_afp->if_format) {
L
Linus Torvalds 已提交
81
	case XFS_DINODE_FMT_EXTENTS:
82
		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
83
		    ip->i_afp->if_nextents > 0 &&
84 85 86 87 88
		    ip->i_afp->if_bytes > 0) {
			/* worst case, doesn't subtract unused space */
			*nbytes += XFS_IFORK_ASIZE(ip);
			*nvecs += 1;
		}
L
Linus Torvalds 已提交
89 90
		break;
	case XFS_DINODE_FMT_BTREE:
91
		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
92 93 94 95
		    ip->i_afp->if_broot_bytes > 0) {
			*nbytes += ip->i_afp->if_broot_bytes;
			*nvecs += 1;
		}
L
Linus Torvalds 已提交
96 97
		break;
	case XFS_DINODE_FMT_LOCAL:
98
		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
99 100 101 102
		    ip->i_afp->if_bytes > 0) {
			*nbytes += roundup(ip->i_afp->if_bytes, 4);
			*nvecs += 1;
		}
L
Linus Torvalds 已提交
103 104 105 106 107 108 109
		break;
	default:
		ASSERT(0);
		break;
	}
}

110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
/*
 * This returns the number of iovecs needed to log the given inode item.
 *
 * We need one iovec for the inode log format structure, one for the
 * inode core, and possibly one for the inode data/extents/b-tree root
 * and one for the inode attribute data/extents/b-tree root.
 */
STATIC void
xfs_inode_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;

	*nvecs += 2;
	*nbytes += sizeof(struct xfs_inode_log_format) +
128
		   xfs_log_dinode_size(ip->i_mount);
129 130 131 132 133 134

	xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
	if (XFS_IFORK_Q(ip))
		xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
}

C
Christoph Hellwig 已提交
135
STATIC void
136 137
xfs_inode_item_format_data_fork(
	struct xfs_inode_log_item *iip,
138 139 140
	struct xfs_inode_log_format *ilf,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp)
L
Linus Torvalds 已提交
141
{
142
	struct xfs_inode	*ip = iip->ili_inode;
L
Linus Torvalds 已提交
143 144
	size_t			data_bytes;

145
	switch (ip->i_df.if_format) {
L
Linus Torvalds 已提交
146
	case XFS_DINODE_FMT_EXTENTS:
147
		iip->ili_fields &=
148
			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
149

150
		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
151
		    ip->i_df.if_nextents > 0 &&
152
		    ip->i_df.if_bytes > 0) {
153 154
			struct xfs_bmbt_rec *p;

155
			ASSERT(xfs_iext_count(&ip->i_df) > 0);
156 157 158 159 160 161 162 163

			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
			data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
			xlog_finish_iovec(lv, *vecp, data_bytes);

			ASSERT(data_bytes <= ip->i_df.if_bytes);

			ilf->ilf_dsize = data_bytes;
164
			ilf->ilf_size++;
165
		} else {
166
			iip->ili_fields &= ~XFS_ILOG_DEXT;
L
Linus Torvalds 已提交
167 168 169
		}
		break;
	case XFS_DINODE_FMT_BTREE:
170
		iip->ili_fields &=
171
			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);
172

173
		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
174
		    ip->i_df.if_broot_bytes > 0) {
L
Linus Torvalds 已提交
175
			ASSERT(ip->i_df.if_broot != NULL);
176
			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
C
Christoph Hellwig 已提交
177 178
					ip->i_df.if_broot,
					ip->i_df.if_broot_bytes);
179 180
			ilf->ilf_dsize = ip->i_df.if_broot_bytes;
			ilf->ilf_size++;
181
		} else {
182
			ASSERT(!(iip->ili_fields &
183
				 XFS_ILOG_DBROOT));
184
			iip->ili_fields &= ~XFS_ILOG_DBROOT;
L
Linus Torvalds 已提交
185 186 187
		}
		break;
	case XFS_DINODE_FMT_LOCAL:
188
		iip->ili_fields &=
189
			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
190
		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
191
		    ip->i_df.if_bytes > 0) {
L
Linus Torvalds 已提交
192 193
			/*
			 * Round i_bytes up to a word boundary.
194
			 * The underlying memory is guaranteed
L
Linus Torvalds 已提交
195 196 197
			 * to be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_df.if_bytes, 4);
C
Christoph Hellwig 已提交
198 199
			ASSERT(ip->i_df.if_u1.if_data != NULL);
			ASSERT(ip->i_d.di_size > 0);
200
			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
C
Christoph Hellwig 已提交
201
					ip->i_df.if_u1.if_data, data_bytes);
202 203
			ilf->ilf_dsize = (unsigned)data_bytes;
			ilf->ilf_size++;
204
		} else {
205
			iip->ili_fields &= ~XFS_ILOG_DDATA;
L
Linus Torvalds 已提交
206 207 208
		}
		break;
	case XFS_DINODE_FMT_DEV:
209
		iip->ili_fields &=
210
			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT);
211
		if (iip->ili_fields & XFS_ILOG_DEV)
C
Christoph Hellwig 已提交
212
			ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev);
L
Linus Torvalds 已提交
213 214 215 216 217
		break;
	default:
		ASSERT(0);
		break;
	}
218 219
}

C
Christoph Hellwig 已提交
220
STATIC void
221 222
xfs_inode_item_format_attr_fork(
	struct xfs_inode_log_item *iip,
223 224 225
	struct xfs_inode_log_format *ilf,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp)
226 227 228
{
	struct xfs_inode	*ip = iip->ili_inode;
	size_t			data_bytes;
L
Linus Torvalds 已提交
229

230
	switch (ip->i_afp->if_format) {
L
Linus Torvalds 已提交
231
	case XFS_DINODE_FMT_EXTENTS:
232
		iip->ili_fields &=
233 234
			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);

235
		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
236
		    ip->i_afp->if_nextents > 0 &&
237
		    ip->i_afp->if_bytes > 0) {
238 239
			struct xfs_bmbt_rec *p;

240
			ASSERT(xfs_iext_count(ip->i_afp) ==
241
				ip->i_afp->if_nextents);
242 243 244 245 246 247

			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
			data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
			xlog_finish_iovec(lv, *vecp, data_bytes);

			ilf->ilf_asize = data_bytes;
248
			ilf->ilf_size++;
249
		} else {
250
			iip->ili_fields &= ~XFS_ILOG_AEXT;
L
Linus Torvalds 已提交
251 252 253
		}
		break;
	case XFS_DINODE_FMT_BTREE:
254
		iip->ili_fields &=
255 256
			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);

257
		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
258
		    ip->i_afp->if_broot_bytes > 0) {
L
Linus Torvalds 已提交
259
			ASSERT(ip->i_afp->if_broot != NULL);
260

261
			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
C
Christoph Hellwig 已提交
262 263
					ip->i_afp->if_broot,
					ip->i_afp->if_broot_bytes);
264 265
			ilf->ilf_asize = ip->i_afp->if_broot_bytes;
			ilf->ilf_size++;
266
		} else {
267
			iip->ili_fields &= ~XFS_ILOG_ABROOT;
L
Linus Torvalds 已提交
268 269 270
		}
		break;
	case XFS_DINODE_FMT_LOCAL:
271
		iip->ili_fields &=
272 273
			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);

274
		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
275
		    ip->i_afp->if_bytes > 0) {
L
Linus Torvalds 已提交
276 277
			/*
			 * Round i_bytes up to a word boundary.
278
			 * The underlying memory is guaranteed
L
Linus Torvalds 已提交
279 280 281
			 * to be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_afp->if_bytes, 4);
C
Christoph Hellwig 已提交
282
			ASSERT(ip->i_afp->if_u1.if_data != NULL);
283
			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
C
Christoph Hellwig 已提交
284 285
					ip->i_afp->if_u1.if_data,
					data_bytes);
286 287
			ilf->ilf_asize = (unsigned)data_bytes;
			ilf->ilf_size++;
288
		} else {
289
			iip->ili_fields &= ~XFS_ILOG_ADATA;
L
Linus Torvalds 已提交
290 291 292 293 294 295
		}
		break;
	default:
		ASSERT(0);
		break;
	}
296 297
}

298 299 300 301 302 303
/*
 * Convert an incore timestamp to a log timestamp.  Note that the log format
 * specifies host endian format!
 */
static inline xfs_ictimestamp_t
xfs_inode_to_log_dinode_ts(
304
	struct xfs_inode		*ip,
305 306 307 308 309
	const struct timespec64		tv)
{
	struct xfs_legacy_ictimestamp	*lits;
	xfs_ictimestamp_t		its;

310 311 312
	if (xfs_inode_has_bigtime(ip))
		return xfs_inode_encode_bigtime(tv);

313 314 315 316 317 318 319
	lits = (struct xfs_legacy_ictimestamp *)&its;
	lits->t_sec = tv.tv_sec;
	lits->t_nsec = tv.tv_nsec;

	return its;
}

320
static void
321 322
xfs_inode_to_log_dinode(
	struct xfs_inode	*ip,
323 324
	struct xfs_log_dinode	*to,
	xfs_lsn_t		lsn)
325
{
326 327 328
	struct xfs_icdinode	*from = &ip->i_d;
	struct inode		*inode = VFS_I(ip);

329
	to->di_magic = XFS_DINODE_MAGIC;
330
	to->di_format = xfs_ifork_format(&ip->i_df);
331 332
	to->di_uid = i_uid_read(inode);
	to->di_gid = i_gid_read(inode);
333 334
	to->di_projid_lo = from->di_projid & 0xffff;
	to->di_projid_hi = from->di_projid >> 16;
335

336
	memset(to->di_pad, 0, sizeof(to->di_pad));
337
	memset(to->di_pad3, 0, sizeof(to->di_pad3));
338 339 340
	to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
	to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
	to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode->i_ctime);
341
	to->di_nlink = inode->i_nlink;
342
	to->di_gen = inode->i_generation;
D
Dave Chinner 已提交
343
	to->di_mode = inode->i_mode;
344 345 346 347

	to->di_size = from->di_size;
	to->di_nblocks = from->di_nblocks;
	to->di_extsize = from->di_extsize;
348 349
	to->di_nextents = xfs_ifork_nextents(&ip->i_df);
	to->di_anextents = xfs_ifork_nextents(ip->i_afp);
350
	to->di_forkoff = from->di_forkoff;
351
	to->di_aformat = xfs_ifork_format(ip->i_afp);
352 353 354 355
	to->di_dmevmask = from->di_dmevmask;
	to->di_dmstate = from->di_dmstate;
	to->di_flags = from->di_flags;

356 357 358
	/* log a dummy value to ensure log structure is fully initialised */
	to->di_next_unlinked = NULLAGINO;

359 360
	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
		to->di_version = 3;
J
Jeff Layton 已提交
361
		to->di_changecount = inode_peek_iversion(inode);
362
		to->di_crtime = xfs_inode_to_log_dinode_ts(ip, from->di_crtime);
363
		to->di_flags2 = from->di_flags2;
364
		to->di_cowextsize = from->di_cowextsize;
365 366 367 368
		to->di_ino = ip->i_ino;
		to->di_lsn = lsn;
		memset(to->di_pad2, 0, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
369 370
		to->di_flushiter = 0;
	} else {
371
		to->di_version = 2;
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
		to->di_flushiter = from->di_flushiter;
	}
}

/*
 * Format the inode core. Current timestamp data is only in the VFS inode
 * fields, so we need to grab them from there. Hence rather than just copying
 * the XFS inode core structure, format the fields directly into the iovec.
 */
static void
xfs_inode_item_format_core(
	struct xfs_inode	*ip,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp)
{
	struct xfs_log_dinode	*dic;

	dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
390
	xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
391
	xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_mount));
392 393
}

394 395 396 397 398 399
/*
 * This is called to fill in the vector of log iovecs for the given inode
 * log item.  It fills the first item with an inode log format structure,
 * the second with the on-disk inode structure, and a possible third and/or
 * fourth with the inode data/extents/b-tree root and inode attributes
 * data/extents/b-tree root.
400 401 402 403 404
 *
 * Note: Always use the 64 bit inode log format structure so we don't
 * leave an uninitialised hole in the format item on 64 bit systems. Log
 * recovery on 32 bit systems handles this just fine, so there's no reason
 * for not using an initialising the properly padded structure all the time.
405 406 407 408
 */
STATIC void
xfs_inode_item_format(
	struct xfs_log_item	*lip,
409
	struct xfs_log_vec	*lv)
410 411 412
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
413
	struct xfs_log_iovec	*vecp = NULL;
414
	struct xfs_inode_log_format *ilf;
415

416 417 418 419 420 421 422 423
	ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
	ilf->ilf_type = XFS_LI_INODE;
	ilf->ilf_ino = ip->i_ino;
	ilf->ilf_blkno = ip->i_imap.im_blkno;
	ilf->ilf_len = ip->i_imap.im_len;
	ilf->ilf_boffset = ip->i_imap.im_boffset;
	ilf->ilf_fields = XFS_ILOG_CORE;
	ilf->ilf_size = 2; /* format + core */
424 425 426 427 428 429 430 431

	/*
	 * make sure we don't leak uninitialised data into the log in the case
	 * when we don't log every field in the inode.
	 */
	ilf->ilf_dsize = 0;
	ilf->ilf_asize = 0;
	ilf->ilf_pad = 0;
432
	memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u));
433 434

	xlog_finish_iovec(lv, vecp, sizeof(*ilf));
435

436
	xfs_inode_item_format_core(ip, lv, &vecp);
437
	xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
438
	if (XFS_IFORK_Q(ip)) {
439
		xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
440 441 442 443 444
	} else {
		iip->ili_fields &=
			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
	}

445 446
	/* update the format with the exact fields we actually logged */
	ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
L
Linus Torvalds 已提交
447 448 449 450
}

/*
 * This is called to pin the inode associated with the inode log
451
 * item in memory so it cannot be written out.
L
Linus Torvalds 已提交
452 453 454
 */
STATIC void
xfs_inode_item_pin(
455
	struct xfs_log_item	*lip)
L
Linus Torvalds 已提交
456
{
457
	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;
458

459
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
460
	ASSERT(lip->li_buf);
461 462 463

	trace_xfs_inode_pin(ip, _RET_IP_);
	atomic_inc(&ip->i_pincount);
L
Linus Torvalds 已提交
464 465 466 467 468 469
}


/*
 * This is called to unpin the inode associated with the inode log
 * item which was previously pinned with a call to xfs_inode_item_pin().
470 471
 *
 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
472 473 474 475 476 477
 *
 * Note that unpin can race with inode cluster buffer freeing marking the buffer
 * stale. In that case, flush completions are run from the buffer unpin call,
 * which may happen before the inode is unpinned. If we lose the race, there
 * will be no buffer attached to the log item, but the inode will be marked
 * XFS_ISTALE.
L
Linus Torvalds 已提交
478 479 480
 */
STATIC void
xfs_inode_item_unpin(
481
	struct xfs_log_item	*lip,
482
	int			remove)
L
Linus Torvalds 已提交
483
{
484
	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;
485

486
	trace_xfs_inode_unpin(ip, _RET_IP_);
487
	ASSERT(lip->li_buf || xfs_iflags_test(ip, XFS_ISTALE));
488 489
	ASSERT(atomic_read(&ip->i_pincount) > 0);
	if (atomic_dec_and_test(&ip->i_pincount))
490
		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
L
Linus Torvalds 已提交
491 492 493
}

STATIC uint
494 495 496
xfs_inode_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
497 498
		__releases(&lip->li_ailp->ail_lock)
		__acquires(&lip->li_ailp->ail_lock)
L
Linus Torvalds 已提交
499
{
500 501
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
502
	struct xfs_buf		*bp = lip->li_buf;
503 504
	uint			rval = XFS_ITEM_SUCCESS;
	int			error;
L
Linus Torvalds 已提交
505

506 507 508 509
	ASSERT(iip->ili_item.li_buf);

	if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp) ||
	    (ip->i_flags & XFS_ISTALE))
L
Linus Torvalds 已提交
510 511
		return XFS_ITEM_PINNED;

512
	if (xfs_iflags_test(ip, XFS_IFLUSHING))
513
		return XFS_ITEM_FLUSHING;
L
Linus Torvalds 已提交
514

515 516
	if (!xfs_buf_trylock(bp))
		return XFS_ITEM_LOCKED;
517

518
	spin_unlock(&lip->li_ailp->ail_lock);
519

520
	/*
521 522 523 524
	 * We need to hold a reference for flushing the cluster buffer as it may
	 * fail the buffer without IO submission. In which case, we better get a
	 * reference for that completion because otherwise we don't get a
	 * reference for IO until we queue the buffer for delwri submission.
525
	 */
526
	xfs_buf_hold(bp);
527
	error = xfs_iflush_cluster(bp);
528 529 530 531
	if (!error) {
		if (!xfs_buf_delwri_queue(bp, buffer_list))
			rval = XFS_ITEM_FLUSHING;
		xfs_buf_relse(bp);
532
	} else {
533 534 535 536 537 538
		/*
		 * Release the buffer if we were unable to flush anything. On
		 * any other error, the buffer has already been released.
		 */
		if (error == -EAGAIN)
			xfs_buf_relse(bp);
539
		rval = XFS_ITEM_LOCKED;
540
	}
541

542
	spin_lock(&lip->li_ailp->ail_lock);
543
	return rval;
L
Linus Torvalds 已提交
544 545 546 547 548 549
}

/*
 * Unlock the inode associated with the inode log item.
 */
STATIC void
C
Christoph Hellwig 已提交
550
xfs_inode_item_release(
551
	struct xfs_log_item	*lip)
L
Linus Torvalds 已提交
552
{
553 554
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
555
	unsigned short		lock_flags;
L
Linus Torvalds 已提交
556

C
Christoph Hellwig 已提交
557 558
	ASSERT(ip->i_itemp != NULL);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
L
Linus Torvalds 已提交
559

560 561
	lock_flags = iip->ili_lock_flags;
	iip->ili_lock_flags = 0;
562
	if (lock_flags)
C
Christoph Hellwig 已提交
563
		xfs_iunlock(ip, lock_flags);
L
Linus Torvalds 已提交
564 565 566
}

/*
567 568 569 570 571 572 573 574 575 576 577
 * This is called to find out where the oldest active copy of the inode log
 * item in the on disk log resides now that the last log write of it completed
 * at the given lsn.  Since we always re-log all dirty data in an inode, the
 * latest copy in the on disk log is the only one that matters.  Therefore,
 * simply return the given lsn.
 *
 * If the inode has been marked stale because the cluster is being freed, we
 * don't want to (re-)insert this inode into the AIL. There is a race condition
 * where the cluster buffer may be unpinned before the inode is inserted into
 * the AIL during transaction committed processing. If the buffer is unpinned
 * before the inode item has been committed and inserted, then it is possible
578
 * for the buffer to be written and IO completes before the inode is inserted
579 580 581 582 583
 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
 * AIL which will never get removed. It will, however, get reclaimed which
 * triggers an assert in xfs_inode_free() complaining about freein an inode
 * still in the AIL.
 *
584 585 586
 * To avoid this, just unpin the inode directly and return a LSN of -1 so the
 * transaction committed code knows that it does not need to do any further
 * processing on the item.
L
Linus Torvalds 已提交
587 588 589
 */
STATIC xfs_lsn_t
xfs_inode_item_committed(
590
	struct xfs_log_item	*lip,
L
Linus Torvalds 已提交
591 592
	xfs_lsn_t		lsn)
{
593 594 595
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;

596 597 598 599
	if (xfs_iflags_test(ip, XFS_ISTALE)) {
		xfs_inode_item_unpin(lip, 0);
		return -1;
	}
600
	return lsn;
L
Linus Torvalds 已提交
601 602 603 604
}

STATIC void
xfs_inode_item_committing(
605
	struct xfs_log_item	*lip,
C
Christoph Hellwig 已提交
606
	xfs_lsn_t		commit_lsn)
L
Linus Torvalds 已提交
607
{
C
Christoph Hellwig 已提交
608 609
	INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
	return xfs_inode_item_release(lip);
L
Linus Torvalds 已提交
610 611
}

C
Christoph Hellwig 已提交
612
static const struct xfs_item_ops xfs_inode_item_ops = {
613 614 615 616
	.iop_size	= xfs_inode_item_size,
	.iop_format	= xfs_inode_item_format,
	.iop_pin	= xfs_inode_item_pin,
	.iop_unpin	= xfs_inode_item_unpin,
C
Christoph Hellwig 已提交
617
	.iop_release	= xfs_inode_item_release,
618 619
	.iop_committed	= xfs_inode_item_committed,
	.iop_push	= xfs_inode_item_push,
C
Christoph Hellwig 已提交
620
	.iop_committing	= xfs_inode_item_committing,
L
Linus Torvalds 已提交
621 622 623 624 625 626 627 628
};


/*
 * Initialize the inode log item for a newly allocated (in-core) inode.
 */
void
xfs_inode_item_init(
629 630
	struct xfs_inode	*ip,
	struct xfs_mount	*mp)
L
Linus Torvalds 已提交
631
{
632
	struct xfs_inode_log_item *iip;
L
Linus Torvalds 已提交
633 634

	ASSERT(ip->i_itemp == NULL);
635 636
	iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_zone,
					      GFP_KERNEL | __GFP_NOFAIL);
L
Linus Torvalds 已提交
637 638

	iip->ili_inode = ip;
D
Dave Chinner 已提交
639
	spin_lock_init(&iip->ili_lock);
640 641
	xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
						&xfs_inode_item_ops);
L
Linus Torvalds 已提交
642 643 644 645 646 647 648
}

/*
 * Free the inode log item and any memory hanging off of it.
 */
void
xfs_inode_item_destroy(
649
	struct xfs_inode	*ip)
L
Linus Torvalds 已提交
650
{
651 652 653 654 655 656 657
	struct xfs_inode_log_item *iip = ip->i_itemp;

	ASSERT(iip->ili_item.li_buf == NULL);

	ip->i_itemp = NULL;
	kmem_free(iip->ili_item.li_lv_shadow);
	kmem_cache_free(xfs_ili_zone, iip);
L
Linus Torvalds 已提交
658 659 660 661
}


/*
D
Dave Chinner 已提交
662 663 664
 * We only want to pull the item from the AIL if it is actually there
 * and its location in the log has not changed since we started the
 * flush.  Thus, we only bother if the inode's lsn has not changed.
L
Linus Torvalds 已提交
665
 */
D
Dave Chinner 已提交
666 667 668 669
static void
xfs_iflush_ail_updates(
	struct xfs_ail		*ailp,
	struct list_head	*list)
L
Linus Torvalds 已提交
670
{
D
Dave Chinner 已提交
671 672
	struct xfs_log_item	*lip;
	xfs_lsn_t		tail_lsn = 0;
673

D
Dave Chinner 已提交
674 675 676 677
	/* this is an opencoded batch version of xfs_trans_ail_delete */
	spin_lock(&ailp->ail_lock);
	list_for_each_entry(lip, list, li_bio_list) {
		xfs_lsn_t	lsn;
678

D
Dave Chinner 已提交
679 680
		clear_bit(XFS_LI_FAILED, &lip->li_flags);
		if (INODE_ITEM(lip)->ili_flush_lsn != lip->li_lsn)
681 682
			continue;

D
Dave Chinner 已提交
683 684 685
		lsn = xfs_ail_delete_one(ailp, lip);
		if (!tail_lsn && lsn)
			tail_lsn = lsn;
686
	}
D
Dave Chinner 已提交
687 688
	xfs_ail_update_finish(ailp, tail_lsn);
}
L
Linus Torvalds 已提交
689

D
Dave Chinner 已提交
690 691 692 693 694 695 696 697 698 699 700 701
/*
 * Walk the list of inodes that have completed their IOs. If they are clean
 * remove them from the list and dissociate them from the buffer. Buffers that
 * are still dirty remain linked to the buffer and on the list. Caller must
 * handle them appropriately.
 */
static void
xfs_iflush_finish(
	struct xfs_buf		*bp,
	struct list_head	*list)
{
	struct xfs_log_item	*lip, *n;
L
Linus Torvalds 已提交
702

D
Dave Chinner 已提交
703 704
	list_for_each_entry_safe(lip, n, list, li_bio_list) {
		struct xfs_inode_log_item *iip = INODE_ITEM(lip);
705 706
		bool	drop_buffer = false;

D
Dave Chinner 已提交
707
		spin_lock(&iip->ili_lock);
708 709 710

		/*
		 * Remove the reference to the cluster buffer if the inode is
D
Dave Chinner 已提交
711 712
		 * clean in memory and drop the buffer reference once we've
		 * dropped the locks we hold.
713 714 715 716
		 */
		ASSERT(iip->ili_item.li_buf == bp);
		if (!iip->ili_fields) {
			iip->ili_item.li_buf = NULL;
D
Dave Chinner 已提交
717
			list_del_init(&lip->li_bio_list);
718 719
			drop_buffer = true;
		}
720
		iip->ili_last_fields = 0;
721
		iip->ili_flush_lsn = 0;
D
Dave Chinner 已提交
722
		spin_unlock(&iip->ili_lock);
723
		xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING);
724 725
		if (drop_buffer)
			xfs_buf_rele(bp);
726
	}
L
Linus Torvalds 已提交
727 728
}

D
Dave Chinner 已提交
729 730
/*
 * Inode buffer IO completion routine.  It is responsible for removing inodes
731 732
 * attached to the buffer from the AIL if they have not been re-logged and
 * completing the inode flush.
D
Dave Chinner 已提交
733 734
 */
void
735
xfs_buf_inode_iodone(
D
Dave Chinner 已提交
736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773
	struct xfs_buf		*bp)
{
	struct xfs_log_item	*lip, *n;
	LIST_HEAD(flushed_inodes);
	LIST_HEAD(ail_updates);

	/*
	 * Pull the attached inodes from the buffer one at a time and take the
	 * appropriate action on them.
	 */
	list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
		struct xfs_inode_log_item *iip = INODE_ITEM(lip);

		if (xfs_iflags_test(iip->ili_inode, XFS_ISTALE)) {
			xfs_iflush_abort(iip->ili_inode);
			continue;
		}
		if (!iip->ili_last_fields)
			continue;

		/* Do an unlocked check for needing the AIL lock. */
		if (iip->ili_flush_lsn == lip->li_lsn ||
		    test_bit(XFS_LI_FAILED, &lip->li_flags))
			list_move_tail(&lip->li_bio_list, &ail_updates);
		else
			list_move_tail(&lip->li_bio_list, &flushed_inodes);
	}

	if (!list_empty(&ail_updates)) {
		xfs_iflush_ail_updates(bp->b_mount->m_ail, &ail_updates);
		list_splice_tail(&ail_updates, &flushed_inodes);
	}

	xfs_iflush_finish(bp, &flushed_inodes);
	if (!list_empty(&flushed_inodes))
		list_splice_tail(&flushed_inodes, &bp->b_li_list);
}

774 775 776 777 778 779 780 781 782 783
void
xfs_buf_inode_io_fail(
	struct xfs_buf		*bp)
{
	struct xfs_log_item	*lip;

	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
		set_bit(XFS_LI_FAILED, &lip->li_flags);
}

L
Linus Torvalds 已提交
784
/*
785
 * This is the inode flushing abort routine.  It is called when
786 787
 * the filesystem is shutting down to clean up the inode state.  It is
 * responsible for removing the inode item from the AIL if it has not been
788
 * re-logged and clearing the inode's flush state.
L
Linus Torvalds 已提交
789 790 791
 */
void
xfs_iflush_abort(
792
	struct xfs_inode	*ip)
L
Linus Torvalds 已提交
793
{
794 795
	struct xfs_inode_log_item *iip = ip->i_itemp;
	struct xfs_buf		*bp = NULL;
L
Linus Torvalds 已提交
796 797

	if (iip) {
798 799 800 801 802 803
		/*
		 * Clear the failed bit before removing the item from the AIL so
		 * xfs_trans_ail_delete() doesn't try to clear and release the
		 * buffer attached to the log item before we are done with it.
		 */
		clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags);
804
		xfs_trans_ail_delete(&iip->ili_item, 0);
805

L
Linus Torvalds 已提交
806 807 808 809
		/*
		 * Clear the inode logging fields so no more flushes are
		 * attempted.
		 */
D
Dave Chinner 已提交
810
		spin_lock(&iip->ili_lock);
811
		iip->ili_last_fields = 0;
812
		iip->ili_fields = 0;
813
		iip->ili_fsync_fields = 0;
814 815 816
		iip->ili_flush_lsn = 0;
		bp = iip->ili_item.li_buf;
		iip->ili_item.li_buf = NULL;
817
		list_del_init(&iip->ili_item.li_bio_list);
D
Dave Chinner 已提交
818
		spin_unlock(&iip->ili_lock);
L
Linus Torvalds 已提交
819
	}
820
	xfs_iflags_clear(ip, XFS_IFLUSHING);
821 822
	if (bp)
		xfs_buf_rele(bp);
L
Linus Torvalds 已提交
823 824
}

825
/*
826 827
 * convert an xfs_inode_log_format struct from the old 32 bit version
 * (which can have different field alignments) to the native 64 bit version
828 829 830
 */
int
xfs_inode_item_format_convert(
831 832
	struct xfs_log_iovec		*buf,
	struct xfs_inode_log_format	*in_f)
833
{
834 835
	struct xfs_inode_log_format_32	*in_f32 = buf->i_addr;

836 837
	if (buf->i_len != sizeof(*in_f32)) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
838
		return -EFSCORRUPTED;
839
	}
840 841 842 843 844 845 846

	in_f->ilf_type = in_f32->ilf_type;
	in_f->ilf_size = in_f32->ilf_size;
	in_f->ilf_fields = in_f32->ilf_fields;
	in_f->ilf_asize = in_f32->ilf_asize;
	in_f->ilf_dsize = in_f32->ilf_dsize;
	in_f->ilf_ino = in_f32->ilf_ino;
847
	memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u));
848 849 850 851
	in_f->ilf_blkno = in_f32->ilf_blkno;
	in_f->ilf_len = in_f32->ilf_len;
	in_f->ilf_boffset = in_f32->ilf_boffset;
	return 0;
852
}