common.c 21.3 KB
Newer Older
D
Dave Chinner 已提交
1
// SPDX-License-Identifier: GPL-2.0+
D
Darrick J. Wong 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
D
Darrick J. Wong 已提交
19 20
#include "xfs_icache.h"
#include "xfs_itable.h"
D
Darrick J. Wong 已提交
21 22 23 24 25 26 27 28 29 30
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
D
Darrick J. Wong 已提交
31 32
#include "xfs_log.h"
#include "xfs_trans_priv.h"
33 34
#include "xfs_attr.h"
#include "xfs_reflink.h"
D
Darrick J. Wong 已提交
35 36 37 38
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
39
#include "scrub/btree.h"
40
#include "scrub/repair.h"
D
Darrick J. Wong 已提交
41 42 43

/* Common code for the metadata scrubbers. */

44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/*
 * Handling operational errors.
 *
 * The *_process_error() family of functions are used to process error return
 * codes from functions called as part of a scrub operation.
 *
 * If there's no error, we return true to tell the caller that it's ok
 * to move on to the next check in its list.
 *
 * For non-verifier errors (e.g. ENOMEM) we return false to tell the
 * caller that something bad happened, and we preserve *error so that
 * the caller can return the *error up the stack to userspace.
 *
 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
 * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
 * not via return codes.  We return false to tell the caller that
 * something bad happened.  Since the error has been cleared, the caller
 * will (presumably) return that zero and scrubbing will move on to
 * whatever's next.
 *
 * ftrace can be used to record the precise metadata location and the
 * approximate code location of the failed operation.
 */

/* Check for operational errors. */
70
static bool
D
Darrick J. Wong 已提交
71
__xchk_process_error(
72 73 74
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
75 76 77
	int				*error,
	__u32				errflag,
	void				*ret_ip)
78 79 80 81 82 83
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
D
Darrick J. Wong 已提交
84
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
85 86 87 88
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
89
		sc->sm->sm_flags |= errflag;
90 91 92
		*error = 0;
		/* fall through */
	default:
D
Darrick J. Wong 已提交
93
		trace_xchk_op_error(sc, agno, bno, *error,
94
				ret_ip);
95 96 97 98 99 100
		break;
	}
	return false;
}

bool
D
Darrick J. Wong 已提交
101
xchk_process_error(
102 103 104 105 106
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
	int				*error)
{
D
Darrick J. Wong 已提交
107
	return __xchk_process_error(sc, agno, bno, error,
108 109 110 111
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
D
Darrick J. Wong 已提交
112
xchk_xref_process_error(
113 114 115 116 117
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
	int				*error)
{
D
Darrick J. Wong 已提交
118
	return __xchk_process_error(sc, agno, bno, error,
119 120 121 122 123
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/* Check for operational errors for a file offset. */
static bool
D
Darrick J. Wong 已提交
124
__xchk_fblock_process_error(
125 126 127
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset,
128 129 130
	int				*error,
	__u32				errflag,
	void				*ret_ip)
131 132 133 134 135 136
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
D
Darrick J. Wong 已提交
137
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
138 139 140 141
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
142
		sc->sm->sm_flags |= errflag;
143 144 145
		*error = 0;
		/* fall through */
	default:
D
Darrick J. Wong 已提交
146
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
147
				ret_ip);
148 149 150 151 152
		break;
	}
	return false;
}

153
bool
D
Darrick J. Wong 已提交
154
xchk_fblock_process_error(
155 156 157 158 159
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset,
	int				*error)
{
D
Darrick J. Wong 已提交
160
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
161 162 163 164
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
D
Darrick J. Wong 已提交
165
xchk_fblock_xref_process_error(
166 167 168 169 170
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset,
	int				*error)
{
D
Darrick J. Wong 已提交
171
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
172 173 174
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

175 176 177 178 179 180 181 182 183 184 185 186 187 188
/*
 * Handling scrub corruption/optimization/warning checks.
 *
 * The *_set_{corrupt,preen,warning}() family of functions are used to
 * record the presence of metadata that is incorrect (corrupt), could be
 * optimized somehow (preen), or should be flagged for administrative
 * review but is not incorrect (warn).
 *
 * ftrace can be used to record the precise metadata location and
 * approximate code location of the failed check.
 */

/* Record a block which could be optimized. */
void
D
Darrick J. Wong 已提交
189
xchk_block_set_preen(
190 191 192 193
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
D
Darrick J. Wong 已提交
194
	trace_xchk_block_preen(sc, bp->b_bn, __return_address);
195 196 197 198 199 200 201 202
}

/*
 * Record an inode which could be optimized.  The trace data will
 * include the block given by bp if bp is given; otherwise it will use
 * the block location of the inode record itself.
 */
void
D
Darrick J. Wong 已提交
203
xchk_ino_set_preen(
204
	struct xfs_scrub_context	*sc,
205
	xfs_ino_t			ino)
206 207
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
D
Darrick J. Wong 已提交
208
	trace_xchk_ino_preen(sc, ino, __return_address);
209 210 211 212
}

/* Record a corrupt block. */
void
D
Darrick J. Wong 已提交
213
xchk_block_set_corrupt(
214 215 216 217
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
D
Darrick J. Wong 已提交
218
	trace_xchk_block_error(sc, bp->b_bn, __return_address);
219 220
}

221 222
/* Record a corruption while cross-referencing. */
void
D
Darrick J. Wong 已提交
223
xchk_block_xref_set_corrupt(
224 225 226 227
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
D
Darrick J. Wong 已提交
228
	trace_xchk_block_error(sc, bp->b_bn, __return_address);
229 230
}

231 232 233 234 235 236
/*
 * Record a corrupt inode.  The trace data will include the block given
 * by bp if bp is given; otherwise it will use the block location of the
 * inode record itself.
 */
void
D
Darrick J. Wong 已提交
237
xchk_ino_set_corrupt(
238
	struct xfs_scrub_context	*sc,
239
	xfs_ino_t			ino)
240 241
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
D
Darrick J. Wong 已提交
242
	trace_xchk_ino_error(sc, ino, __return_address);
243 244
}

245 246
/* Record a corruption while cross-referencing with an inode. */
void
D
Darrick J. Wong 已提交
247
xchk_ino_xref_set_corrupt(
248
	struct xfs_scrub_context	*sc,
249
	xfs_ino_t			ino)
250 251
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
D
Darrick J. Wong 已提交
252
	trace_xchk_ino_error(sc, ino, __return_address);
253 254
}

255 256
/* Record corruption in a block indexed by a file fork. */
void
D
Darrick J. Wong 已提交
257
xchk_fblock_set_corrupt(
258 259 260 261 262
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
D
Darrick J. Wong 已提交
263
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
264 265
}

266 267
/* Record a corruption while cross-referencing a fork block. */
void
D
Darrick J. Wong 已提交
268
xchk_fblock_xref_set_corrupt(
269 270 271 272 273
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
D
Darrick J. Wong 已提交
274
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
275 276
}

277 278 279 280 281
/*
 * Warn about inodes that need administrative review but is not
 * incorrect.
 */
void
D
Darrick J. Wong 已提交
282
xchk_ino_set_warning(
283
	struct xfs_scrub_context	*sc,
284
	xfs_ino_t			ino)
285 286
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
D
Darrick J. Wong 已提交
287
	trace_xchk_ino_warning(sc, ino, __return_address);
288 289 290 291
}

/* Warn about a block indexed by a file fork that needs review. */
void
D
Darrick J. Wong 已提交
292
xchk_fblock_set_warning(
293 294 295 296 297
	struct xfs_scrub_context	*sc,
	int				whichfork,
	xfs_fileoff_t			offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
D
Darrick J. Wong 已提交
298
	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
299 300 301 302
}

/* Signal an incomplete scrub. */
void
D
Darrick J. Wong 已提交
303
xchk_set_incomplete(
304 305 306
	struct xfs_scrub_context	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
D
Darrick J. Wong 已提交
307
	trace_xchk_incomplete(sc, __return_address);
308 309
}

310 311 312 313 314
/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

D
Darrick J. Wong 已提交
315
struct xchk_rmap_ownedby_info {
316 317 318 319 320
	struct xfs_owner_info	*oinfo;
	xfs_filblks_t		*blocks;
};

STATIC int
D
Darrick J. Wong 已提交
321
xchk_count_rmap_ownedby_irec(
322 323 324 325
	struct xfs_btree_cur			*cur,
	struct xfs_rmap_irec			*rec,
	void					*priv)
{
D
Darrick J. Wong 已提交
326
	struct xchk_rmap_ownedby_info		*sroi = priv;
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
	bool					irec_attr;
	bool					oinfo_attr;

	irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
	oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;

	if (rec->rm_owner != sroi->oinfo->oi_owner)
		return 0;

	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
		(*sroi->blocks) += rec->rm_blockcount;

	return 0;
}

/*
 * Calculate the number of blocks the rmap thinks are owned by something.
 * The caller should pass us an rmapbt cursor.
 */
int
D
Darrick J. Wong 已提交
347
xchk_count_rmap_ownedby_ag(
348 349 350 351 352
	struct xfs_scrub_context		*sc,
	struct xfs_btree_cur			*cur,
	struct xfs_owner_info			*oinfo,
	xfs_filblks_t				*blocks)
{
D
Darrick J. Wong 已提交
353
	struct xchk_rmap_ownedby_info		sroi;
354 355 356 357 358

	sroi.oinfo = oinfo;
	*blocks = 0;
	sroi.blocks = blocks;

D
Darrick J. Wong 已提交
359
	return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
360 361 362
			&sroi);
}

363 364 365 366 367 368 369 370
/*
 * AG scrubbing
 *
 * These helpers facilitate locking an allocation group's header
 * buffers, setting up cursors for all btrees that are present, and
 * cleaning everything up once we're through.
 */

D
Darrick J. Wong 已提交
371 372 373 374 375 376 377 378
/* Decide if we want to return an AG header read failure. */
static inline bool
want_ag_read_header_failure(
	struct xfs_scrub_context	*sc,
	unsigned int			type)
{
	/* Return all AG header read failures when scanning btrees. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
D
Darrick J. Wong 已提交
379 380
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
D
Darrick J. Wong 已提交
381 382 383 384 385 386 387 388 389 390 391
		return true;
	/*
	 * If we're scanning a given type of AG header, we only want to
	 * see read failures from that specific header.  We'd like the
	 * other headers to cross-check them, but this isn't required.
	 */
	if (sc->sm->sm_type == type)
		return true;
	return false;
}

392 393 394
/*
 * Grab all the headers for an AG.
 *
D
Darrick J. Wong 已提交
395
 * The headers should be released by xchk_ag_free, but as a fail
396 397 398 399
 * safe we attach all the buffers we grab to the scrub transaction so
 * they'll all be freed when we cancel it.
 */
int
D
Darrick J. Wong 已提交
400
xchk_ag_read_headers(
401 402 403 404 405 406 407 408 409 410
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
	struct xfs_buf			**agi,
	struct xfs_buf			**agf,
	struct xfs_buf			**agfl)
{
	struct xfs_mount		*mp = sc->mp;
	int				error;

	error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
D
Darrick J. Wong 已提交
411
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
412 413 414
		goto out;

	error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
D
Darrick J. Wong 已提交
415
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
416 417 418
		goto out;

	error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
D
Darrick J. Wong 已提交
419
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
420
		goto out;
421
	error = 0;
422 423 424 425 426 427
out:
	return error;
}

/* Release all the AG btree cursors. */
void
D
Darrick J. Wong 已提交
428 429
xchk_ag_btcur_free(
	struct xchk_ag		*sa)
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453
{
	if (sa->refc_cur)
		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
	if (sa->rmap_cur)
		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
	if (sa->fino_cur)
		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
	if (sa->ino_cur)
		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
	if (sa->cnt_cur)
		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
	if (sa->bno_cur)
		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);

	sa->refc_cur = NULL;
	sa->rmap_cur = NULL;
	sa->fino_cur = NULL;
	sa->ino_cur = NULL;
	sa->bno_cur = NULL;
	sa->cnt_cur = NULL;
}

/* Initialize all the btree cursors for an AG. */
int
D
Darrick J. Wong 已提交
454
xchk_ag_btcur_init(
455
	struct xfs_scrub_context	*sc,
D
Darrick J. Wong 已提交
456
	struct xchk_ag		*sa)
457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
{
	struct xfs_mount		*mp = sc->mp;
	xfs_agnumber_t			agno = sa->agno;

	if (sa->agf_bp) {
		/* Set up a bnobt cursor for cross-referencing. */
		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno, XFS_BTNUM_BNO);
		if (!sa->bno_cur)
			goto err;

		/* Set up a cntbt cursor for cross-referencing. */
		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno, XFS_BTNUM_CNT);
		if (!sa->cnt_cur)
			goto err;
	}

	/* Set up a inobt cursor for cross-referencing. */
	if (sa->agi_bp) {
		sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
					agno, XFS_BTNUM_INO);
		if (!sa->ino_cur)
			goto err;
	}

	/* Set up a finobt cursor for cross-referencing. */
	if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				agno, XFS_BTNUM_FINO);
		if (!sa->fino_cur)
			goto err;
	}

	/* Set up a rmapbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno);
		if (!sa->rmap_cur)
			goto err;
	}

	/* Set up a refcountbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
502
				sa->agf_bp, agno);
503 504 505 506 507 508 509 510 511 512 513
		if (!sa->refc_cur)
			goto err;
	}

	return 0;
err:
	return -ENOMEM;
}

/* Release the AG header context and btree cursors. */
void
D
Darrick J. Wong 已提交
514
xchk_ag_free(
515
	struct xfs_scrub_context	*sc,
D
Darrick J. Wong 已提交
516
	struct xchk_ag		*sa)
517
{
D
Darrick J. Wong 已提交
518
	xchk_ag_btcur_free(sa);
519 520 521 522 523 524 525 526 527 528 529 530
	if (sa->agfl_bp) {
		xfs_trans_brelse(sc->tp, sa->agfl_bp);
		sa->agfl_bp = NULL;
	}
	if (sa->agf_bp) {
		xfs_trans_brelse(sc->tp, sa->agf_bp);
		sa->agf_bp = NULL;
	}
	if (sa->agi_bp) {
		xfs_trans_brelse(sc->tp, sa->agi_bp);
		sa->agi_bp = NULL;
	}
531 532 533 534
	if (sa->pag) {
		xfs_perag_put(sa->pag);
		sa->pag = NULL;
	}
535 536 537 538 539 540 541 542 543 544 545
	sa->agno = NULLAGNUMBER;
}

/*
 * For scrub, grab the AGI and the AGF headers, in that order.  Locking
 * order requires us to get the AGI before the AGF.  We use the
 * transaction to avoid deadlocking on crosslinked metadata buffers;
 * either the caller passes one in (bmap scrub) or we have to create a
 * transaction ourselves.
 */
int
D
Darrick J. Wong 已提交
546
xchk_ag_init(
547 548
	struct xfs_scrub_context	*sc,
	xfs_agnumber_t			agno,
D
Darrick J. Wong 已提交
549
	struct xchk_ag		*sa)
550 551 552 553
{
	int				error;

	sa->agno = agno;
D
Darrick J. Wong 已提交
554
	error = xchk_ag_read_headers(sc, agno, &sa->agi_bp,
555 556 557 558
			&sa->agf_bp, &sa->agfl_bp);
	if (error)
		return error;

D
Darrick J. Wong 已提交
559
	return xchk_ag_btcur_init(sc, sa);
560 561
}

562 563
/*
 * Grab the per-ag structure if we haven't already gotten it.  Teardown of the
D
Darrick J. Wong 已提交
564
 * xchk_ag will release it for us.
565 566
 */
void
D
Darrick J. Wong 已提交
567
xchk_perag_get(
568
	struct xfs_mount	*mp,
D
Darrick J. Wong 已提交
569
	struct xchk_ag	*sa)
570 571 572 573 574
{
	if (!sa->pag)
		sa->pag = xfs_perag_get(mp, sa->agno);
}

D
Darrick J. Wong 已提交
575 576
/* Per-scrubber setup functions */

577 578 579
/*
 * Grab an empty transaction so that we can re-grab locked buffers if
 * one of our btrees turns out to be cyclic.
580 581 582 583 584 585
 *
 * If we're going to repair something, we need to ask for the largest possible
 * log reservation so that we can handle the worst case scenario for metadata
 * updates while rebuilding a metadata item.  We also need to reserve as many
 * blocks in the head transaction as we think we're going to need to rebuild
 * the metadata object.
586 587
 */
int
D
Darrick J. Wong 已提交
588
xchk_trans_alloc(
589 590
	struct xfs_scrub_context	*sc,
	uint				resblks)
591
{
592 593 594 595
	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
		return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
				resblks, 0, 0, &sc->tp);

596 597 598
	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}

D
Darrick J. Wong 已提交
599 600
/*
 * Set us up with a transaction and an empty context.  The block
 * reservation is whatever xfs_repair_calc_ag_resblks() computes.
 */
int
xchk_setup_fs(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	return xchk_trans_alloc(sc, xfs_repair_calc_ag_resblks(sc));
}
D
Darrick J. Wong 已提交
610 611 612

/* Set us up with AG headers and btree cursors. */
int
D
Darrick J. Wong 已提交
613
xchk_setup_ag_btree(
D
Darrick J. Wong 已提交
614 615 616 617
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip,
	bool				force_log)
{
D
Darrick J. Wong 已提交
618
	struct xfs_mount		*mp = sc->mp;
D
Darrick J. Wong 已提交
619 620
	int				error;

D
Darrick J. Wong 已提交
621 622 623 624 625 626 627
	/*
	 * If the caller asks us to checkpont the log, do so.  This
	 * expensive operation should be performed infrequently and only
	 * as a last resort.  Any caller that sets force_log should
	 * document why they need to do so.
	 */
	if (force_log) {
D
Darrick J. Wong 已提交
628
		error = xchk_checkpoint_log(mp);
D
Darrick J. Wong 已提交
629 630 631 632
		if (error)
			return error;
	}

D
Darrick J. Wong 已提交
633
	error = xchk_setup_fs(sc, ip);
D
Darrick J. Wong 已提交
634 635 636
	if (error)
		return error;

D
Darrick J. Wong 已提交
637
	return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
D
Darrick J. Wong 已提交
638
}
D
Darrick J. Wong 已提交
639 640 641

/* Push everything out of the log onto disk. */
int
D
Darrick J. Wong 已提交
642
xchk_checkpoint_log(
D
Darrick J. Wong 已提交
643 644 645 646
	struct xfs_mount	*mp)
{
	int			error;

647
	error = xfs_log_force(mp, XFS_LOG_SYNC);
D
Darrick J. Wong 已提交
648 649 650 651 652
	if (error)
		return error;
	xfs_ail_push_all_sync(mp->m_ail);
	return 0;
}
D
Darrick J. Wong 已提交
653 654 655 656 657 658 659

/*
 * Given an inode and the scrub control structure, grab either the
 * inode referenced in the control structure or the inode passed in.
 * The inode is not locked.
 */
int
D
Darrick J. Wong 已提交
660
xchk_get_inode(
D
Darrick J. Wong 已提交
661 662 663
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip_in)
{
664
	struct xfs_imap			imap;
D
Darrick J. Wong 已提交
665 666 667 668 669 670 671 672 673 674 675 676 677 678 679
	struct xfs_mount		*mp = sc->mp;
	struct xfs_inode		*ip = NULL;
	int				error;

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
		sc->ip = ip_in;
		return 0;
	}

	/* Look up the inode, see if the generation number matches. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	error = xfs_iget(mp, NULL, sc->sm->sm_ino,
			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706
	switch (error) {
	case -ENOENT:
		/* Inode doesn't exist, just bail out. */
		return error;
	case 0:
		/* Got an inode, continue. */
		break;
	case -EINVAL:
		/*
		 * -EINVAL with IGET_UNTRUSTED could mean one of several
		 * things: userspace gave us an inode number that doesn't
		 * correspond to fs space, or doesn't have an inobt entry;
		 * or it could simply mean that the inode buffer failed the
		 * read verifiers.
		 *
		 * Try just the inode mapping lookup -- if it succeeds, then
		 * the inode buffer verifier failed and something needs fixing.
		 * Otherwise, we really couldn't find it so tell userspace
		 * that it no longer exists.
		 */
		error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
				XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
		if (error)
			return -ENOENT;
		error = -EFSCORRUPTED;
		/* fall through */
	default:
D
Darrick J. Wong 已提交
707
		trace_xchk_op_error(sc,
D
Darrick J. Wong 已提交
708 709 710 711 712 713 714 715 716 717 718 719 720
				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
				XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
				error, __return_address);
		return error;
	}
	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
		iput(VFS_I(ip));
		return -ENOENT;
	}

	sc->ip = ip;
	return 0;
}
D
Darrick J. Wong 已提交
721 722 723

/* Set us up to scrub a file's contents. */
int
D
Darrick J. Wong 已提交
724
xchk_setup_inode_contents(
D
Darrick J. Wong 已提交
725 726 727 728 729 730
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip,
	unsigned int			resblks)
{
	int				error;

D
Darrick J. Wong 已提交
731
	error = xchk_get_inode(sc, ip);
D
Darrick J. Wong 已提交
732 733 734 735 736 737
	if (error)
		return error;

	/* Got the inode, lock it and we're ready to go. */
	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_ilock(sc->ip, sc->ilock_flags);
D
Darrick J. Wong 已提交
738
	error = xchk_trans_alloc(sc, resblks);
D
Darrick J. Wong 已提交
739 740 741 742 743 744 745 746 747
	if (error)
		goto out;
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

out:
	/* scrub teardown will unlock and release the inode for us */
	return error;
}
748 749 750 751 752 753 754

/*
 * Predicate that decides if we need to evaluate the cross-reference check.
 * If there was an error accessing the cross-reference btree, just delete
 * the cursor and skip the check.
 */
bool
D
Darrick J. Wong 已提交
755
xchk_should_check_xref(
756 757 758 759
	struct xfs_scrub_context	*sc,
	int				*error,
	struct xfs_btree_cur		**curpp)
{
760
	/* No point in xref if we already know we're corrupt. */
D
Darrick J. Wong 已提交
761
	if (xchk_skip_xref(sc->sm))
762 763
		return false;

764 765 766 767 768 769 770 771 772 773 774 775 776 777
	if (*error == 0)
		return true;

	if (curpp) {
		/* If we've already given up on xref, just bail out. */
		if (!*curpp)
			return false;

		/* xref error, delete cursor and bail out. */
		xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
		*curpp = NULL;
	}

	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
D
Darrick J. Wong 已提交
778
	trace_xchk_xref_error(sc, *error, __return_address);
779 780 781 782 783 784 785 786

	/*
	 * Errors encountered during cross-referencing with another
	 * data structure should not cause this scrubber to abort.
	 */
	*error = 0;
	return false;
}
D
Darrick J. Wong 已提交
787 788 789

/* Run the structure verifiers on in-memory buffers to detect bad memory. */
void
D
Darrick J. Wong 已提交
790
xchk_buffer_recheck(
D
Darrick J. Wong 已提交
791 792 793 794 795 796
	struct xfs_scrub_context	*sc,
	struct xfs_buf			*bp)
{
	xfs_failaddr_t			fa;

	if (bp->b_ops == NULL) {
D
Darrick J. Wong 已提交
797
		xchk_block_set_corrupt(sc, bp);
D
Darrick J. Wong 已提交
798 799 800
		return;
	}
	if (bp->b_ops->verify_struct == NULL) {
D
Darrick J. Wong 已提交
801
		xchk_set_incomplete(sc);
D
Darrick J. Wong 已提交
802 803 804 805 806 807
		return;
	}
	fa = bp->b_ops->verify_struct(bp);
	if (!fa)
		return;
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
D
Darrick J. Wong 已提交
808
	trace_xchk_block_error(sc, bp->b_bn, fa);
D
Darrick J. Wong 已提交
809
}
810 811 812 813 814 815

/*
 * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
 * pointed to by sc->ip and the ILOCK must be held.
 */
int
D
Darrick J. Wong 已提交
816
xchk_metadata_inode_forks(
817 818 819 820 821 822 823 824 825 826 827
	struct xfs_scrub_context	*sc)
{
	__u32				smtype;
	bool				shared;
	int				error;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Metadata inodes don't live on the rt device. */
	if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
D
Darrick J. Wong 已提交
828
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
829 830 831 832 833
		return 0;
	}

	/* They should never participate in reflink. */
	if (xfs_is_reflink_inode(sc->ip)) {
D
Darrick J. Wong 已提交
834
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
835 836 837 838 839
		return 0;
	}

	/* They also should never have extended attributes. */
	if (xfs_inode_hasattr(sc->ip)) {
D
Darrick J. Wong 已提交
840
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
841 842 843 844 845 846
		return 0;
	}

	/* Invoke the data fork scrubber. */
	smtype = sc->sm->sm_type;
	sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
D
Darrick J. Wong 已提交
847
	error = xchk_bmap_data(sc);
848 849 850 851 852 853 854 855
	sc->sm->sm_type = smtype;
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

	/* Look for incorrect shared blocks. */
	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
				&shared);
D
Darrick J. Wong 已提交
856
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
857 858 859
				&error))
			return error;
		if (shared)
D
Darrick J. Wong 已提交
860
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
861 862 863 864
	}

	return error;
}
865 866 867 868 869 870 871 872 873

/*
 * Try to lock an inode in violation of the usual locking order rules.  For
 * example, trying to get the IOLOCK while in transaction context, or just
 * plain breaking AG-order or inode-order inode locking rules.  Either way,
 * the only way to avoid an ABBA deadlock is to use trylock and back off if
 * we can't.
 */
int
D
Darrick J. Wong 已提交
874
xchk_ilock_inverted(
875 876 877 878 879 880 881 882 883 884 885 886
	struct xfs_inode	*ip,
	uint			lock_mode)
{
	int			i;

	for (i = 0; i < 20; i++) {
		if (xfs_ilock_nowait(ip, lock_mode))
			return 0;
		delay(1);
	}
	return -EDEADLOCK;
}