// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_itable.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/btree.h"
#include "scrub/repair.h"

/* Common code for the metadata scrubbers. */

/*
 * Handling operational errors.
 *
 * The *_process_error() family of functions are used to process error return
 * codes from functions called as part of a scrub operation.
 *
 * If there's no error, we return true to tell the caller that it's ok
 * to move on to the next check in its list.
 *
 * For non-verifier errors (e.g. ENOMEM) we return false to tell the
 * caller that something bad happened, and we preserve *error so that
 * the caller can return the *error up the stack to userspace.
 *
 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
 * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
 * not via return codes.  We return false to tell the caller that
 * something bad happened.  Since the error has been cleared, the caller
 * will (presumably) return that zero and scrubbing will move on to
 * whatever's next.
 *
 * ftrace can be used to record the precise metadata location and the
 * approximate code location of the failed operation.
 */

/* Check for operational errors. */
70
static bool
D
Darrick J. Wong 已提交
71
__xchk_process_error(
72
	struct xfs_scrub	*sc,
73 74 75 76 77
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
78 79 80 81 82 83
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
D
Darrick J. Wong 已提交
84
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
85 86 87 88
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
89
		sc->sm->sm_flags |= errflag;
90 91 92
		*error = 0;
		/* fall through */
	default:
D
Darrick J. Wong 已提交
93
		trace_xchk_op_error(sc, agno, bno, *error,
94
				ret_ip);
95 96 97 98 99 100
		break;
	}
	return false;
}

bool
D
Darrick J. Wong 已提交
101
xchk_process_error(
102
	struct xfs_scrub	*sc,
103 104 105
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
106
{
D
Darrick J. Wong 已提交
107
	return __xchk_process_error(sc, agno, bno, error,
108 109 110 111
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
D
Darrick J. Wong 已提交
112
xchk_xref_process_error(
113
	struct xfs_scrub	*sc,
114 115 116
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
117
{
D
Darrick J. Wong 已提交
118
	return __xchk_process_error(sc, agno, bno, error,
119 120 121 122 123
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/* Check for operational errors for a file offset. */
static bool
D
Darrick J. Wong 已提交
124
__xchk_fblock_process_error(
125
	struct xfs_scrub	*sc,
126 127 128 129 130
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
131 132 133 134 135 136
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
		/* Used to restart an op with deadlock avoidance. */
D
Darrick J. Wong 已提交
137
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
138 139 140 141
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
142
		sc->sm->sm_flags |= errflag;
143 144 145
		*error = 0;
		/* fall through */
	default:
D
Darrick J. Wong 已提交
146
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
147
				ret_ip);
148 149 150 151 152
		break;
	}
	return false;
}

153
bool
D
Darrick J. Wong 已提交
154
xchk_fblock_process_error(
155
	struct xfs_scrub	*sc,
156 157 158
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
159
{
D
Darrick J. Wong 已提交
160
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
161 162 163 164
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
D
Darrick J. Wong 已提交
165
xchk_fblock_xref_process_error(
166
	struct xfs_scrub	*sc,
167 168 169
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
170
{
D
Darrick J. Wong 已提交
171
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
172 173 174
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/*
 * Handling scrub corruption/optimization/warning checks.
 *
 * The *_set_{corrupt,preen,warning}() family of functions are used to
 * record the presence of metadata that is incorrect (corrupt), could be
 * optimized somehow (preen), or should be flagged for administrative
 * review but is not incorrect (warn).
 *
 * ftrace can be used to record the precise metadata location and
 * approximate code location of the failed check.
 */

/* Record a block which could be optimized. */
void
D
Darrick J. Wong 已提交
189
xchk_block_set_preen(
190
	struct xfs_scrub	*sc,
191
	struct xfs_buf		*bp)
192 193
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
D
Darrick J. Wong 已提交
194
	trace_xchk_block_preen(sc, bp->b_bn, __return_address);
195 196 197 198 199 200 201 202
}

/*
 * Record an inode which could be optimized.  The trace data will
 * include the block given by bp if bp is given; otherwise it will use
 * the block location of the inode record itself.
 */
void
D
Darrick J. Wong 已提交
203
xchk_ino_set_preen(
204
	struct xfs_scrub	*sc,
205
	xfs_ino_t		ino)
206 207
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
D
Darrick J. Wong 已提交
208
	trace_xchk_ino_preen(sc, ino, __return_address);
209 210 211 212
}

/* Record a corrupt block. */
void
D
Darrick J. Wong 已提交
213
xchk_block_set_corrupt(
214
	struct xfs_scrub	*sc,
215
	struct xfs_buf		*bp)
216 217
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
D
Darrick J. Wong 已提交
218
	trace_xchk_block_error(sc, bp->b_bn, __return_address);
219 220
}

221 222
/* Record a corruption while cross-referencing. */
void
D
Darrick J. Wong 已提交
223
xchk_block_xref_set_corrupt(
224
	struct xfs_scrub	*sc,
225
	struct xfs_buf		*bp)
226 227
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
D
Darrick J. Wong 已提交
228
	trace_xchk_block_error(sc, bp->b_bn, __return_address);
229 230
}

231 232 233 234 235 236
/*
 * Record a corrupt inode.  The trace data will include the block given
 * by bp if bp is given; otherwise it will use the block location of the
 * inode record itself.
 */
void
D
Darrick J. Wong 已提交
237
xchk_ino_set_corrupt(
238
	struct xfs_scrub	*sc,
239
	xfs_ino_t		ino)
240 241
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
D
Darrick J. Wong 已提交
242
	trace_xchk_ino_error(sc, ino, __return_address);
243 244
}

245 246
/* Record a corruption while cross-referencing with an inode. */
void
D
Darrick J. Wong 已提交
247
xchk_ino_xref_set_corrupt(
248
	struct xfs_scrub	*sc,
249
	xfs_ino_t		ino)
250 251
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
D
Darrick J. Wong 已提交
252
	trace_xchk_ino_error(sc, ino, __return_address);
253 254
}

255 256
/* Record corruption in a block indexed by a file fork. */
void
D
Darrick J. Wong 已提交
257
xchk_fblock_set_corrupt(
258
	struct xfs_scrub	*sc,
259 260
	int			whichfork,
	xfs_fileoff_t		offset)
261 262
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
D
Darrick J. Wong 已提交
263
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
264 265
}

266 267
/* Record a corruption while cross-referencing a fork block. */
void
D
Darrick J. Wong 已提交
268
xchk_fblock_xref_set_corrupt(
269
	struct xfs_scrub	*sc,
270 271
	int			whichfork,
	xfs_fileoff_t		offset)
272 273
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
D
Darrick J. Wong 已提交
274
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
275 276
}

277 278 279 280 281
/*
 * Warn about inodes that need administrative review but is not
 * incorrect.
 */
void
D
Darrick J. Wong 已提交
282
xchk_ino_set_warning(
283
	struct xfs_scrub	*sc,
284
	xfs_ino_t		ino)
285 286
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
D
Darrick J. Wong 已提交
287
	trace_xchk_ino_warning(sc, ino, __return_address);
288 289 290 291
}

/* Warn about a block indexed by a file fork that needs review. */
void
D
Darrick J. Wong 已提交
292
xchk_fblock_set_warning(
293
	struct xfs_scrub	*sc,
294 295
	int			whichfork,
	xfs_fileoff_t		offset)
296 297
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
D
Darrick J. Wong 已提交
298
	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
299 300 301 302
}

/* Signal an incomplete scrub. */
void
D
Darrick J. Wong 已提交
303
xchk_set_incomplete(
304
	struct xfs_scrub	*sc)
305 306
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
D
Darrick J. Wong 已提交
307
	trace_xchk_incomplete(sc, __return_address);
308 309
}

/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

D
Darrick J. Wong 已提交
315
struct xchk_rmap_ownedby_info {
316 317 318 319 320
	struct xfs_owner_info	*oinfo;
	xfs_filblks_t		*blocks;
};

STATIC int
D
Darrick J. Wong 已提交
321
xchk_count_rmap_ownedby_irec(
322 323 324
	struct xfs_btree_cur		*cur,
	struct xfs_rmap_irec		*rec,
	void				*priv)
325
{
326 327 328
	struct xchk_rmap_ownedby_info	*sroi = priv;
	bool				irec_attr;
	bool				oinfo_attr;
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346

	irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
	oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;

	if (rec->rm_owner != sroi->oinfo->oi_owner)
		return 0;

	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
		(*sroi->blocks) += rec->rm_blockcount;

	return 0;
}

/*
 * Calculate the number of blocks the rmap thinks are owned by something.
 * The caller should pass us an rmapbt cursor.
 */
int
D
Darrick J. Wong 已提交
347
xchk_count_rmap_ownedby_ag(
348
	struct xfs_scrub		*sc,
349 350 351
	struct xfs_btree_cur		*cur,
	struct xfs_owner_info		*oinfo,
	xfs_filblks_t			*blocks)
352
{
353
	struct xchk_rmap_ownedby_info	sroi;
354 355 356 357 358

	sroi.oinfo = oinfo;
	*blocks = 0;
	sroi.blocks = blocks;

D
Darrick J. Wong 已提交
359
	return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
360 361 362
			&sroi);
}

/*
 * AG scrubbing
 *
 * These helpers facilitate locking an allocation group's header
 * buffers, setting up cursors for all btrees that are present, and
 * cleaning everything up once we're through.
 */

D
Darrick J. Wong 已提交
371 372 373
/* Decide if we want to return an AG header read failure. */
static inline bool
want_ag_read_header_failure(
374
	struct xfs_scrub	*sc,
375
	unsigned int		type)
D
Darrick J. Wong 已提交
376 377 378
{
	/* Return all AG header read failures when scanning btrees. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
D
Darrick J. Wong 已提交
379 380
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
D
Darrick J. Wong 已提交
381 382 383 384 385 386 387 388 389 390 391
		return true;
	/*
	 * If we're scanning a given type of AG header, we only want to
	 * see read failures from that specific header.  We'd like the
	 * other headers to cross-check them, but this isn't required.
	 */
	if (sc->sm->sm_type == type)
		return true;
	return false;
}

392 393 394
/*
 * Grab all the headers for an AG.
 *
D
Darrick J. Wong 已提交
395
 * The headers should be released by xchk_ag_free, but as a fail
396 397 398 399
 * safe we attach all the buffers we grab to the scrub transaction so
 * they'll all be freed when we cancel it.
 */
int
D
Darrick J. Wong 已提交
400
xchk_ag_read_headers(
401
	struct xfs_scrub	*sc,
402 403 404 405
	xfs_agnumber_t		agno,
	struct xfs_buf		**agi,
	struct xfs_buf		**agf,
	struct xfs_buf		**agfl)
406
{
407 408
	struct xfs_mount	*mp = sc->mp;
	int			error;
409 410

	error = xfs_ialloc_read_agi(mp, sc->tp, agno, agi);
D
Darrick J. Wong 已提交
411
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
412 413 414
		goto out;

	error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, agf);
D
Darrick J. Wong 已提交
415
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
416 417 418
		goto out;

	error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
D
Darrick J. Wong 已提交
419
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
420
		goto out;
421
	error = 0;
422 423 424 425 426 427
out:
	return error;
}

/* Release all the AG btree cursors. */
void
D
Darrick J. Wong 已提交
428 429
xchk_ag_btcur_free(
	struct xchk_ag		*sa)
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453
{
	if (sa->refc_cur)
		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
	if (sa->rmap_cur)
		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
	if (sa->fino_cur)
		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
	if (sa->ino_cur)
		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
	if (sa->cnt_cur)
		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
	if (sa->bno_cur)
		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);

	sa->refc_cur = NULL;
	sa->rmap_cur = NULL;
	sa->fino_cur = NULL;
	sa->ino_cur = NULL;
	sa->bno_cur = NULL;
	sa->cnt_cur = NULL;
}

/* Initialize all the btree cursors for an AG. */
int
D
Darrick J. Wong 已提交
454
xchk_ag_btcur_init(
455
	struct xfs_scrub	*sc,
D
Darrick J. Wong 已提交
456
	struct xchk_ag		*sa)
457
{
458 459
	struct xfs_mount	*mp = sc->mp;
	xfs_agnumber_t		agno = sa->agno;
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501

	if (sa->agf_bp) {
		/* Set up a bnobt cursor for cross-referencing. */
		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno, XFS_BTNUM_BNO);
		if (!sa->bno_cur)
			goto err;

		/* Set up a cntbt cursor for cross-referencing. */
		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno, XFS_BTNUM_CNT);
		if (!sa->cnt_cur)
			goto err;
	}

	/* Set up a inobt cursor for cross-referencing. */
	if (sa->agi_bp) {
		sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
					agno, XFS_BTNUM_INO);
		if (!sa->ino_cur)
			goto err;
	}

	/* Set up a finobt cursor for cross-referencing. */
	if (sa->agi_bp && xfs_sb_version_hasfinobt(&mp->m_sb)) {
		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
				agno, XFS_BTNUM_FINO);
		if (!sa->fino_cur)
			goto err;
	}

	/* Set up a rmapbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
				agno);
		if (!sa->rmap_cur)
			goto err;
	}

	/* Set up a refcountbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_sb_version_hasreflink(&mp->m_sb)) {
		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
502
				sa->agf_bp, agno);
503 504 505 506 507 508 509 510 511 512 513
		if (!sa->refc_cur)
			goto err;
	}

	return 0;
err:
	return -ENOMEM;
}

/* Release the AG header context and btree cursors. */
void
D
Darrick J. Wong 已提交
514
xchk_ag_free(
515
	struct xfs_scrub	*sc,
D
Darrick J. Wong 已提交
516
	struct xchk_ag		*sa)
517
{
D
Darrick J. Wong 已提交
518
	xchk_ag_btcur_free(sa);
519 520 521 522 523 524 525 526 527 528 529 530
	if (sa->agfl_bp) {
		xfs_trans_brelse(sc->tp, sa->agfl_bp);
		sa->agfl_bp = NULL;
	}
	if (sa->agf_bp) {
		xfs_trans_brelse(sc->tp, sa->agf_bp);
		sa->agf_bp = NULL;
	}
	if (sa->agi_bp) {
		xfs_trans_brelse(sc->tp, sa->agi_bp);
		sa->agi_bp = NULL;
	}
531 532 533 534
	if (sa->pag) {
		xfs_perag_put(sa->pag);
		sa->pag = NULL;
	}
535 536 537 538 539 540 541 542 543 544 545
	sa->agno = NULLAGNUMBER;
}

/*
 * For scrub, grab the AGI and the AGF headers, in that order.  Locking
 * order requires us to get the AGI before the AGF.  We use the
 * transaction to avoid deadlocking on crosslinked metadata buffers;
 * either the caller passes one in (bmap scrub) or we have to create a
 * transaction ourselves.
 */
int
D
Darrick J. Wong 已提交
546
xchk_ag_init(
547
	struct xfs_scrub	*sc,
548
	xfs_agnumber_t		agno,
D
Darrick J. Wong 已提交
549
	struct xchk_ag		*sa)
550
{
551
	int			error;
552 553

	sa->agno = agno;
D
Darrick J. Wong 已提交
554
	error = xchk_ag_read_headers(sc, agno, &sa->agi_bp,
555 556 557 558
			&sa->agf_bp, &sa->agfl_bp);
	if (error)
		return error;

D
Darrick J. Wong 已提交
559
	return xchk_ag_btcur_init(sc, sa);
560 561
}

562 563
/*
 * Grab the per-ag structure if we haven't already gotten it.  Teardown of the
D
Darrick J. Wong 已提交
564
 * xchk_ag will release it for us.
565 566
 */
void
D
Darrick J. Wong 已提交
567
xchk_perag_get(
568
	struct xfs_mount	*mp,
569
	struct xchk_ag		*sa)
570 571 572 573 574
{
	if (!sa->pag)
		sa->pag = xfs_perag_get(mp, sa->agno);
}

/* Per-scrubber setup functions */

577 578 579
/*
 * Grab an empty transaction so that we can re-grab locked buffers if
 * one of our btrees turns out to be cyclic.
580 581 582 583 584 585
 *
 * If we're going to repair something, we need to ask for the largest possible
 * log reservation so that we can handle the worst case scenario for metadata
 * updates while rebuilding a metadata item.  We also need to reserve as many
 * blocks in the head transaction as we think we're going to need to rebuild
 * the metadata object.
586 587
 */
int
D
Darrick J. Wong 已提交
588
xchk_trans_alloc(
589
	struct xfs_scrub	*sc,
590
	uint			resblks)
591
{
592 593 594 595
	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
		return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
				resblks, 0, 0, &sc->tp);

596 597 598
	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}

D
Darrick J. Wong 已提交
599 600
/* Set us up with a transaction and an empty context. */
int
D
Darrick J. Wong 已提交
601
xchk_setup_fs(
602
	struct xfs_scrub	*sc,
603
	struct xfs_inode	*ip)
D
Darrick J. Wong 已提交
604
{
605
	uint			resblks;
606

607
	resblks = xrep_calc_ag_resblks(sc);
D
Darrick J. Wong 已提交
608
	return xchk_trans_alloc(sc, resblks);
D
Darrick J. Wong 已提交
609
}
D
Darrick J. Wong 已提交
610 611 612

/* Set us up with AG headers and btree cursors. */
int
D
Darrick J. Wong 已提交
613
xchk_setup_ag_btree(
614
	struct xfs_scrub	*sc,
615 616
	struct xfs_inode	*ip,
	bool			force_log)
D
Darrick J. Wong 已提交
617
{
618 619
	struct xfs_mount	*mp = sc->mp;
	int			error;
D
Darrick J. Wong 已提交
620

D
Darrick J. Wong 已提交
621 622 623 624 625 626 627
	/*
	 * If the caller asks us to checkpont the log, do so.  This
	 * expensive operation should be performed infrequently and only
	 * as a last resort.  Any caller that sets force_log should
	 * document why they need to do so.
	 */
	if (force_log) {
D
Darrick J. Wong 已提交
628
		error = xchk_checkpoint_log(mp);
D
Darrick J. Wong 已提交
629 630 631 632
		if (error)
			return error;
	}

D
Darrick J. Wong 已提交
633
	error = xchk_setup_fs(sc, ip);
D
Darrick J. Wong 已提交
634 635 636
	if (error)
		return error;

D
Darrick J. Wong 已提交
637
	return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
D
Darrick J. Wong 已提交
638
}
D
Darrick J. Wong 已提交
639 640 641

/* Push everything out of the log onto disk. */
int
D
Darrick J. Wong 已提交
642
xchk_checkpoint_log(
D
Darrick J. Wong 已提交
643 644 645 646
	struct xfs_mount	*mp)
{
	int			error;

647
	error = xfs_log_force(mp, XFS_LOG_SYNC);
D
Darrick J. Wong 已提交
648 649 650 651 652
	if (error)
		return error;
	xfs_ail_push_all_sync(mp->m_ail);
	return 0;
}
D
Darrick J. Wong 已提交
653 654 655 656 657 658 659

/*
 * Given an inode and the scrub control structure, grab either the
 * inode referenced in the control structure or the inode passed in.
 * The inode is not locked.
 */
int
D
Darrick J. Wong 已提交
660
xchk_get_inode(
661
	struct xfs_scrub	*sc,
662
	struct xfs_inode	*ip_in)
D
Darrick J. Wong 已提交
663
{
664 665 666 667
	struct xfs_imap		imap;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = NULL;
	int			error;
D
Darrick J. Wong 已提交
668 669 670 671 672 673 674 675 676 677 678 679

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
		sc->ip = ip_in;
		return 0;
	}

	/* Look up the inode, see if the generation number matches. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	error = xfs_iget(mp, NULL, sc->sm->sm_ino,
			XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706
	switch (error) {
	case -ENOENT:
		/* Inode doesn't exist, just bail out. */
		return error;
	case 0:
		/* Got an inode, continue. */
		break;
	case -EINVAL:
		/*
		 * -EINVAL with IGET_UNTRUSTED could mean one of several
		 * things: userspace gave us an inode number that doesn't
		 * correspond to fs space, or doesn't have an inobt entry;
		 * or it could simply mean that the inode buffer failed the
		 * read verifiers.
		 *
		 * Try just the inode mapping lookup -- if it succeeds, then
		 * the inode buffer verifier failed and something needs fixing.
		 * Otherwise, we really couldn't find it so tell userspace
		 * that it no longer exists.
		 */
		error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
				XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
		if (error)
			return -ENOENT;
		error = -EFSCORRUPTED;
		/* fall through */
	default:
D
Darrick J. Wong 已提交
707
		trace_xchk_op_error(sc,
D
Darrick J. Wong 已提交
708 709 710 711 712 713
				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
				XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
				error, __return_address);
		return error;
	}
	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
714
		xfs_irele(ip);
D
Darrick J. Wong 已提交
715 716 717 718 719 720
		return -ENOENT;
	}

	sc->ip = ip;
	return 0;
}
D
Darrick J. Wong 已提交
721 722 723

/* Set us up to scrub a file's contents. */
int
D
Darrick J. Wong 已提交
724
xchk_setup_inode_contents(
725
	struct xfs_scrub	*sc,
726 727
	struct xfs_inode	*ip,
	unsigned int		resblks)
D
Darrick J. Wong 已提交
728
{
729
	int			error;
D
Darrick J. Wong 已提交
730

D
Darrick J. Wong 已提交
731
	error = xchk_get_inode(sc, ip);
D
Darrick J. Wong 已提交
732 733 734 735 736 737
	if (error)
		return error;

	/* Got the inode, lock it and we're ready to go. */
	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_ilock(sc->ip, sc->ilock_flags);
D
Darrick J. Wong 已提交
738
	error = xchk_trans_alloc(sc, resblks);
D
Darrick J. Wong 已提交
739 740 741 742 743 744 745 746 747
	if (error)
		goto out;
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

out:
	/* scrub teardown will unlock and release the inode for us */
	return error;
}
748 749 750 751 752 753 754

/*
 * Predicate that decides if we need to evaluate the cross-reference check.
 * If there was an error accessing the cross-reference btree, just delete
 * the cursor and skip the check.
 */
bool
D
Darrick J. Wong 已提交
755
xchk_should_check_xref(
756
	struct xfs_scrub	*sc,
757 758
	int			*error,
	struct xfs_btree_cur	**curpp)
759
{
760
	/* No point in xref if we already know we're corrupt. */
D
Darrick J. Wong 已提交
761
	if (xchk_skip_xref(sc->sm))
762 763
		return false;

764 765 766 767 768 769 770 771 772 773 774 775 776 777
	if (*error == 0)
		return true;

	if (curpp) {
		/* If we've already given up on xref, just bail out. */
		if (!*curpp)
			return false;

		/* xref error, delete cursor and bail out. */
		xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
		*curpp = NULL;
	}

	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
D
Darrick J. Wong 已提交
778
	trace_xchk_xref_error(sc, *error, __return_address);
779 780 781 782 783 784 785 786

	/*
	 * Errors encountered during cross-referencing with another
	 * data structure should not cause this scrubber to abort.
	 */
	*error = 0;
	return false;
}
D
Darrick J. Wong 已提交
787 788 789

/* Run the structure verifiers on in-memory buffers to detect bad memory. */
void
D
Darrick J. Wong 已提交
790
xchk_buffer_recheck(
791
	struct xfs_scrub	*sc,
792
	struct xfs_buf		*bp)
D
Darrick J. Wong 已提交
793
{
794
	xfs_failaddr_t		fa;
D
Darrick J. Wong 已提交
795 796

	if (bp->b_ops == NULL) {
D
Darrick J. Wong 已提交
797
		xchk_block_set_corrupt(sc, bp);
D
Darrick J. Wong 已提交
798 799 800
		return;
	}
	if (bp->b_ops->verify_struct == NULL) {
D
Darrick J. Wong 已提交
801
		xchk_set_incomplete(sc);
D
Darrick J. Wong 已提交
802 803 804 805 806 807
		return;
	}
	fa = bp->b_ops->verify_struct(bp);
	if (!fa)
		return;
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
D
Darrick J. Wong 已提交
808
	trace_xchk_block_error(sc, bp->b_bn, fa);
D
Darrick J. Wong 已提交
809
}
810 811 812 813 814 815

/*
 * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
 * pointed to by sc->ip and the ILOCK must be held.
 */
int
D
Darrick J. Wong 已提交
816
xchk_metadata_inode_forks(
817
	struct xfs_scrub	*sc)
818
{
819 820 821
	__u32			smtype;
	bool			shared;
	int			error;
822 823 824 825 826 827

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Metadata inodes don't live on the rt device. */
	if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
D
Darrick J. Wong 已提交
828
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
829 830 831 832 833
		return 0;
	}

	/* They should never participate in reflink. */
	if (xfs_is_reflink_inode(sc->ip)) {
D
Darrick J. Wong 已提交
834
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
835 836 837 838 839
		return 0;
	}

	/* They also should never have extended attributes. */
	if (xfs_inode_hasattr(sc->ip)) {
D
Darrick J. Wong 已提交
840
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
841 842 843 844 845 846
		return 0;
	}

	/* Invoke the data fork scrubber. */
	smtype = sc->sm->sm_type;
	sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
D
Darrick J. Wong 已提交
847
	error = xchk_bmap_data(sc);
848 849 850 851 852 853 854 855
	sc->sm->sm_type = smtype;
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

	/* Look for incorrect shared blocks. */
	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
				&shared);
D
Darrick J. Wong 已提交
856
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
857 858 859
				&error))
			return error;
		if (shared)
D
Darrick J. Wong 已提交
860
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
861 862 863 864
	}

	return error;
}
865 866 867 868 869 870 871 872 873

/*
 * Try to lock an inode in violation of the usual locking order rules.  For
 * example, trying to get the IOLOCK while in transaction context, or just
 * plain breaking AG-order or inode-order inode locking rules.  Either way,
 * the only way to avoid an ABBA deadlock is to use trylock and back off if
 * we can't.
 */
int
D
Darrick J. Wong 已提交
874
xchk_ilock_inverted(
875 876 877 878 879 880 881 882 883 884 885 886
	struct xfs_inode	*ip,
	uint			lock_mode)
{
	int			i;

	for (i = 0; i < 20; i++) {
		if (xfs_ilock_nowait(ip, lock_mode))
			return 0;
		delay(1);
	}
	return -EDEADLOCK;
}