tree-checker.c 53.3 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Copyright (C) Qu Wenruo 2017.  All rights reserved.
 */

/*
 * The module is used to catch unexpected/corrupted tree block data.
 * Such behavior can be caused either by a fuzzed image or bugs.
 *
 * The objective is to do leaf/node validation checks when tree block is read
 * from disk, and check *every* possible member, so other code won't
 * need to check them again.
 *
 * Due to the potential and unwanted damage, every checker needs to be
 * carefully reviewed, otherwise it may prevent the mount of valid images.
 */

18 19 20
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/error-injection.h>
21 22 23 24
#include "ctree.h"
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
25
#include "volumes.h"
26
#include "misc.h"
27

28 29 30 31 32 33
/*
 * Error message should follow the following format:
 * corrupt <type>: <identifier>, <reason>[, <bad_value>]
 *
 * @type:	leaf or node
 * @identifier:	the necessary info to locate the leaf/node.
 * 		It's recommended to decode key.objectid/offset if it's
 * 		meaningful.
 * @reason:	describe the error
 * @bad_value:	optional, it's recommended to output bad value and its
 *		expected value (range).
 *
 * Since comma is used to separate the components, only space is allowed
 * inside each component.
 */

/*
 * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
 * Allows callers to customize the output.
 */
48
__printf(3, 4)
49
__cold
50
static void generic_err(const struct extent_buffer *eb, int slot,
51 52
			const char *fmt, ...)
{
53
	const struct btrfs_fs_info *fs_info = eb->fs_info;
54 55 56 57 58 59 60 61
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

62
	btrfs_crit(fs_info,
63 64
		"corrupt %s: root=%llu block=%llu slot=%d, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
65
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
66 67 68
	va_end(args);
}

69 70 71 72
/*
 * Customized reporter for extent data item, since its key objectid and
 * offset has its own meaning.
 */
73
__printf(3, 4)
74
__cold
75
static void file_extent_err(const struct extent_buffer *eb, int slot,
76 77
			    const char *fmt, ...)
{
78
	const struct btrfs_fs_info *fs_info = eb->fs_info;
79 80 81 82 83 84 85 86 87 88
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

89
	btrfs_crit(fs_info,
90
	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
91 92 93
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, key.offset, &vaf);
94 95 96 97 98 99 100
	va_end(args);
}

/*
 * Verify that btrfs_file_extent_##name of @fi is a multiple of @alignment.
 * A misaligned value is reported through file_extent_err().
 *
 * Evaluates to 0 if the member is aligned, 1 otherwise.
 */
#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)		      \
({									      \
	const int _fe_unaligned =					      \
		!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)),	      \
			    (alignment));				      \
									      \
	if (unlikely(_fe_unaligned))					      \
		file_extent_err((leaf), (slot),				      \
	"invalid %s for file extent, have %llu, should be aligned to %u",     \
			(#name), btrfs_file_extent_##name((leaf), (fi)),      \
			(alignment));					      \
	_fe_unaligned;							      \
})

112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
/*
 * Return the file offset at which the file extent item @extent, keyed by
 * @key, ends.
 *
 * For an inline extent the length is ram_bytes and the end is rounded up to
 * the filesystem sectorsize.  For any other extent type the end is simply
 * key->offset + num_bytes.
 */
static u64 file_extent_end(struct extent_buffer *leaf,
			   struct btrfs_key *key,
			   struct btrfs_file_extent_item *extent)
{
	if (btrfs_file_extent_type(leaf, extent) != BTRFS_FILE_EXTENT_INLINE)
		return key->offset + btrfs_file_extent_num_bytes(leaf, extent);

	return ALIGN(key->offset + btrfs_file_extent_ram_bytes(leaf, extent),
		     leaf->fs_info->sectorsize);
}

129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
/*
 * Corruption reporter for dir items.
 *
 * Same output as generic_err() plus the objectid of the key at @slot, which
 * is printed as "ino".
 */
__printf(3, 4)
__cold
static void dir_item_err(const struct extent_buffer *eb, int slot,
			 const char *fmt, ...)
{
	const char *block_type = btrfs_header_level(eb) == 0 ? "leaf" : "node";
	struct va_format caller_msg;
	struct btrfs_key item_key;
	va_list ap;

	btrfs_item_key_to_cpu(eb, &item_key, slot);

	va_start(ap, fmt);
	caller_msg.fmt = fmt;
	caller_msg.va = &ap;

	btrfs_crit(eb->fs_info,
		"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
		block_type, btrfs_header_owner(eb), btrfs_header_bytenr(eb),
		slot, item_key.objectid, &caller_msg);

	va_end(ap);
}

/*
 * Verify that @key and @prev_key carry the same objectid (inode number).
 *
 * A mismatch means the item at @slot is not preceded by any item of its own
 * inode, which is how a missing INODE_ITEM in a subvolume tree shows up.
 *
 * Return true if everything is OK or the check does not apply.
 * Return false (after reporting the problem) if the objectids differ.
 */
static bool check_prev_ino(struct extent_buffer *leaf,
			   struct btrfs_key *key, int slot,
			   struct btrfs_key *prev_key)
{
	/* The first item of a leaf has no previous key to compare against */
	if (slot == 0)
		return true;

	/* Callers are expected to pass only these key types */
	ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
	       key->type == BTRFS_INODE_REF_KEY ||
	       key->type == BTRFS_DIR_INDEX_KEY ||
	       key->type == BTRFS_DIR_ITEM_KEY ||
	       key->type == BTRFS_EXTENT_DATA_KEY);

	/*
	 * The rule only holds for subvolume trees and their reloc trees,
	 * other trees (e.g. the log tree) do not follow it.  For the trees
	 * that do, matching objectids mean there is nothing to complain about.
	 */
	if (!is_fstree(btrfs_header_owner(leaf)) ||
	    key->objectid == prev_key->objectid)
		return true;

	dir_item_err(leaf, slot,
		"invalid previous key objectid, have %llu expect %llu",
		prev_key->objectid, key->objectid);
	return false;
}
197
static int check_extent_data_item(struct extent_buffer *leaf,
198 199
				  struct btrfs_key *key, int slot,
				  struct btrfs_key *prev_key)
200
{
201
	struct btrfs_fs_info *fs_info = leaf->fs_info;
202
	struct btrfs_file_extent_item *fi;
203
	u32 sectorsize = fs_info->sectorsize;
204
	u32 item_size = btrfs_item_size_nr(leaf, slot);
205
	u64 extent_end;
206

207
	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
208
		file_extent_err(leaf, slot,
209 210
"unaligned file_offset for file extent, have %llu should be aligned to %u",
			key->offset, sectorsize);
211 212 213
		return -EUCLEAN;
	}

214 215 216 217 218 219
	/*
	 * Previous key must have the same key->objectid (ino).
	 * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA.
	 * But if objectids mismatch, it means we have a missing
	 * INODE_ITEM.
	 */
220
	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
221 222
		return -EUCLEAN;

223 224
	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

225 226 227 228
	/*
	 * Make sure the item contains at least inline header, so the file
	 * extent type is not some garbage.
	 */
229
	if (unlikely(item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START)) {
230
		file_extent_err(leaf, slot,
231
				"invalid item size, have %u expect [%zu, %u)",
232 233 234 235
				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
				SZ_4K);
		return -EUCLEAN;
	}
236 237
	if (unlikely(btrfs_file_extent_type(leaf, fi) >=
		     BTRFS_NR_FILE_EXTENT_TYPES)) {
238
		file_extent_err(leaf, slot,
239 240
		"invalid type for file extent, have %u expect range [0, %u]",
			btrfs_file_extent_type(leaf, fi),
241
			BTRFS_NR_FILE_EXTENT_TYPES - 1);
242 243 244 245
		return -EUCLEAN;
	}

	/*
246
	 * Support for new compression/encryption must introduce incompat flag,
247 248
	 * and must be caught in open_ctree().
	 */
249 250
	if (unlikely(btrfs_file_extent_compression(leaf, fi) >=
		     BTRFS_NR_COMPRESS_TYPES)) {
251
		file_extent_err(leaf, slot,
252 253
	"invalid compression for file extent, have %u expect range [0, %u]",
			btrfs_file_extent_compression(leaf, fi),
254
			BTRFS_NR_COMPRESS_TYPES - 1);
255 256
		return -EUCLEAN;
	}
257
	if (unlikely(btrfs_file_extent_encryption(leaf, fi))) {
258
		file_extent_err(leaf, slot,
259 260
			"invalid encryption for file extent, have %u expect 0",
			btrfs_file_extent_encryption(leaf, fi));
261 262 263 264
		return -EUCLEAN;
	}
	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
		/* Inline extent must have 0 as key offset */
265
		if (unlikely(key->offset)) {
266
			file_extent_err(leaf, slot,
267 268
		"invalid file_offset for inline file extent, have %llu expect 0",
				key->offset);
269 270 271 272 273 274 275 276 277
			return -EUCLEAN;
		}

		/* Compressed inline extent has no on-disk size, skip it */
		if (btrfs_file_extent_compression(leaf, fi) !=
		    BTRFS_COMPRESS_NONE)
			return 0;

		/* Uncompressed inline extent size must match item size */
278 279
		if (unlikely(item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
					  btrfs_file_extent_ram_bytes(leaf, fi))) {
280
			file_extent_err(leaf, slot,
281 282 283
	"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
				btrfs_file_extent_ram_bytes(leaf, fi));
284 285 286 287 288 289
			return -EUCLEAN;
		}
		return 0;
	}

	/* Regular or preallocated extent has fixed item size */
290
	if (unlikely(item_size != sizeof(*fi))) {
291
		file_extent_err(leaf, slot,
292
	"invalid item size for reg/prealloc file extent, have %u expect %zu",
293
			item_size, sizeof(*fi));
294 295
		return -EUCLEAN;
	}
296 297 298 299 300
	if (unlikely(CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
		     CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
		     CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
		     CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
		     CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize)))
301
		return -EUCLEAN;
302

303
	/* Catch extent end overflow */
304 305
	if (unlikely(check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
					key->offset, &extent_end))) {
306 307 308 309 310 311 312
		file_extent_err(leaf, slot,
	"extent end overflow, have file offset %llu extent num bytes %llu",
				key->offset,
				btrfs_file_extent_num_bytes(leaf, fi));
		return -EUCLEAN;
	}

313 314 315 316 317 318 319 320 321 322 323 324 325
	/*
	 * Check that no two consecutive file extent items, in the same leaf,
	 * present ranges that overlap each other.
	 */
	if (slot > 0 &&
	    prev_key->objectid == key->objectid &&
	    prev_key->type == BTRFS_EXTENT_DATA_KEY) {
		struct btrfs_file_extent_item *prev_fi;
		u64 prev_end;

		prev_fi = btrfs_item_ptr(leaf, slot - 1,
					 struct btrfs_file_extent_item);
		prev_end = file_extent_end(leaf, prev_key, prev_fi);
326
		if (unlikely(prev_end > key->offset)) {
327 328 329 330 331 332 333
			file_extent_err(leaf, slot - 1,
"file extent end range (%llu) goes beyond start offset (%llu) of the next file extent",
					prev_end, key->offset);
			return -EUCLEAN;
		}
	}

334 335 336
	return 0;
}

337
static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
338
			   int slot, struct btrfs_key *prev_key)
339
{
340
	struct btrfs_fs_info *fs_info = leaf->fs_info;
341
	u32 sectorsize = fs_info->sectorsize;
342
	const u32 csumsize = fs_info->csum_size;
343

344
	if (unlikely(key->objectid != BTRFS_EXTENT_CSUM_OBJECTID)) {
345
		generic_err(leaf, slot,
346 347
		"invalid key objectid for csum item, have %llu expect %llu",
			key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
348 349
		return -EUCLEAN;
	}
350
	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
351
		generic_err(leaf, slot,
352 353
	"unaligned key offset for csum item, have %llu should be aligned to %u",
			key->offset, sectorsize);
354 355
		return -EUCLEAN;
	}
356
	if (unlikely(!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize))) {
357
		generic_err(leaf, slot,
358 359
	"unaligned item size for csum item, have %u should be aligned to %u",
			btrfs_item_size_nr(leaf, slot), csumsize);
360 361
		return -EUCLEAN;
	}
362 363 364 365 366 367 368
	if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
		u64 prev_csum_end;
		u32 prev_item_size;

		prev_item_size = btrfs_item_size_nr(leaf, slot - 1);
		prev_csum_end = (prev_item_size / csumsize) * sectorsize;
		prev_csum_end += prev_key->offset;
369
		if (unlikely(prev_csum_end > key->offset)) {
370 371 372 373 374 375
			generic_err(leaf, slot - 1,
"csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
				    prev_csum_end, key->offset);
			return -EUCLEAN;
		}
	}
376 377 378
	return 0;
}

379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
/*
 * Inode item error output has the same format as dir_item_err().
 *
 * ##__VA_ARGS__ drops the trailing comma when no variadic argument is given,
 * so the macro also accepts a plain message without format arguments.
 */
#define inode_item_err(eb, slot, fmt, ...)			\
	dir_item_err(eb, slot, fmt, ##__VA_ARGS__)

static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key,
			   int slot)
{
	struct btrfs_key item_key;
	bool is_inode_item;

	btrfs_item_key_to_cpu(leaf, &item_key, slot);
	is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY);

	/* For XATTR_ITEM, location key should be all 0 */
	if (item_key.type == BTRFS_XATTR_ITEM_KEY) {
394 395
		if (unlikely(key->objectid != 0 || key->type != 0 ||
			     key->offset != 0))
396 397 398 399
			return -EUCLEAN;
		return 0;
	}

400 401 402 403
	if (unlikely((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
		      key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
		     key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
		     key->objectid != BTRFS_FREE_INO_OBJECTID)) {
404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
		if (is_inode_item) {
			generic_err(leaf, slot,
	"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
				BTRFS_FIRST_FREE_OBJECTID,
				BTRFS_LAST_FREE_OBJECTID,
				BTRFS_FREE_INO_OBJECTID);
		} else {
			dir_item_err(leaf, slot,
"invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
				BTRFS_FIRST_FREE_OBJECTID,
				BTRFS_LAST_FREE_OBJECTID,
				BTRFS_FREE_INO_OBJECTID);
		}
		return -EUCLEAN;
	}
421
	if (unlikely(key->offset != 0)) {
422 423 424 425 426 427 428 429 430 431 432 433 434
		if (is_inode_item)
			inode_item_err(leaf, slot,
				       "invalid key offset: has %llu expect 0",
				       key->offset);
		else
			dir_item_err(leaf, slot,
				"invalid location key offset:has %llu expect 0",
				key->offset);
		return -EUCLEAN;
	}
	return 0;
}

435 436 437 438 439 440 441 442 443 444
static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
			  int slot)
{
	struct btrfs_key item_key;
	bool is_root_item;

	btrfs_item_key_to_cpu(leaf, &item_key, slot);
	is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);

	/* No such tree id */
445
	if (unlikely(key->objectid == 0)) {
446 447 448 449 450 451 452 453 454
		if (is_root_item)
			generic_err(leaf, slot, "invalid root id 0");
		else
			dir_item_err(leaf, slot,
				     "invalid location key root id 0");
		return -EUCLEAN;
	}

	/* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */
455
	if (unlikely(!is_fstree(key->objectid) && !is_root_item)) {
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470
		dir_item_err(leaf, slot,
		"invalid location key objectid, have %llu expect [%llu, %llu]",
				key->objectid, BTRFS_FIRST_FREE_OBJECTID,
				BTRFS_LAST_FREE_OBJECTID);
		return -EUCLEAN;
	}

	/*
	 * ROOT_ITEM with non-zero offset means this is a snapshot, created at
	 * @offset transid.
	 * Furthermore, for location key in DIR_ITEM, its offset is always -1.
	 *
	 * So here we only check offset for reloc tree whose key->offset must
	 * be a valid tree.
	 */
471 472
	if (unlikely(key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
		     key->offset == 0)) {
473 474 475 476 477 478
		generic_err(leaf, slot, "invalid root id 0 for reloc tree");
		return -EUCLEAN;
	}
	return 0;
}

479
static int check_dir_item(struct extent_buffer *leaf,
480 481
			  struct btrfs_key *key, struct btrfs_key *prev_key,
			  int slot)
482
{
483
	struct btrfs_fs_info *fs_info = leaf->fs_info;
484 485 486 487
	struct btrfs_dir_item *di;
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	u32 cur = 0;

488
	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
489
		return -EUCLEAN;
490

491 492
	di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
	while (cur < item_size) {
493
		struct btrfs_key location_key;
494 495 496 497 498 499
		u32 name_len;
		u32 data_len;
		u32 max_name_len;
		u32 total_size;
		u32 name_hash;
		u8 dir_type;
500
		int ret;
501 502

		/* header itself should not cross item boundary */
503
		if (unlikely(cur + sizeof(*di) > item_size)) {
504
			dir_item_err(leaf, slot,
505
		"dir item header crosses item boundary, have %zu boundary %u",
506 507 508 509
				cur + sizeof(*di), item_size);
			return -EUCLEAN;
		}

510 511 512 513
		/* Location key check */
		btrfs_dir_item_key_to_cpu(leaf, di, &location_key);
		if (location_key.type == BTRFS_ROOT_ITEM_KEY) {
			ret = check_root_key(leaf, &location_key, slot);
514
			if (unlikely(ret < 0))
515 516 517 518
				return ret;
		} else if (location_key.type == BTRFS_INODE_ITEM_KEY ||
			   location_key.type == 0) {
			ret = check_inode_key(leaf, &location_key, slot);
519
			if (unlikely(ret < 0))
520 521 522 523 524 525 526 527 528
				return ret;
		} else {
			dir_item_err(leaf, slot,
			"invalid location key type, have %u, expect %u or %u",
				     location_key.type, BTRFS_ROOT_ITEM_KEY,
				     BTRFS_INODE_ITEM_KEY);
			return -EUCLEAN;
		}

529 530
		/* dir type check */
		dir_type = btrfs_dir_type(leaf, di);
531
		if (unlikely(dir_type >= BTRFS_FT_MAX)) {
532
			dir_item_err(leaf, slot,
533 534 535 536 537
			"invalid dir item type, have %u expect [0, %u)",
				dir_type, BTRFS_FT_MAX);
			return -EUCLEAN;
		}

538 539
		if (unlikely(key->type == BTRFS_XATTR_ITEM_KEY &&
			     dir_type != BTRFS_FT_XATTR)) {
540
			dir_item_err(leaf, slot,
541 542 543 544
		"invalid dir item type for XATTR key, have %u expect %u",
				dir_type, BTRFS_FT_XATTR);
			return -EUCLEAN;
		}
545 546
		if (unlikely(dir_type == BTRFS_FT_XATTR &&
			     key->type != BTRFS_XATTR_ITEM_KEY)) {
547
			dir_item_err(leaf, slot,
548 549 550 551 552 553 554 555 556 557 558
			"xattr dir type found for non-XATTR key");
			return -EUCLEAN;
		}
		if (dir_type == BTRFS_FT_XATTR)
			max_name_len = XATTR_NAME_MAX;
		else
			max_name_len = BTRFS_NAME_LEN;

		/* Name/data length check */
		name_len = btrfs_dir_name_len(leaf, di);
		data_len = btrfs_dir_data_len(leaf, di);
559
		if (unlikely(name_len > max_name_len)) {
560
			dir_item_err(leaf, slot,
561 562 563 564
			"dir item name len too long, have %u max %u",
				name_len, max_name_len);
			return -EUCLEAN;
		}
565
		if (unlikely(name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info))) {
566
			dir_item_err(leaf, slot,
567 568
			"dir item name and data len too long, have %u max %u",
				name_len + data_len,
569
				BTRFS_MAX_XATTR_SIZE(fs_info));
570 571 572
			return -EUCLEAN;
		}

573
		if (unlikely(data_len && dir_type != BTRFS_FT_XATTR)) {
574
			dir_item_err(leaf, slot,
575 576 577 578 579 580 581 582
			"dir item with invalid data len, have %u expect 0",
				data_len);
			return -EUCLEAN;
		}

		total_size = sizeof(*di) + name_len + data_len;

		/* header and name/data should not cross item boundary */
583
		if (unlikely(cur + total_size > item_size)) {
584
			dir_item_err(leaf, slot,
585 586 587 588 589 590 591 592 593 594 595
		"dir item data crosses item boundary, have %u boundary %u",
				cur + total_size, item_size);
			return -EUCLEAN;
		}

		/*
		 * Special check for XATTR/DIR_ITEM, as key->offset is name
		 * hash, should match its name
		 */
		if (key->type == BTRFS_DIR_ITEM_KEY ||
		    key->type == BTRFS_XATTR_ITEM_KEY) {
596 597
			char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];

598 599 600
			read_extent_buffer(leaf, namebuf,
					(unsigned long)(di + 1), name_len);
			name_hash = btrfs_name_hash(namebuf, name_len);
601
			if (unlikely(key->offset != name_hash)) {
602
				dir_item_err(leaf, slot,
603 604 605 606 607 608 609 610 611 612 613
		"name hash mismatch with key, have 0x%016x expect 0x%016llx",
					name_hash, key->offset);
				return -EUCLEAN;
			}
		}
		cur += total_size;
		di = (struct btrfs_dir_item *)((void *)di + total_size);
	}
	return 0;
}

614
/*
 * Corruption reporter for block group items.
 *
 * Same output as generic_err(), with the key of the item at @slot decoded:
 * key.objectid is printed as "bg_start" and key.offset as "bg_len".
 */
__printf(3, 4)
__cold
static void block_group_err(const struct extent_buffer *eb, int slot,
			    const char *fmt, ...)
{
	const char *block_type = btrfs_header_level(eb) == 0 ? "leaf" : "node";
	struct va_format caller_msg;
	struct btrfs_key item_key;
	va_list ap;

	btrfs_item_key_to_cpu(eb, &item_key, slot);

	va_start(ap, fmt);
	caller_msg.fmt = fmt;
	caller_msg.va = &ap;

	btrfs_crit(eb->fs_info,
	"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
		block_type, btrfs_header_owner(eb), btrfs_header_bytenr(eb),
		slot, item_key.objectid, item_key.offset, &caller_msg);

	va_end(ap);
}

638
static int check_block_group_item(struct extent_buffer *leaf,
639 640 641 642 643 644 645 646 647
				  struct btrfs_key *key, int slot)
{
	struct btrfs_block_group_item bgi;
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	u64 flags;
	u64 type;

	/*
	 * Here we don't really care about alignment since extent allocator can
648
	 * handle it.  We care more about the size.
649
	 */
650
	if (unlikely(key->offset == 0)) {
651
		block_group_err(leaf, slot,
652
				"invalid block group size 0");
653 654 655
		return -EUCLEAN;
	}

656
	if (unlikely(item_size != sizeof(bgi))) {
657
		block_group_err(leaf, slot,
658 659 660 661 662 663 664
			"invalid item size, have %u expect %zu",
				item_size, sizeof(bgi));
		return -EUCLEAN;
	}

	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
			   sizeof(bgi));
665 666
	if (unlikely(btrfs_stack_block_group_chunk_objectid(&bgi) !=
		     BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
667
		block_group_err(leaf, slot,
668
		"invalid block group chunk objectid, have %llu expect %llu",
669
				btrfs_stack_block_group_chunk_objectid(&bgi),
670 671 672 673
				BTRFS_FIRST_CHUNK_TREE_OBJECTID);
		return -EUCLEAN;
	}

674
	if (unlikely(btrfs_stack_block_group_used(&bgi) > key->offset)) {
675
		block_group_err(leaf, slot,
676
			"invalid block group used, have %llu expect [0, %llu)",
677
				btrfs_stack_block_group_used(&bgi), key->offset);
678 679 680
		return -EUCLEAN;
	}

681
	flags = btrfs_stack_block_group_flags(&bgi);
682
	if (unlikely(hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1)) {
683
		block_group_err(leaf, slot,
684 685 686 687 688 689 690
"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
		return -EUCLEAN;
	}

	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
691 692 693 694 695
	if (unlikely(type != BTRFS_BLOCK_GROUP_DATA &&
		     type != BTRFS_BLOCK_GROUP_METADATA &&
		     type != BTRFS_BLOCK_GROUP_SYSTEM &&
		     type != (BTRFS_BLOCK_GROUP_METADATA |
			      BTRFS_BLOCK_GROUP_DATA))) {
696
		block_group_err(leaf, slot,
697
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
698 699 700 701 702 703 704
			type, hweight64(type),
			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
			BTRFS_BLOCK_GROUP_SYSTEM,
			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
		return -EUCLEAN;
	}
	return 0;
705 706
}

707
/*
 * Corruption reporter for chunk items.
 *
 * @leaf is either a chunk tree leaf or the extent buffer that starts at
 * BTRFS_SUPER_INFO_OFFSET, which is taken to hold the superblock sys chunk
 * array.  For a leaf the slot of @chunk is looked up so it can be included
 * in the message (-1 if no item starts at @chunk).  @logical is printed as
 * "chunk_start".
 */
__printf(4, 5)
__cold
static void chunk_err(const struct extent_buffer *leaf,
		      const struct btrfs_chunk *chunk, u64 logical,
		      const char *fmt, ...)
{
	/* Only superblock eb is able to have such small offset */
	const bool is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);
	struct va_format caller_msg;
	va_list ap;
	int slot = -1;
	int i;

	/*
	 * Walk all items to find the one whose data starts at @chunk, a slot
	 * number is easier to read than a pointer.
	 */
	for (i = 0; !is_sb && i < btrfs_header_nritems(leaf); i++) {
		if (btrfs_item_ptr_offset(leaf, i) != (unsigned long)chunk)
			continue;
		slot = i;
		break;
	}

	va_start(ap, fmt);
	caller_msg.fmt = fmt;
	caller_msg.va = &ap;

	if (!is_sb)
		btrfs_crit(leaf->fs_info,
	"corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
			   BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
			   logical, &caller_msg);
	else
		btrfs_crit(leaf->fs_info,
		"corrupt superblock syschunk array: chunk_start=%llu, %pV",
			   logical, &caller_msg);

	va_end(ap);
}

752 753 754
/*
 * The common chunk check which could also work on super block sys chunk array.
 *
755
 * Return -EUCLEAN if anything is corrupted.
756 757
 * Return 0 if everything is OK.
 */
758
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
759 760
			    struct btrfs_chunk *chunk, u64 logical)
{
761
	struct btrfs_fs_info *fs_info = leaf->fs_info;
762
	u64 length;
763
	u64 chunk_end;
764 765 766 767 768 769
	u64 stripe_len;
	u16 num_stripes;
	u16 sub_stripes;
	u64 type;
	u64 features;
	bool mixed = false;
770 771 772
	int raid_index;
	int nparity;
	int ncopies;
773 774 775 776 777 778

	length = btrfs_chunk_length(leaf, chunk);
	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
	type = btrfs_chunk_type(leaf, chunk);
779 780 781
	raid_index = btrfs_bg_flags_to_raid_index(type);
	ncopies = btrfs_raid_array[raid_index].ncopies;
	nparity = btrfs_raid_array[raid_index].nparity;
782

783
	if (unlikely(!num_stripes)) {
784
		chunk_err(leaf, chunk, logical,
785
			  "invalid chunk num_stripes, have %u", num_stripes);
786
		return -EUCLEAN;
787
	}
788
	if (unlikely(num_stripes < ncopies)) {
789 790 791 792 793
		chunk_err(leaf, chunk, logical,
			  "invalid chunk num_stripes < ncopies, have %u < %d",
			  num_stripes, ncopies);
		return -EUCLEAN;
	}
794
	if (unlikely(nparity && num_stripes == nparity)) {
795 796 797 798 799
		chunk_err(leaf, chunk, logical,
			  "invalid chunk num_stripes == nparity, have %u == %d",
			  num_stripes, nparity);
		return -EUCLEAN;
	}
800
	if (unlikely(!IS_ALIGNED(logical, fs_info->sectorsize))) {
801
		chunk_err(leaf, chunk, logical,
802 803
		"invalid chunk logical, have %llu should aligned to %u",
			  logical, fs_info->sectorsize);
804
		return -EUCLEAN;
805
	}
806
	if (unlikely(btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize)) {
807
		chunk_err(leaf, chunk, logical,
808 809 810
			  "invalid chunk sectorsize, have %u expect %u",
			  btrfs_chunk_sector_size(leaf, chunk),
			  fs_info->sectorsize);
811
		return -EUCLEAN;
812
	}
813
	if (unlikely(!length || !IS_ALIGNED(length, fs_info->sectorsize))) {
814
		chunk_err(leaf, chunk, logical,
815
			  "invalid chunk length, have %llu", length);
816
		return -EUCLEAN;
817
	}
818 819 820 821 822 823
	if (unlikely(check_add_overflow(logical, length, &chunk_end))) {
		chunk_err(leaf, chunk, logical,
"invalid chunk logical start and length, have logical start %llu length %llu",
			  logical, length);
		return -EUCLEAN;
	}
824
	if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) {
825
		chunk_err(leaf, chunk, logical,
826
			  "invalid chunk stripe length: %llu",
827
			  stripe_len);
828
		return -EUCLEAN;
829
	}
830 831
	if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
			      BTRFS_BLOCK_GROUP_PROFILE_MASK))) {
832
		chunk_err(leaf, chunk, logical,
833
			  "unrecognized chunk type: 0x%llx",
834 835 836
			  ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
			    BTRFS_BLOCK_GROUP_PROFILE_MASK) &
			  btrfs_chunk_type(leaf, chunk));
837
		return -EUCLEAN;
838 839
	}

840 841
	if (unlikely(!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
		     (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)) {
842
		chunk_err(leaf, chunk, logical,
843 844 845 846
		"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
			  type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
		return -EUCLEAN;
	}
847
	if (unlikely((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)) {
848
		chunk_err(leaf, chunk, logical,
849 850
	"missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
			  type, BTRFS_BLOCK_GROUP_TYPE_MASK);
851
		return -EUCLEAN;
852 853
	}

854 855 856
	if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
		     (type & (BTRFS_BLOCK_GROUP_METADATA |
			      BTRFS_BLOCK_GROUP_DATA)))) {
857
		chunk_err(leaf, chunk, logical,
858 859
			  "system chunk with data or metadata type: 0x%llx",
			  type);
860
		return -EUCLEAN;
861 862 863 864 865 866 867
	}

	features = btrfs_super_incompat_flags(fs_info->super_copy);
	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = true;

	if (!mixed) {
868 869
		if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA) &&
			     (type & BTRFS_BLOCK_GROUP_DATA))) {
870
			chunk_err(leaf, chunk, logical,
871
			"mixed chunk type in non-mixed mode: 0x%llx", type);
872
			return -EUCLEAN;
873 874 875
		}
	}

876 877 878 879 880 881 882
	if (unlikely((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
		     (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
		     (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
		     (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
		     (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
		     ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
		      num_stripes != 1))) {
883
		chunk_err(leaf, chunk, logical,
884 885 886
			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
			num_stripes, sub_stripes,
			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
887
		return -EUCLEAN;
888 889 890
	}

	return 0;
891 892
}

893 894 895 896 897 898 899 900 901 902 903 904
/*
 * Enhanced version of chunk item checker.
 *
 * The common btrfs_check_chunk_valid() doesn't check item size since it needs
 * to work on super block sys_chunk_array which doesn't have full item ptr.
 */
static int check_leaf_chunk_item(struct extent_buffer *leaf,
				 struct btrfs_chunk *chunk,
				 struct btrfs_key *key, int slot)
{
	int num_stripes;

905
	if (unlikely(btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk))) {
906 907 908 909 910 911 912 913 914 915 916 917 918
		chunk_err(leaf, chunk, key->offset,
			"invalid chunk item size: have %u expect [%zu, %u)",
			btrfs_item_size_nr(leaf, slot),
			sizeof(struct btrfs_chunk),
			BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
		return -EUCLEAN;
	}

	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	/* Let btrfs_check_chunk_valid() handle this error type */
	if (num_stripes == 0)
		goto out;

919 920
	if (unlikely(btrfs_chunk_item_size(num_stripes) !=
		     btrfs_item_size_nr(leaf, slot))) {
921 922 923 924 925 926 927 928 929 930
		chunk_err(leaf, chunk, key->offset,
			"invalid chunk item size: have %u expect %lu",
			btrfs_item_size_nr(leaf, slot),
			btrfs_chunk_item_size(num_stripes));
		return -EUCLEAN;
	}
out:
	return btrfs_check_chunk_valid(leaf, chunk, key->offset);
}

931
/*
 * Corruption reporter for dev items.
 *
 * Same output as generic_err() plus the objectid of the key at @slot, which
 * is printed as "devid".  As in the other reporters of this file, the
 * identifier part is separated from the caller's reason by ", ".
 */
__printf(3, 4)
__cold
static void dev_item_err(const struct extent_buffer *eb, int slot,
			 const char *fmt, ...)
{
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(eb->fs_info,
	"corrupt %s: root=%llu block=%llu slot=%d devid=%llu, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, &vaf);
	va_end(args);
}

954
static int check_dev_item(struct extent_buffer *leaf,
Q
Qu Wenruo 已提交
955 956 957 958
			  struct btrfs_key *key, int slot)
{
	struct btrfs_dev_item *ditem;

959
	if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
960
		dev_item_err(leaf, slot,
Q
Qu Wenruo 已提交
961 962 963 964 965
			     "invalid objectid: has=%llu expect=%llu",
			     key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
		return -EUCLEAN;
	}
	ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
966
	if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
967
		dev_item_err(leaf, slot,
Q
Qu Wenruo 已提交
968 969 970 971 972 973 974 975 976 977
			     "devid mismatch: key has=%llu item has=%llu",
			     key->offset, btrfs_device_id(leaf, ditem));
		return -EUCLEAN;
	}

	/*
	 * For device total_bytes, we don't have reliable way to check it, as
	 * it can be 0 for device removal. Device size check can only be done
	 * by dev extents check.
	 */
978 979
	if (unlikely(btrfs_device_bytes_used(leaf, ditem) >
		     btrfs_device_total_bytes(leaf, ditem))) {
980
		dev_item_err(leaf, slot,
Q
Qu Wenruo 已提交
981 982 983 984 985 986 987 988 989 990 991 992
			     "invalid bytes used: have %llu expect [0, %llu]",
			     btrfs_device_bytes_used(leaf, ditem),
			     btrfs_device_total_bytes(leaf, ditem));
		return -EUCLEAN;
	}
	/*
	 * Remaining members like io_align/type/gen/dev_group aren't really
	 * utilized.  Skip them to make later usage of them easier.
	 */
	return 0;
}

993
static int check_inode_item(struct extent_buffer *leaf,
994 995
			    struct btrfs_key *key, int slot)
{
996
	struct btrfs_fs_info *fs_info = leaf->fs_info;
997 998 999 1000
	struct btrfs_inode_item *iitem;
	u64 super_gen = btrfs_super_generation(fs_info->super_copy);
	u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
	u32 mode;
1001 1002 1003
	int ret;

	ret = check_inode_key(leaf, key, slot);
1004
	if (unlikely(ret < 0))
1005
		return ret;
1006 1007 1008 1009

	iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);

	/* Here we use super block generation + 1 to handle log tree */
1010
	if (unlikely(btrfs_inode_generation(leaf, iitem) > super_gen + 1)) {
1011
		inode_item_err(leaf, slot,
1012 1013 1014 1015 1016 1017
			"invalid inode generation: has %llu expect (0, %llu]",
			       btrfs_inode_generation(leaf, iitem),
			       super_gen + 1);
		return -EUCLEAN;
	}
	/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
1018
	if (unlikely(btrfs_inode_transid(leaf, iitem) > super_gen + 1)) {
1019
		inode_item_err(leaf, slot,
1020
			"invalid inode transid: has %llu expect [0, %llu]",
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
			       btrfs_inode_transid(leaf, iitem), super_gen + 1);
		return -EUCLEAN;
	}

	/*
	 * For size and nbytes it's better not to be too strict, as for dir
	 * item its size/nbytes can easily get wrong, but doesn't affect
	 * anything in the fs. So here we skip the check.
	 */
	mode = btrfs_inode_mode(leaf, iitem);
1031
	if (unlikely(mode & ~valid_mask)) {
1032
		inode_item_err(leaf, slot,
1033 1034 1035 1036 1037 1038
			       "unknown mode bit detected: 0x%x",
			       mode & ~valid_mask);
		return -EUCLEAN;
	}

	/*
1039 1040 1041
	 * S_IFMT is not bit mapped so we can't completely rely on
	 * is_power_of_2/has_single_bit_set, but it can save us from checking
	 * FIFO/CHR/DIR/REG.  Only needs to check BLK, LNK and SOCKS
1042
	 */
1043
	if (!has_single_bit_set(mode & S_IFMT)) {
1044
		if (unlikely(!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode))) {
1045
			inode_item_err(leaf, slot,
1046 1047 1048 1049 1050
			"invalid mode: has 0%o expect valid S_IF* bit(s)",
				       mode & S_IFMT);
			return -EUCLEAN;
		}
	}
1051
	if (unlikely(S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1)) {
1052
		inode_item_err(leaf, slot,
1053 1054 1055 1056
		       "invalid nlink: has %u expect no more than 1 for dir",
			btrfs_inode_nlink(leaf, iitem));
		return -EUCLEAN;
	}
1057
	if (unlikely(btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK)) {
1058
		inode_item_err(leaf, slot,
1059 1060 1061 1062 1063 1064 1065 1066
			       "unknown flags detected: 0x%llx",
			       btrfs_inode_flags(leaf, iitem) &
			       ~BTRFS_INODE_FLAG_MASK);
		return -EUCLEAN;
	}
	return 0;
}

1067 1068 1069 1070
static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
			   int slot)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
1071
	struct btrfs_root_item ri = { 0 };
1072 1073
	const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
				     BTRFS_ROOT_SUBVOL_DEAD;
1074
	int ret;
1075

1076
	ret = check_root_key(leaf, key, slot);
1077
	if (unlikely(ret < 0))
1078
		return ret;
1079

1080 1081 1082
	if (unlikely(btrfs_item_size_nr(leaf, slot) != sizeof(ri) &&
		     btrfs_item_size_nr(leaf, slot) !=
		     btrfs_legacy_root_item_size())) {
1083
		generic_err(leaf, slot,
1084 1085 1086
			    "invalid root item size, have %u expect %zu or %u",
			    btrfs_item_size_nr(leaf, slot), sizeof(ri),
			    btrfs_legacy_root_item_size());
1087
		return -EUCLEAN;
1088 1089
	}

1090 1091 1092 1093 1094
	/*
	 * For legacy root item, the members starting at generation_v2 will be
	 * all filled with 0.
	 * And since we allow geneartion_v2 as 0, it will still pass the check.
	 */
1095
	read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
1096
			   btrfs_item_size_nr(leaf, slot));
1097 1098

	/* Generation related */
1099 1100
	if (unlikely(btrfs_root_generation(&ri) >
		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1101 1102 1103 1104 1105 1106
		generic_err(leaf, slot,
			"invalid root generation, have %llu expect (0, %llu]",
			    btrfs_root_generation(&ri),
			    btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}
1107 1108
	if (unlikely(btrfs_root_generation_v2(&ri) >
		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1109 1110 1111 1112 1113 1114
		generic_err(leaf, slot,
		"invalid root v2 generation, have %llu expect (0, %llu]",
			    btrfs_root_generation_v2(&ri),
			    btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}
1115 1116
	if (unlikely(btrfs_root_last_snapshot(&ri) >
		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1117 1118 1119 1120 1121 1122 1123 1124
		generic_err(leaf, slot,
		"invalid root last_snapshot, have %llu expect (0, %llu]",
			    btrfs_root_last_snapshot(&ri),
			    btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}

	/* Alignment and level check */
1125
	if (unlikely(!IS_ALIGNED(btrfs_root_bytenr(&ri), fs_info->sectorsize))) {
1126 1127 1128 1129 1130
		generic_err(leaf, slot,
		"invalid root bytenr, have %llu expect to be aligned to %u",
			    btrfs_root_bytenr(&ri), fs_info->sectorsize);
		return -EUCLEAN;
	}
1131
	if (unlikely(btrfs_root_level(&ri) >= BTRFS_MAX_LEVEL)) {
1132 1133 1134 1135 1136
		generic_err(leaf, slot,
			    "invalid root level, have %u expect [0, %u]",
			    btrfs_root_level(&ri), BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}
1137
	if (unlikely(btrfs_root_drop_level(&ri) >= BTRFS_MAX_LEVEL)) {
1138 1139
		generic_err(leaf, slot,
			    "invalid root level, have %u expect [0, %u]",
1140
			    btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1);
1141 1142 1143 1144
		return -EUCLEAN;
	}

	/* Flags check */
1145
	if (unlikely(btrfs_root_flags(&ri) & ~valid_root_flags)) {
1146 1147 1148 1149 1150 1151 1152 1153
		generic_err(leaf, slot,
			    "invalid root flags, have 0x%llx expect mask 0x%llx",
			    btrfs_root_flags(&ri), valid_root_flags);
		return -EUCLEAN;
	}
	return 0;
}

1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166
/*
 * Report a corrupted extent tree item.
 *
 * Prepends "corrupt leaf/node: block=%llu slot=%d extent bytenr=%llu
 * len=%llu " to the caller supplied message.  The bytenr is the objectid of
 * the key at @slot.  The length is the nodesize for METADATA_ITEM,
 * TREE_BLOCK_REF and SHARED_BLOCK_REF keys, and the key offset for every
 * other key type.
 */
__printf(3,4)
__cold
static void extent_err(const struct extent_buffer *eb, int slot,
		       const char *fmt, ...)
{
	va_list args;
	struct va_format vaf = { .fmt = fmt, .va = &args };
	struct btrfs_key key;
	u64 len;

	btrfs_item_key_to_cpu(eb, &key, slot);

	switch (key.type) {
	case BTRFS_METADATA_ITEM_KEY:
	case BTRFS_TREE_BLOCK_REF_KEY:
	case BTRFS_SHARED_BLOCK_REF_KEY:
		len = eb->fs_info->nodesize;
		break;
	default:
		len = key.offset;
		break;
	}

	va_start(args, fmt);
	btrfs_crit(eb->fs_info,
	"corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		eb->start, slot, key.objectid, len, &vaf);
	va_end(args);
}

static int check_extent_item(struct extent_buffer *leaf,
			     struct btrfs_key *key, int slot)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_extent_item *ei;
	bool is_tree_block = false;
	unsigned long ptr;	/* Current pointer inside inline refs */
	unsigned long end;	/* Extent item end */
	const u32 item_size = btrfs_item_size_nr(leaf, slot);
	u64 flags;
	u64 generation;
	u64 total_refs;		/* Total refs in btrfs_extent_item */
	u64 inline_refs = 0;	/* found total inline refs */

1199 1200
	if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY &&
		     !btrfs_fs_incompat(fs_info, SKINNY_METADATA))) {
1201 1202 1203 1204 1205
		generic_err(leaf, slot,
"invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled");
		return -EUCLEAN;
	}
	/* key->objectid is the bytenr for both key types */
1206
	if (unlikely(!IS_ALIGNED(key->objectid, fs_info->sectorsize))) {
1207 1208 1209 1210 1211 1212 1213
		generic_err(leaf, slot,
		"invalid key objectid, have %llu expect to be aligned to %u",
			   key->objectid, fs_info->sectorsize);
		return -EUCLEAN;
	}

	/* key->offset is tree level for METADATA_ITEM_KEY */
1214 1215
	if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY &&
		     key->offset >= BTRFS_MAX_LEVEL)) {
1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240
		extent_err(leaf, slot,
			   "invalid tree level, have %llu expect [0, %u]",
			   key->offset, BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}

	/*
	 * EXTENT/METADATA_ITEM consists of:
	 * 1) One btrfs_extent_item
	 *    Records the total refs, type and generation of the extent.
	 *
	 * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only)
	 *    Records the first key and level of the tree block.
	 *
	 * 2) Zero or more btrfs_extent_inline_ref(s)
	 *    Each inline ref has one btrfs_extent_inline_ref shows:
	 *    2.1) The ref type, one of the 4
	 *         TREE_BLOCK_REF	Tree block only
	 *         SHARED_BLOCK_REF	Tree block only
	 *         EXTENT_DATA_REF	Data only
	 *         SHARED_DATA_REF	Data only
	 *    2.2) Ref type specific data
	 *         Either using btrfs_extent_inline_ref::offset, or specific
	 *         data structure.
	 */
1241
	if (unlikely(item_size < sizeof(*ei))) {
1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254
		extent_err(leaf, slot,
			   "invalid item size, have %u expect [%zu, %u)",
			   item_size, sizeof(*ei),
			   BTRFS_LEAF_DATA_SIZE(fs_info));
		return -EUCLEAN;
	}
	end = item_size + btrfs_item_ptr_offset(leaf, slot);

	/* Checks against extent_item */
	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);
	total_refs = btrfs_extent_refs(leaf, ei);
	generation = btrfs_extent_generation(leaf, ei);
1255 1256
	if (unlikely(generation >
		     btrfs_super_generation(fs_info->super_copy) + 1)) {
1257 1258 1259 1260 1261 1262
		extent_err(leaf, slot,
			   "invalid generation, have %llu expect (0, %llu]",
			   generation,
			   btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}
1263 1264
	if (unlikely(!has_single_bit_set(flags & (BTRFS_EXTENT_FLAG_DATA |
						  BTRFS_EXTENT_FLAG_TREE_BLOCK)))) {
1265 1266 1267 1268 1269 1270 1271 1272
		extent_err(leaf, slot,
		"invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx",
			flags, BTRFS_EXTENT_FLAG_DATA |
			BTRFS_EXTENT_FLAG_TREE_BLOCK);
		return -EUCLEAN;
	}
	is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK);
	if (is_tree_block) {
1273 1274
		if (unlikely(key->type == BTRFS_EXTENT_ITEM_KEY &&
			     key->offset != fs_info->nodesize)) {
1275 1276 1277 1278 1279 1280
			extent_err(leaf, slot,
				   "invalid extent length, have %llu expect %u",
				   key->offset, fs_info->nodesize);
			return -EUCLEAN;
		}
	} else {
1281
		if (unlikely(key->type != BTRFS_EXTENT_ITEM_KEY)) {
1282 1283 1284 1285 1286
			extent_err(leaf, slot,
			"invalid key type, have %u expect %u for data backref",
				   key->type, BTRFS_EXTENT_ITEM_KEY);
			return -EUCLEAN;
		}
1287
		if (unlikely(!IS_ALIGNED(key->offset, fs_info->sectorsize))) {
1288 1289 1290 1291 1292
			extent_err(leaf, slot,
			"invalid extent length, have %llu expect aligned to %u",
				   key->offset, fs_info->sectorsize);
			return -EUCLEAN;
		}
1293 1294 1295 1296 1297
		if (unlikely(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
			extent_err(leaf, slot,
			"invalid extent flag, data has full backref set");
			return -EUCLEAN;
		}
1298 1299 1300 1301 1302 1303 1304 1305
	}
	ptr = (unsigned long)(struct btrfs_extent_item *)(ei + 1);

	/* Check the special case of btrfs_tree_block_info */
	if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)ptr;
1306
		if (unlikely(btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL)) {
1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324
			extent_err(leaf, slot,
			"invalid tree block info level, have %u expect [0, %u]",
				   btrfs_tree_block_level(leaf, info),
				   BTRFS_MAX_LEVEL - 1);
			return -EUCLEAN;
		}
		ptr = (unsigned long)(struct btrfs_tree_block_info *)(info + 1);
	}

	/* Check inline refs */
	while (ptr < end) {
		struct btrfs_extent_inline_ref *iref;
		struct btrfs_extent_data_ref *dref;
		struct btrfs_shared_data_ref *sref;
		u64 dref_offset;
		u64 inline_offset;
		u8 inline_type;

1325
		if (unlikely(ptr + sizeof(*iref) > end)) {
1326 1327 1328 1329 1330 1331 1332 1333
			extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %zu end %lu",
				   ptr, sizeof(*iref), end);
			return -EUCLEAN;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		inline_type = btrfs_extent_inline_ref_type(leaf, iref);
		inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1334
		if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347
			extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %u end %lu",
				   ptr, inline_type, end);
			return -EUCLEAN;
		}

		switch (inline_type) {
		/* inline_offset is subvolid of the owner, no need to check */
		case BTRFS_TREE_BLOCK_REF_KEY:
			inline_refs++;
			break;
		/* Contains parent bytenr */
		case BTRFS_SHARED_BLOCK_REF_KEY:
1348 1349
			if (unlikely(!IS_ALIGNED(inline_offset,
						 fs_info->sectorsize))) {
1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363
				extent_err(leaf, slot,
		"invalid tree parent bytenr, have %llu expect aligned to %u",
					   inline_offset, fs_info->sectorsize);
				return -EUCLEAN;
			}
			inline_refs++;
			break;
		/*
		 * Contains owner subvolid, owner key objectid, adjusted offset.
		 * The only obvious corruption can happen in that offset.
		 */
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
1364 1365
			if (unlikely(!IS_ALIGNED(dref_offset,
						 fs_info->sectorsize))) {
1366 1367 1368 1369 1370 1371 1372 1373 1374 1375
				extent_err(leaf, slot,
		"invalid data ref offset, have %llu expect aligned to %u",
					   dref_offset, fs_info->sectorsize);
				return -EUCLEAN;
			}
			inline_refs += btrfs_extent_data_ref_count(leaf, dref);
			break;
		/* Contains parent bytenr and ref count */
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
1376 1377
			if (unlikely(!IS_ALIGNED(inline_offset,
						 fs_info->sectorsize))) {
1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392
				extent_err(leaf, slot,
		"invalid data parent bytenr, have %llu expect aligned to %u",
					   inline_offset, fs_info->sectorsize);
				return -EUCLEAN;
			}
			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
			break;
		default:
			extent_err(leaf, slot, "unknown inline ref type: %u",
				   inline_type);
			return -EUCLEAN;
		}
		ptr += btrfs_extent_inline_ref_size(inline_type);
	}
	/* No padding is allowed */
1393
	if (unlikely(ptr != end)) {
1394 1395 1396 1397 1398 1399
		extent_err(leaf, slot,
			   "invalid extent item size, padding bytes found");
		return -EUCLEAN;
	}

	/* Finally, check the inline refs against total refs */
1400
	if (unlikely(inline_refs > total_refs)) {
1401 1402 1403 1404 1405 1406 1407 1408
		extent_err(leaf, slot,
			"invalid extent refs, have %llu expect >= inline %llu",
			   total_refs, inline_refs);
		return -EUCLEAN;
	}
	return 0;
}

1409 1410 1411 1412 1413 1414 1415 1416
static int check_simple_keyed_refs(struct extent_buffer *leaf,
				   struct btrfs_key *key, int slot)
{
	u32 expect_item_size = 0;

	if (key->type == BTRFS_SHARED_DATA_REF_KEY)
		expect_item_size = sizeof(struct btrfs_shared_data_ref);

1417
	if (unlikely(btrfs_item_size_nr(leaf, slot) != expect_item_size)) {
1418 1419 1420 1421 1422 1423
		generic_err(leaf, slot,
		"invalid item size, have %u expect %u for key type %u",
			    btrfs_item_size_nr(leaf, slot),
			    expect_item_size, key->type);
		return -EUCLEAN;
	}
1424
	if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
1425 1426 1427 1428 1429
		generic_err(leaf, slot,
"invalid key objectid for shared block ref, have %llu expect aligned to %u",
			    key->objectid, leaf->fs_info->sectorsize);
		return -EUCLEAN;
	}
1430 1431
	if (unlikely(key->type != BTRFS_TREE_BLOCK_REF_KEY &&
		     !IS_ALIGNED(key->offset, leaf->fs_info->sectorsize))) {
1432 1433 1434 1435 1436 1437 1438 1439
		extent_err(leaf, slot,
		"invalid tree parent bytenr, have %llu expect aligned to %u",
			   key->offset, leaf->fs_info->sectorsize);
		return -EUCLEAN;
	}
	return 0;
}

1440 1441 1442 1443 1444 1445 1446
static int check_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_key *key, int slot)
{
	struct btrfs_extent_data_ref *dref;
	unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
	const unsigned long end = ptr + btrfs_item_size_nr(leaf, slot);

1447
	if (unlikely(btrfs_item_size_nr(leaf, slot) % sizeof(*dref) != 0)) {
1448 1449 1450 1451
		generic_err(leaf, slot,
	"invalid item size, have %u expect aligned to %zu for key type %u",
			    btrfs_item_size_nr(leaf, slot),
			    sizeof(*dref), key->type);
1452
		return -EUCLEAN;
1453
	}
1454
	if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
1455 1456 1457 1458 1459 1460 1461 1462
		generic_err(leaf, slot,
"invalid key objectid for shared block ref, have %llu expect aligned to %u",
			    key->objectid, leaf->fs_info->sectorsize);
		return -EUCLEAN;
	}
	for (; ptr < end; ptr += sizeof(*dref)) {
		u64 offset;

1463 1464 1465 1466
		/*
		 * We cannot check the extent_data_ref hash due to possible
		 * overflow from the leaf due to hash collisions.
		 */
1467 1468
		dref = (struct btrfs_extent_data_ref *)ptr;
		offset = btrfs_extent_data_ref_offset(leaf, dref);
1469
		if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
1470 1471 1472
			extent_err(leaf, slot,
	"invalid extent data backref offset, have %llu expect aligned to %u",
				   offset, leaf->fs_info->sectorsize);
1473
			return -EUCLEAN;
1474 1475 1476 1477 1478
		}
	}
	return 0;
}

1479 1480
#define inode_ref_err(eb, slot, fmt, args...)			\
	inode_item_err(eb, slot, fmt, ##args)
1481 1482 1483 1484 1485 1486 1487 1488
static int check_inode_ref(struct extent_buffer *leaf,
			   struct btrfs_key *key, struct btrfs_key *prev_key,
			   int slot)
{
	struct btrfs_inode_ref *iref;
	unsigned long ptr;
	unsigned long end;

1489
	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
1490
		return -EUCLEAN;
1491
	/* namelen can't be 0, so item_size == sizeof() is also invalid */
1492
	if (unlikely(btrfs_item_size_nr(leaf, slot) <= sizeof(*iref))) {
1493
		inode_ref_err(leaf, slot,
1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504
			"invalid item size, have %u expect (%zu, %u)",
			btrfs_item_size_nr(leaf, slot),
			sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
		return -EUCLEAN;
	}

	ptr = btrfs_item_ptr_offset(leaf, slot);
	end = ptr + btrfs_item_size_nr(leaf, slot);
	while (ptr < end) {
		u16 namelen;

1505
		if (unlikely(ptr + sizeof(iref) > end)) {
1506
			inode_ref_err(leaf, slot,
1507 1508 1509 1510 1511 1512 1513
			"inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
				ptr, end, sizeof(iref));
			return -EUCLEAN;
		}

		iref = (struct btrfs_inode_ref *)ptr;
		namelen = btrfs_inode_ref_name_len(leaf, iref);
1514
		if (unlikely(ptr + sizeof(*iref) + namelen > end)) {
1515
			inode_ref_err(leaf, slot,
1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530
				"inode ref overflow, ptr %lu end %lu namelen %u",
				ptr, end, namelen);
			return -EUCLEAN;
		}

		/*
		 * NOTE: In theory we should record all found index numbers
		 * to find any duplicated indexes, but that will be too time
		 * consuming for inodes with too many hard links.
		 */
		ptr += sizeof(*iref) + namelen;
	}
	return 0;
}

1531 1532 1533
/*
 * Common point to switch the item-specific validation.
 */
1534
static int check_leaf_item(struct extent_buffer *leaf,
1535 1536
			   struct btrfs_key *key, int slot,
			   struct btrfs_key *prev_key)
1537 1538
{
	int ret = 0;
1539
	struct btrfs_chunk *chunk;
1540 1541 1542

	switch (key->type) {
	case BTRFS_EXTENT_DATA_KEY:
1543
		ret = check_extent_data_item(leaf, key, slot, prev_key);
1544 1545
		break;
	case BTRFS_EXTENT_CSUM_KEY:
1546
		ret = check_csum_item(leaf, key, slot, prev_key);
1547
		break;
1548 1549 1550
	case BTRFS_DIR_ITEM_KEY:
	case BTRFS_DIR_INDEX_KEY:
	case BTRFS_XATTR_ITEM_KEY:
1551
		ret = check_dir_item(leaf, key, prev_key, slot);
1552
		break;
1553 1554 1555
	case BTRFS_INODE_REF_KEY:
		ret = check_inode_ref(leaf, key, prev_key, slot);
		break;
1556
	case BTRFS_BLOCK_GROUP_ITEM_KEY:
1557
		ret = check_block_group_item(leaf, key, slot);
1558
		break;
1559 1560
	case BTRFS_CHUNK_ITEM_KEY:
		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
1561
		ret = check_leaf_chunk_item(leaf, chunk, key, slot);
1562
		break;
Q
Qu Wenruo 已提交
1563
	case BTRFS_DEV_ITEM_KEY:
1564
		ret = check_dev_item(leaf, key, slot);
Q
Qu Wenruo 已提交
1565
		break;
1566
	case BTRFS_INODE_ITEM_KEY:
1567
		ret = check_inode_item(leaf, key, slot);
1568
		break;
1569 1570 1571
	case BTRFS_ROOT_ITEM_KEY:
		ret = check_root_item(leaf, key, slot);
		break;
1572 1573 1574 1575
	case BTRFS_EXTENT_ITEM_KEY:
	case BTRFS_METADATA_ITEM_KEY:
		ret = check_extent_item(leaf, key, slot);
		break;
1576 1577 1578 1579 1580
	case BTRFS_TREE_BLOCK_REF_KEY:
	case BTRFS_SHARED_DATA_REF_KEY:
	case BTRFS_SHARED_BLOCK_REF_KEY:
		ret = check_simple_keyed_refs(leaf, key, slot);
		break;
1581 1582 1583
	case BTRFS_EXTENT_DATA_REF_KEY:
		ret = check_extent_data_ref(leaf, key, slot);
		break;
1584 1585 1586 1587
	}
	return ret;
}

1588
/*
 * Validate a leaf.
 *
 * @leaf:		the extent buffer to check
 * @check_item_data:	if true, also run the item-specific checks of
 *			check_leaf_item() on every item
 *
 * Return 0 if the leaf passes all checks, -EUCLEAN or the negative value
 * returned by check_leaf_item() otherwise.
 */
static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	const u32 nritems = btrfs_header_nritems(leaf);
	/* No valid key type is 0, so all key should be larger than this key */
	struct btrfs_key prev_key = {0, 0, 0};
	struct btrfs_key key;
	int slot;

	if (unlikely(btrfs_header_level(leaf) != 0)) {
		generic_err(leaf, 0,
			"invalid level for leaf, have %d expect 0",
			btrfs_header_level(leaf));
		return -EUCLEAN;
	}

	if (nritems == 0) {
		u64 owner;

		/*
		 * Extent buffers from a relocation tree have a owner field
		 * that corresponds to the subvolume tree they are based on.
		 * So just from an extent buffer alone we can not find out what
		 * is the id of the corresponding subvolume tree, so we can not
		 * figure out if the extent buffer corresponds to the root of
		 * the relocation tree or not. So skip this check for
		 * relocation trees.
		 */
		if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC))
			return 0;

		owner = btrfs_header_owner(leaf);
		switch (owner) {
		/* These trees must never be empty */
		case BTRFS_ROOT_TREE_OBJECTID:
		case BTRFS_CHUNK_TREE_OBJECTID:
		case BTRFS_EXTENT_TREE_OBJECTID:
		case BTRFS_DEV_TREE_OBJECTID:
		case BTRFS_FS_TREE_OBJECTID:
		case BTRFS_DATA_RELOC_TREE_OBJECTID:
			generic_err(leaf, 0,
			"invalid root, root %llu must never be empty",
				    owner);
			return -EUCLEAN;
		/* Unknown tree */
		case 0:
			generic_err(leaf, 0,
				"invalid owner, root 0 is not defined");
			return -EUCLEAN;
		default:
			return 0;
		}
	}

	/*
	 * Check the following things to make sure this is a good leaf, and
	 * leaf users won't need to bother with similar sanity checks:
	 *
	 * 1) key ordering
	 * 2) item offset and size
	 *    No overlap, no hole, all inside the leaf.
	 * 3) item content
	 *    If possible, do comprehensive sanity check.
	 *    NOTE: All checks must only rely on the item data itself.
	 */
	for (slot = 0; slot < nritems; slot++) {
		u32 item_end_expected;
		u32 item_end;
		unsigned long item_hdr_end;
		unsigned long data_start;
		int ret;

		btrfs_item_key_to_cpu(leaf, &key, slot);

		/* Make sure the keys are in the right order */
		if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) {
			generic_err(leaf, slot,
	"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
				prev_key.objectid, prev_key.type,
				prev_key.offset, key.objectid, key.type,
				key.offset);
			return -EUCLEAN;
		}

		/*
		 * Make sure the offset and ends are right, remember that the
		 * item data starts at the end of the leaf and grows towards the
		 * front.  So the first item must end at the end of the leaf
		 * data area and each later item must end where the previous
		 * one starts.
		 */
		item_end_expected = (slot == 0) ?
				    BTRFS_LEAF_DATA_SIZE(fs_info) :
				    btrfs_item_offset_nr(leaf, slot - 1);
		item_end = btrfs_item_end_nr(leaf, slot);
		if (unlikely(item_end != item_end_expected)) {
			generic_err(leaf, slot,
				"unexpected item end, have %u expect %u",
				item_end, item_end_expected);
			return -EUCLEAN;
		}

		/*
		 * Check to make sure that we don't point outside of the leaf,
		 * just in case all the items are consistent to each other, but
		 * all point outside of the leaf.
		 */
		if (unlikely(item_end > BTRFS_LEAF_DATA_SIZE(fs_info))) {
			generic_err(leaf, slot,
			"slot end outside of leaf, have %u expect range [0, %u]",
				item_end, BTRFS_LEAF_DATA_SIZE(fs_info));
			return -EUCLEAN;
		}

		/* Also check if the item pointer overlaps with btrfs item. */
		item_hdr_end = btrfs_item_nr_offset(slot) +
			       sizeof(struct btrfs_item);
		data_start = btrfs_item_ptr_offset(leaf, slot);
		if (unlikely(data_start < item_hdr_end)) {
			generic_err(leaf, slot,
		"slot overlaps with its data, item end %lu data start %lu",
				item_hdr_end, data_start);
			return -EUCLEAN;
		}

		if (check_item_data) {
			/*
			 * Check if the item size and content meet other
			 * criteria
			 */
			ret = check_leaf_item(leaf, &key, slot, &prev_key);
			if (unlikely(ret < 0))
				return ret;
		}

		/* The current key is the previous key of the next slot */
		prev_key = key;
	}

	return 0;
}

1727
int btrfs_check_leaf_full(struct extent_buffer *leaf)
1728
{
1729
	return check_leaf(leaf, true);
1730
}
1731
ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
1732

1733
int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
1734
{
1735
	return check_leaf(leaf, false);
1736 1737
}

1738
int btrfs_check_node(struct extent_buffer *node)
1739
{
1740
	struct btrfs_fs_info *fs_info = node->fs_info;
1741 1742 1743
	unsigned long nr = btrfs_header_nritems(node);
	struct btrfs_key key, next_key;
	int slot;
1744
	int level = btrfs_header_level(node);
1745 1746 1747
	u64 bytenr;
	int ret = 0;

1748
	if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
1749
		generic_err(node, 0,
1750 1751 1752 1753
			"invalid level for node, have %d expect [1, %d]",
			level, BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}
1754
	if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) {
1755
		btrfs_crit(fs_info,
1756
"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
1757
			   btrfs_header_owner(node), node->start,
1758
			   nr == 0 ? "small" : "large", nr,
1759
			   BTRFS_NODEPTRS_PER_BLOCK(fs_info));
1760
		return -EUCLEAN;
1761 1762 1763 1764 1765 1766 1767
	}

	for (slot = 0; slot < nr - 1; slot++) {
		bytenr = btrfs_node_blockptr(node, slot);
		btrfs_node_key_to_cpu(node, &key, slot);
		btrfs_node_key_to_cpu(node, &next_key, slot + 1);

1768
		if (unlikely(!bytenr)) {
1769
			generic_err(node, slot,
1770 1771 1772 1773
				"invalid NULL node pointer");
			ret = -EUCLEAN;
			goto out;
		}
1774
		if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) {
1775
			generic_err(node, slot,
1776
			"unaligned pointer, have %llu should be aligned to %u",
1777
				bytenr, fs_info->sectorsize);
1778
			ret = -EUCLEAN;
1779 1780 1781
			goto out;
		}

1782
		if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) {
1783
			generic_err(node, slot,
1784 1785 1786 1787 1788
	"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
				key.objectid, key.type, key.offset,
				next_key.objectid, next_key.type,
				next_key.offset);
			ret = -EUCLEAN;
1789 1790 1791 1792 1793 1794
			goto out;
		}
	}
out:
	return ret;
}
1795
ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);