tree-checker.c 43.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Copyright (C) Qu Wenruo 2017.  All rights reserved.
 */

/*
 * The module is used to catch unexpected/corrupted tree block data.
 * Such behavior can be caused either by a fuzzed image or bugs.
 *
 * The objective is to do leaf/node validation checks when tree block is read
 * from disk, and check *every* possible member, so other code won't
 * need to checking them again.
 *
 * Due to the potential and unwanted damage, every checker needs to be
 * carefully reviewed otherwise so it does not prevent mount of valid images.
 */

18 19 20
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/error-injection.h>
21 22 23 24
#include "ctree.h"
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
25
#include "volumes.h"
26

27 28 29 30 31 32
/*
 * Error message should follow the following format:
 * corrupt <type>: <identifier>, <reason>[, <bad_value>]
 *
 * @type:	leaf or node
 * @identifier:	the necessary info to locate the leaf/node.
33
 * 		It's recommended to decode key.objecitd/offset if it's
34 35
 * 		meaningful.
 * @reason:	describe the error
36
 * @bad_value:	optional, it's recommended to output bad value and its
37 38 39 40 41 42 43 44 45 46
 *		expected value (range).
 *
 * Since comma is used to separate the components, only space is allowed
 * inside each component.
 */

/*
 * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
 * Allows callers to customize the output.
 */
47
__printf(3, 4)
48
__cold
49
static void generic_err(const struct extent_buffer *eb, int slot,
50 51
			const char *fmt, ...)
{
52
	const struct btrfs_fs_info *fs_info = eb->fs_info;
53 54 55 56 57 58 59 60
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

61
	btrfs_crit(fs_info,
62 63
		"corrupt %s: root=%llu block=%llu slot=%d, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
64
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
65 66 67
	va_end(args);
}

68 69 70 71
/*
 * Customized reporter for extent data item, since its key objectid and
 * offset has its own meaning.
 */
72
__printf(3, 4)
73
__cold
74
static void file_extent_err(const struct extent_buffer *eb, int slot,
75 76
			    const char *fmt, ...)
{
77
	const struct btrfs_fs_info *fs_info = eb->fs_info;
78 79 80 81 82 83 84 85 86 87
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

88
	btrfs_crit(fs_info,
89
	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
90 91 92
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, key.offset, &vaf);
93 94 95 96 97 98 99
	va_end(args);
}

/*
 * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
 * Else return 1
 */
100
#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)		      \
101 102
({									      \
	if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \
103
		file_extent_err((leaf), (slot),				      \
104 105 106 107 108 109
	"invalid %s for file extent, have %llu, should be aligned to %u",     \
			(#name), btrfs_file_extent_##name((leaf), (fi)),      \
			(alignment));					      \
	(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment)));   \
})

110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
static u64 file_extent_end(struct extent_buffer *leaf,
			   struct btrfs_key *key,
			   struct btrfs_file_extent_item *extent)
{
	u64 end;
	u64 len;

	if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) {
		len = btrfs_file_extent_ram_bytes(leaf, extent);
		end = ALIGN(key->offset + len, leaf->fs_info->sectorsize);
	} else {
		len = btrfs_file_extent_num_bytes(leaf, extent);
		end = key->offset + len;
	}
	return end;
}

127
static int check_extent_data_item(struct extent_buffer *leaf,
128 129
				  struct btrfs_key *key, int slot,
				  struct btrfs_key *prev_key)
130
{
131
	struct btrfs_fs_info *fs_info = leaf->fs_info;
132
	struct btrfs_file_extent_item *fi;
133
	u32 sectorsize = fs_info->sectorsize;
134
	u32 item_size = btrfs_item_size_nr(leaf, slot);
135
	u64 extent_end;
136 137

	if (!IS_ALIGNED(key->offset, sectorsize)) {
138
		file_extent_err(leaf, slot,
139 140
"unaligned file_offset for file extent, have %llu should be aligned to %u",
			key->offset, sectorsize);
141 142 143 144 145 146
		return -EUCLEAN;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

	if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
147
		file_extent_err(leaf, slot,
148 149 150
		"invalid type for file extent, have %u expect range [0, %u]",
			btrfs_file_extent_type(leaf, fi),
			BTRFS_FILE_EXTENT_TYPES);
151 152 153 154
		return -EUCLEAN;
	}

	/*
155
	 * Support for new compression/encryption must introduce incompat flag,
156 157 158
	 * and must be caught in open_ctree().
	 */
	if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
159
		file_extent_err(leaf, slot,
160 161 162
	"invalid compression for file extent, have %u expect range [0, %u]",
			btrfs_file_extent_compression(leaf, fi),
			BTRFS_COMPRESS_TYPES);
163 164 165
		return -EUCLEAN;
	}
	if (btrfs_file_extent_encryption(leaf, fi)) {
166
		file_extent_err(leaf, slot,
167 168
			"invalid encryption for file extent, have %u expect 0",
			btrfs_file_extent_encryption(leaf, fi));
169 170 171 172 173
		return -EUCLEAN;
	}
	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
		/* Inline extent must have 0 as key offset */
		if (key->offset) {
174
			file_extent_err(leaf, slot,
175 176
		"invalid file_offset for inline file extent, have %llu expect 0",
				key->offset);
177 178 179 180 181 182 183 184 185 186 187
			return -EUCLEAN;
		}

		/* Compressed inline extent has no on-disk size, skip it */
		if (btrfs_file_extent_compression(leaf, fi) !=
		    BTRFS_COMPRESS_NONE)
			return 0;

		/* Uncompressed inline extent size must match item size */
		if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
		    btrfs_file_extent_ram_bytes(leaf, fi)) {
188
			file_extent_err(leaf, slot,
189 190 191
	"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
				btrfs_file_extent_ram_bytes(leaf, fi));
192 193 194 195 196 197 198
			return -EUCLEAN;
		}
		return 0;
	}

	/* Regular or preallocated extent has fixed item size */
	if (item_size != sizeof(*fi)) {
199
		file_extent_err(leaf, slot,
200
	"invalid item size for reg/prealloc file extent, have %u expect %zu",
201
			item_size, sizeof(*fi));
202 203
		return -EUCLEAN;
	}
204 205 206 207 208
	if (CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
	    CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
	    CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
	    CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
	    CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
209
		return -EUCLEAN;
210

211 212 213 214 215 216 217 218 219 220
	/* Catch extent end overflow */
	if (check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
			       key->offset, &extent_end)) {
		file_extent_err(leaf, slot,
	"extent end overflow, have file offset %llu extent num bytes %llu",
				key->offset,
				btrfs_file_extent_num_bytes(leaf, fi));
		return -EUCLEAN;
	}

221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
	/*
	 * Check that no two consecutive file extent items, in the same leaf,
	 * present ranges that overlap each other.
	 */
	if (slot > 0 &&
	    prev_key->objectid == key->objectid &&
	    prev_key->type == BTRFS_EXTENT_DATA_KEY) {
		struct btrfs_file_extent_item *prev_fi;
		u64 prev_end;

		prev_fi = btrfs_item_ptr(leaf, slot - 1,
					 struct btrfs_file_extent_item);
		prev_end = file_extent_end(leaf, prev_key, prev_fi);
		if (prev_end > key->offset) {
			file_extent_err(leaf, slot - 1,
"file extent end range (%llu) goes beyond start offset (%llu) of the next file extent",
					prev_end, key->offset);
			return -EUCLEAN;
		}
	}

242 243 244
	return 0;
}

245
static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
246
			   int slot)
247
{
248
	struct btrfs_fs_info *fs_info = leaf->fs_info;
249 250
	u32 sectorsize = fs_info->sectorsize;
	u32 csumsize = btrfs_super_csum_size(fs_info->super_copy);
251 252

	if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
253
		generic_err(leaf, slot,
254 255
		"invalid key objectid for csum item, have %llu expect %llu",
			key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
256 257 258
		return -EUCLEAN;
	}
	if (!IS_ALIGNED(key->offset, sectorsize)) {
259
		generic_err(leaf, slot,
260 261
	"unaligned key offset for csum item, have %llu should be aligned to %u",
			key->offset, sectorsize);
262 263 264
		return -EUCLEAN;
	}
	if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
265
		generic_err(leaf, slot,
266 267
	"unaligned item size for csum item, have %u should be aligned to %u",
			btrfs_item_size_nr(leaf, slot), csumsize);
268 269 270 271 272
		return -EUCLEAN;
	}
	return 0;
}

273 274 275 276
/*
 * Customized reported for dir_item, only important new info is key->objectid,
 * which represents inode number
 */
277
__printf(3, 4)
278
__cold
279
static void dir_item_err(const struct extent_buffer *eb, int slot,
280 281
			 const char *fmt, ...)
{
282
	const struct btrfs_fs_info *fs_info = eb->fs_info;
283 284 285 286 287 288 289 290 291 292
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

293
	btrfs_crit(fs_info,
294
	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
295 296 297
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, &vaf);
298 299 300
	va_end(args);
}

301
static int check_dir_item(struct extent_buffer *leaf,
302 303
			  struct btrfs_key *key, int slot)
{
304
	struct btrfs_fs_info *fs_info = leaf->fs_info;
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
	struct btrfs_dir_item *di;
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	u32 cur = 0;

	di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
	while (cur < item_size) {
		u32 name_len;
		u32 data_len;
		u32 max_name_len;
		u32 total_size;
		u32 name_hash;
		u8 dir_type;

		/* header itself should not cross item boundary */
		if (cur + sizeof(*di) > item_size) {
320
			dir_item_err(leaf, slot,
321
		"dir item header crosses item boundary, have %zu boundary %u",
322 323 324 325 326 327 328
				cur + sizeof(*di), item_size);
			return -EUCLEAN;
		}

		/* dir type check */
		dir_type = btrfs_dir_type(leaf, di);
		if (dir_type >= BTRFS_FT_MAX) {
329
			dir_item_err(leaf, slot,
330 331 332 333 334 335 336
			"invalid dir item type, have %u expect [0, %u)",
				dir_type, BTRFS_FT_MAX);
			return -EUCLEAN;
		}

		if (key->type == BTRFS_XATTR_ITEM_KEY &&
		    dir_type != BTRFS_FT_XATTR) {
337
			dir_item_err(leaf, slot,
338 339 340 341 342 343
		"invalid dir item type for XATTR key, have %u expect %u",
				dir_type, BTRFS_FT_XATTR);
			return -EUCLEAN;
		}
		if (dir_type == BTRFS_FT_XATTR &&
		    key->type != BTRFS_XATTR_ITEM_KEY) {
344
			dir_item_err(leaf, slot,
345 346 347 348 349 350 351 352 353 354 355 356
			"xattr dir type found for non-XATTR key");
			return -EUCLEAN;
		}
		if (dir_type == BTRFS_FT_XATTR)
			max_name_len = XATTR_NAME_MAX;
		else
			max_name_len = BTRFS_NAME_LEN;

		/* Name/data length check */
		name_len = btrfs_dir_name_len(leaf, di);
		data_len = btrfs_dir_data_len(leaf, di);
		if (name_len > max_name_len) {
357
			dir_item_err(leaf, slot,
358 359 360 361
			"dir item name len too long, have %u max %u",
				name_len, max_name_len);
			return -EUCLEAN;
		}
362
		if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) {
363
			dir_item_err(leaf, slot,
364 365
			"dir item name and data len too long, have %u max %u",
				name_len + data_len,
366
				BTRFS_MAX_XATTR_SIZE(fs_info));
367 368 369 370
			return -EUCLEAN;
		}

		if (data_len && dir_type != BTRFS_FT_XATTR) {
371
			dir_item_err(leaf, slot,
372 373 374 375 376 377 378 379 380
			"dir item with invalid data len, have %u expect 0",
				data_len);
			return -EUCLEAN;
		}

		total_size = sizeof(*di) + name_len + data_len;

		/* header and name/data should not cross item boundary */
		if (cur + total_size > item_size) {
381
			dir_item_err(leaf, slot,
382 383 384 385 386 387 388 389 390 391 392
		"dir item data crosses item boundary, have %u boundary %u",
				cur + total_size, item_size);
			return -EUCLEAN;
		}

		/*
		 * Special check for XATTR/DIR_ITEM, as key->offset is name
		 * hash, should match its name
		 */
		if (key->type == BTRFS_DIR_ITEM_KEY ||
		    key->type == BTRFS_XATTR_ITEM_KEY) {
393 394
			char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];

395 396 397 398
			read_extent_buffer(leaf, namebuf,
					(unsigned long)(di + 1), name_len);
			name_hash = btrfs_name_hash(namebuf, name_len);
			if (key->offset != name_hash) {
399
				dir_item_err(leaf, slot,
400 401 402 403 404 405 406 407 408 409 410
		"name hash mismatch with key, have 0x%016x expect 0x%016llx",
					name_hash, key->offset);
				return -EUCLEAN;
			}
		}
		cur += total_size;
		di = (struct btrfs_dir_item *)((void *)di + total_size);
	}
	return 0;
}

411
__printf(3, 4)
412
__cold
413
static void block_group_err(const struct extent_buffer *eb, int slot,
414 415
			    const char *fmt, ...)
{
416
	const struct btrfs_fs_info *fs_info = eb->fs_info;
417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(fs_info,
	"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, key.offset, &vaf);
	va_end(args);
}

435
static int check_block_group_item(struct extent_buffer *leaf,
436 437 438 439 440 441 442 443 444
				  struct btrfs_key *key, int slot)
{
	struct btrfs_block_group_item bgi;
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	u64 flags;
	u64 type;

	/*
	 * Here we don't really care about alignment since extent allocator can
445
	 * handle it.  We care more about the size.
446
	 */
447
	if (key->offset == 0) {
448
		block_group_err(leaf, slot,
449
				"invalid block group size 0");
450 451 452 453
		return -EUCLEAN;
	}

	if (item_size != sizeof(bgi)) {
454
		block_group_err(leaf, slot,
455 456 457 458 459 460 461 462 463
			"invalid item size, have %u expect %zu",
				item_size, sizeof(bgi));
		return -EUCLEAN;
	}

	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
			   sizeof(bgi));
	if (btrfs_block_group_chunk_objectid(&bgi) !=
	    BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
464
		block_group_err(leaf, slot,
465 466 467 468 469 470 471
		"invalid block group chunk objectid, have %llu expect %llu",
				btrfs_block_group_chunk_objectid(&bgi),
				BTRFS_FIRST_CHUNK_TREE_OBJECTID);
		return -EUCLEAN;
	}

	if (btrfs_block_group_used(&bgi) > key->offset) {
472
		block_group_err(leaf, slot,
473 474 475 476 477 478 479
			"invalid block group used, have %llu expect [0, %llu)",
				btrfs_block_group_used(&bgi), key->offset);
		return -EUCLEAN;
	}

	flags = btrfs_block_group_flags(&bgi);
	if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
480
		block_group_err(leaf, slot,
481 482 483 484 485 486 487 488 489 490 491 492
"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
		return -EUCLEAN;
	}

	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	if (type != BTRFS_BLOCK_GROUP_DATA &&
	    type != BTRFS_BLOCK_GROUP_METADATA &&
	    type != BTRFS_BLOCK_GROUP_SYSTEM &&
	    type != (BTRFS_BLOCK_GROUP_METADATA |
			   BTRFS_BLOCK_GROUP_DATA)) {
493
		block_group_err(leaf, slot,
494
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
495 496 497 498 499 500 501
			type, hweight64(type),
			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
			BTRFS_BLOCK_GROUP_SYSTEM,
			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
		return -EUCLEAN;
	}
	return 0;
502 503
}

504
__printf(4, 5)
505
__cold
506
static void chunk_err(const struct extent_buffer *leaf,
507 508 509
		      const struct btrfs_chunk *chunk, u64 logical,
		      const char *fmt, ...)
{
510
	const struct btrfs_fs_info *fs_info = leaf->fs_info;
511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548
	bool is_sb;
	struct va_format vaf;
	va_list args;
	int i;
	int slot = -1;

	/* Only superblock eb is able to have such small offset */
	is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);

	if (!is_sb) {
		/*
		 * Get the slot number by iterating through all slots, this
		 * would provide better readability.
		 */
		for (i = 0; i < btrfs_header_nritems(leaf); i++) {
			if (btrfs_item_ptr_offset(leaf, i) ==
					(unsigned long)chunk) {
				slot = i;
				break;
			}
		}
	}
	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	if (is_sb)
		btrfs_crit(fs_info,
		"corrupt superblock syschunk array: chunk_start=%llu, %pV",
			   logical, &vaf);
	else
		btrfs_crit(fs_info,
	"corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
			   BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
			   logical, &vaf);
	va_end(args);
}

549 550 551
/*
 * The common chunk check which could also work on super block sys chunk array.
 *
552
 * Return -EUCLEAN if anything is corrupted.
553 554
 * Return 0 if everything is OK.
 */
555
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
556 557
			    struct btrfs_chunk *chunk, u64 logical)
{
558
	struct btrfs_fs_info *fs_info = leaf->fs_info;
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
	u64 length;
	u64 stripe_len;
	u16 num_stripes;
	u16 sub_stripes;
	u64 type;
	u64 features;
	bool mixed = false;

	length = btrfs_chunk_length(leaf, chunk);
	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
	type = btrfs_chunk_type(leaf, chunk);

	if (!num_stripes) {
574
		chunk_err(leaf, chunk, logical,
575
			  "invalid chunk num_stripes, have %u", num_stripes);
576
		return -EUCLEAN;
577 578
	}
	if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
579
		chunk_err(leaf, chunk, logical,
580 581
		"invalid chunk logical, have %llu should aligned to %u",
			  logical, fs_info->sectorsize);
582
		return -EUCLEAN;
583 584
	}
	if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
585
		chunk_err(leaf, chunk, logical,
586 587 588
			  "invalid chunk sectorsize, have %u expect %u",
			  btrfs_chunk_sector_size(leaf, chunk),
			  fs_info->sectorsize);
589
		return -EUCLEAN;
590 591
	}
	if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
592
		chunk_err(leaf, chunk, logical,
593
			  "invalid chunk length, have %llu", length);
594
		return -EUCLEAN;
595 596
	}
	if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
597
		chunk_err(leaf, chunk, logical,
598
			  "invalid chunk stripe length: %llu",
599
			  stripe_len);
600
		return -EUCLEAN;
601 602 603
	}
	if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
	    type) {
604
		chunk_err(leaf, chunk, logical,
605
			  "unrecognized chunk type: 0x%llx",
606 607 608
			  ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
			    BTRFS_BLOCK_GROUP_PROFILE_MASK) &
			  btrfs_chunk_type(leaf, chunk));
609
		return -EUCLEAN;
610 611
	}

612 613
	if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
	    (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
614
		chunk_err(leaf, chunk, logical,
615 616 617 618
		"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
			  type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
		return -EUCLEAN;
	}
619
	if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
620
		chunk_err(leaf, chunk, logical,
621 622
	"missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
			  type, BTRFS_BLOCK_GROUP_TYPE_MASK);
623
		return -EUCLEAN;
624 625 626 627
	}

	if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
	    (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
628
		chunk_err(leaf, chunk, logical,
629 630
			  "system chunk with data or metadata type: 0x%llx",
			  type);
631
		return -EUCLEAN;
632 633 634 635 636 637 638 639 640
	}

	features = btrfs_super_incompat_flags(fs_info->super_copy);
	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = true;

	if (!mixed) {
		if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
		    (type & BTRFS_BLOCK_GROUP_DATA)) {
641
			chunk_err(leaf, chunk, logical,
642
			"mixed chunk type in non-mixed mode: 0x%llx", type);
643
			return -EUCLEAN;
644 645 646 647 648 649 650 651 652
		}
	}

	if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
	    (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
	    (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
	    ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) {
653
		chunk_err(leaf, chunk, logical,
654 655 656
			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
			num_stripes, sub_stripes,
			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
657
		return -EUCLEAN;
658 659 660
	}

	return 0;
661 662
}

663
__printf(3, 4)
Q
Qu Wenruo 已提交
664
__cold
665
static void dev_item_err(const struct extent_buffer *eb, int slot,
Q
Qu Wenruo 已提交
666 667 668 669 670 671 672 673 674 675 676 677
			 const char *fmt, ...)
{
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

678
	btrfs_crit(eb->fs_info,
Q
Qu Wenruo 已提交
679 680 681 682 683 684 685
	"corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, &vaf);
	va_end(args);
}

686
static int check_dev_item(struct extent_buffer *leaf,
Q
Qu Wenruo 已提交
687 688 689 690 691
			  struct btrfs_key *key, int slot)
{
	struct btrfs_dev_item *ditem;

	if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
692
		dev_item_err(leaf, slot,
Q
Qu Wenruo 已提交
693 694 695 696 697 698
			     "invalid objectid: has=%llu expect=%llu",
			     key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
		return -EUCLEAN;
	}
	ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
	if (btrfs_device_id(leaf, ditem) != key->offset) {
699
		dev_item_err(leaf, slot,
Q
Qu Wenruo 已提交
700 701 702 703 704 705 706 707 708 709 710 711
			     "devid mismatch: key has=%llu item has=%llu",
			     key->offset, btrfs_device_id(leaf, ditem));
		return -EUCLEAN;
	}

	/*
	 * For device total_bytes, we don't have reliable way to check it, as
	 * it can be 0 for device removal. Device size check can only be done
	 * by dev extents check.
	 */
	if (btrfs_device_bytes_used(leaf, ditem) >
	    btrfs_device_total_bytes(leaf, ditem)) {
712
		dev_item_err(leaf, slot,
Q
Qu Wenruo 已提交
713 714 715 716 717 718 719 720 721 722 723 724
			     "invalid bytes used: have %llu expect [0, %llu]",
			     btrfs_device_bytes_used(leaf, ditem),
			     btrfs_device_total_bytes(leaf, ditem));
		return -EUCLEAN;
	}
	/*
	 * Remaining members like io_align/type/gen/dev_group aren't really
	 * utilized.  Skip them to make later usage of them easier.
	 */
	return 0;
}

725 726
/* Inode item error output has the same format as dir_item_err() */
#define inode_item_err(fs_info, eb, slot, fmt, ...)			\
727
	dir_item_err(eb, slot, fmt, __VA_ARGS__)
728

729
static int check_inode_item(struct extent_buffer *leaf,
730 731
			    struct btrfs_key *key, int slot)
{
732
	struct btrfs_fs_info *fs_info = leaf->fs_info;
733 734 735 736 737 738 739 740 741
	struct btrfs_inode_item *iitem;
	u64 super_gen = btrfs_super_generation(fs_info->super_copy);
	u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
	u32 mode;

	if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
	     key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
	    key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
	    key->objectid != BTRFS_FREE_INO_OBJECTID) {
742
		generic_err(leaf, slot,
743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815
	"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
			    key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
			    BTRFS_FIRST_FREE_OBJECTID,
			    BTRFS_LAST_FREE_OBJECTID,
			    BTRFS_FREE_INO_OBJECTID);
		return -EUCLEAN;
	}
	if (key->offset != 0) {
		inode_item_err(fs_info, leaf, slot,
			"invalid key offset: has %llu expect 0",
			key->offset);
		return -EUCLEAN;
	}
	iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);

	/* Here we use super block generation + 1 to handle log tree */
	if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) {
		inode_item_err(fs_info, leaf, slot,
			"invalid inode generation: has %llu expect (0, %llu]",
			       btrfs_inode_generation(leaf, iitem),
			       super_gen + 1);
		return -EUCLEAN;
	}
	/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
	if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
		inode_item_err(fs_info, leaf, slot,
			"invalid inode generation: has %llu expect [0, %llu]",
			       btrfs_inode_transid(leaf, iitem), super_gen + 1);
		return -EUCLEAN;
	}

	/*
	 * For size and nbytes it's better not to be too strict, as for dir
	 * item its size/nbytes can easily get wrong, but doesn't affect
	 * anything in the fs. So here we skip the check.
	 */
	mode = btrfs_inode_mode(leaf, iitem);
	if (mode & ~valid_mask) {
		inode_item_err(fs_info, leaf, slot,
			       "unknown mode bit detected: 0x%x",
			       mode & ~valid_mask);
		return -EUCLEAN;
	}

	/*
	 * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2,
	 * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG.
	 * Only needs to check BLK, LNK and SOCKS
	 */
	if (!is_power_of_2(mode & S_IFMT)) {
		if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
			inode_item_err(fs_info, leaf, slot,
			"invalid mode: has 0%o expect valid S_IF* bit(s)",
				       mode & S_IFMT);
			return -EUCLEAN;
		}
	}
	if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) {
		inode_item_err(fs_info, leaf, slot,
		       "invalid nlink: has %u expect no more than 1 for dir",
			btrfs_inode_nlink(leaf, iitem));
		return -EUCLEAN;
	}
	if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) {
		inode_item_err(fs_info, leaf, slot,
			       "unknown flags detected: 0x%llx",
			       btrfs_inode_flags(leaf, iitem) &
			       ~BTRFS_INODE_FLAG_MASK);
		return -EUCLEAN;
	}
	return 0;
}

816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904
static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
			   int slot)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_root_item ri;
	const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
				     BTRFS_ROOT_SUBVOL_DEAD;

	/* No such tree id */
	if (key->objectid == 0) {
		generic_err(leaf, slot, "invalid root id 0");
		return -EUCLEAN;
	}

	/*
	 * Some older kernel may create ROOT_ITEM with non-zero offset, so here
	 * we only check offset for reloc tree whose key->offset must be a
	 * valid tree.
	 */
	if (key->objectid == BTRFS_TREE_RELOC_OBJECTID && key->offset == 0) {
		generic_err(leaf, slot, "invalid root id 0 for reloc tree");
		return -EUCLEAN;
	}

	if (btrfs_item_size_nr(leaf, slot) != sizeof(ri)) {
		generic_err(leaf, slot,
			    "invalid root item size, have %u expect %zu",
			    btrfs_item_size_nr(leaf, slot), sizeof(ri));
	}

	read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
			   sizeof(ri));

	/* Generation related */
	if (btrfs_root_generation(&ri) >
	    btrfs_super_generation(fs_info->super_copy) + 1) {
		generic_err(leaf, slot,
			"invalid root generation, have %llu expect (0, %llu]",
			    btrfs_root_generation(&ri),
			    btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}
	if (btrfs_root_generation_v2(&ri) >
	    btrfs_super_generation(fs_info->super_copy) + 1) {
		generic_err(leaf, slot,
		"invalid root v2 generation, have %llu expect (0, %llu]",
			    btrfs_root_generation_v2(&ri),
			    btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}
	if (btrfs_root_last_snapshot(&ri) >
	    btrfs_super_generation(fs_info->super_copy) + 1) {
		generic_err(leaf, slot,
		"invalid root last_snapshot, have %llu expect (0, %llu]",
			    btrfs_root_last_snapshot(&ri),
			    btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}

	/* Alignment and level check */
	if (!IS_ALIGNED(btrfs_root_bytenr(&ri), fs_info->sectorsize)) {
		generic_err(leaf, slot,
		"invalid root bytenr, have %llu expect to be aligned to %u",
			    btrfs_root_bytenr(&ri), fs_info->sectorsize);
		return -EUCLEAN;
	}
	if (btrfs_root_level(&ri) >= BTRFS_MAX_LEVEL) {
		generic_err(leaf, slot,
			    "invalid root level, have %u expect [0, %u]",
			    btrfs_root_level(&ri), BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}
	if (ri.drop_level >= BTRFS_MAX_LEVEL) {
		generic_err(leaf, slot,
			    "invalid root level, have %u expect [0, %u]",
			    ri.drop_level, BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}

	/* Flags check */
	if (btrfs_root_flags(&ri) & ~valid_root_flags) {
		generic_err(leaf, slot,
			    "invalid root flags, have 0x%llx expect mask 0x%llx",
			    btrfs_root_flags(&ri), valid_root_flags);
		return -EUCLEAN;
	}
	return 0;
}

905 906 907 908 909 910 911 912 913 914 915 916 917
__printf(3,4)
__cold
static void extent_err(const struct extent_buffer *eb, int slot,
		       const char *fmt, ...)
{
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;
	u64 bytenr;
	u64 len;

	btrfs_item_key_to_cpu(eb, &key, slot);
	bytenr = key.objectid;
918 919 920
	if (key.type == BTRFS_METADATA_ITEM_KEY ||
	    key.type == BTRFS_TREE_BLOCK_REF_KEY ||
	    key.type == BTRFS_SHARED_BLOCK_REF_KEY)
921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150
		len = eb->fs_info->nodesize;
	else
		len = key.offset;
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(eb->fs_info,
	"corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		eb->start, slot, bytenr, len, &vaf);
	va_end(args);
}

static int check_extent_item(struct extent_buffer *leaf,
			     struct btrfs_key *key, int slot)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_extent_item *ei;
	bool is_tree_block = false;
	unsigned long ptr;	/* Current pointer inside inline refs */
	unsigned long end;	/* Extent item end */
	const u32 item_size = btrfs_item_size_nr(leaf, slot);
	u64 flags;
	u64 generation;
	u64 total_refs;		/* Total refs in btrfs_extent_item */
	u64 inline_refs = 0;	/* found total inline refs */

	if (key->type == BTRFS_METADATA_ITEM_KEY &&
	    !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
		generic_err(leaf, slot,
"invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled");
		return -EUCLEAN;
	}
	/* key->objectid is the bytenr for both key types */
	if (!IS_ALIGNED(key->objectid, fs_info->sectorsize)) {
		generic_err(leaf, slot,
		"invalid key objectid, have %llu expect to be aligned to %u",
			   key->objectid, fs_info->sectorsize);
		return -EUCLEAN;
	}

	/* key->offset is tree level for METADATA_ITEM_KEY */
	if (key->type == BTRFS_METADATA_ITEM_KEY &&
	    key->offset >= BTRFS_MAX_LEVEL) {
		extent_err(leaf, slot,
			   "invalid tree level, have %llu expect [0, %u]",
			   key->offset, BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}

	/*
	 * EXTENT/METADATA_ITEM consists of:
	 * 1) One btrfs_extent_item
	 *    Records the total refs, type and generation of the extent.
	 *
	 * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only)
	 *    Records the first key and level of the tree block.
	 *
	 * 2) Zero or more btrfs_extent_inline_ref(s)
	 *    Each inline ref has one btrfs_extent_inline_ref shows:
	 *    2.1) The ref type, one of the 4
	 *         TREE_BLOCK_REF	Tree block only
	 *         SHARED_BLOCK_REF	Tree block only
	 *         EXTENT_DATA_REF	Data only
	 *         SHARED_DATA_REF	Data only
	 *    2.2) Ref type specific data
	 *         Either using btrfs_extent_inline_ref::offset, or specific
	 *         data structure.
	 */
	if (item_size < sizeof(*ei)) {
		extent_err(leaf, slot,
			   "invalid item size, have %u expect [%zu, %u)",
			   item_size, sizeof(*ei),
			   BTRFS_LEAF_DATA_SIZE(fs_info));
		return -EUCLEAN;
	}
	end = item_size + btrfs_item_ptr_offset(leaf, slot);

	/* Checks against extent_item */
	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);
	total_refs = btrfs_extent_refs(leaf, ei);
	generation = btrfs_extent_generation(leaf, ei);
	if (generation > btrfs_super_generation(fs_info->super_copy) + 1) {
		extent_err(leaf, slot,
			   "invalid generation, have %llu expect (0, %llu]",
			   generation,
			   btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}
	if (!is_power_of_2(flags & (BTRFS_EXTENT_FLAG_DATA |
				    BTRFS_EXTENT_FLAG_TREE_BLOCK))) {
		extent_err(leaf, slot,
		"invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx",
			flags, BTRFS_EXTENT_FLAG_DATA |
			BTRFS_EXTENT_FLAG_TREE_BLOCK);
		return -EUCLEAN;
	}
	is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK);
	if (is_tree_block) {
		if (key->type == BTRFS_EXTENT_ITEM_KEY &&
		    key->offset != fs_info->nodesize) {
			extent_err(leaf, slot,
				   "invalid extent length, have %llu expect %u",
				   key->offset, fs_info->nodesize);
			return -EUCLEAN;
		}
	} else {
		if (key->type != BTRFS_EXTENT_ITEM_KEY) {
			extent_err(leaf, slot,
			"invalid key type, have %u expect %u for data backref",
				   key->type, BTRFS_EXTENT_ITEM_KEY);
			return -EUCLEAN;
		}
		if (!IS_ALIGNED(key->offset, fs_info->sectorsize)) {
			extent_err(leaf, slot,
			"invalid extent length, have %llu expect aligned to %u",
				   key->offset, fs_info->sectorsize);
			return -EUCLEAN;
		}
	}
	ptr = (unsigned long)(struct btrfs_extent_item *)(ei + 1);

	/* Check the special case of btrfs_tree_block_info */
	if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)ptr;
		if (btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL) {
			extent_err(leaf, slot,
			"invalid tree block info level, have %u expect [0, %u]",
				   btrfs_tree_block_level(leaf, info),
				   BTRFS_MAX_LEVEL - 1);
			return -EUCLEAN;
		}
		ptr = (unsigned long)(struct btrfs_tree_block_info *)(info + 1);
	}

	/* Check inline refs */
	while (ptr < end) {
		struct btrfs_extent_inline_ref *iref;
		struct btrfs_extent_data_ref *dref;
		struct btrfs_shared_data_ref *sref;
		u64 dref_offset;
		u64 inline_offset;
		u8 inline_type;

		if (ptr + sizeof(*iref) > end) {
			extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %zu end %lu",
				   ptr, sizeof(*iref), end);
			return -EUCLEAN;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		inline_type = btrfs_extent_inline_ref_type(leaf, iref);
		inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
		if (ptr + btrfs_extent_inline_ref_size(inline_type) > end) {
			extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %u end %lu",
				   ptr, inline_type, end);
			return -EUCLEAN;
		}

		switch (inline_type) {
		/* inline_offset is subvolid of the owner, no need to check */
		case BTRFS_TREE_BLOCK_REF_KEY:
			inline_refs++;
			break;
		/* Contains parent bytenr */
		case BTRFS_SHARED_BLOCK_REF_KEY:
			if (!IS_ALIGNED(inline_offset, fs_info->sectorsize)) {
				extent_err(leaf, slot,
		"invalid tree parent bytenr, have %llu expect aligned to %u",
					   inline_offset, fs_info->sectorsize);
				return -EUCLEAN;
			}
			inline_refs++;
			break;
		/*
		 * Contains owner subvolid, owner key objectid, adjusted offset.
		 * The only obvious corruption can happen in that offset.
		 */
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
			if (!IS_ALIGNED(dref_offset, fs_info->sectorsize)) {
				extent_err(leaf, slot,
		"invalid data ref offset, have %llu expect aligned to %u",
					   dref_offset, fs_info->sectorsize);
				return -EUCLEAN;
			}
			inline_refs += btrfs_extent_data_ref_count(leaf, dref);
			break;
		/* Contains parent bytenr and ref count */
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			if (!IS_ALIGNED(inline_offset, fs_info->sectorsize)) {
				extent_err(leaf, slot,
		"invalid data parent bytenr, have %llu expect aligned to %u",
					   inline_offset, fs_info->sectorsize);
				return -EUCLEAN;
			}
			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
			break;
		default:
			extent_err(leaf, slot, "unknown inline ref type: %u",
				   inline_type);
			return -EUCLEAN;
		}
		ptr += btrfs_extent_inline_ref_size(inline_type);
	}
	/* No padding is allowed */
	if (ptr != end) {
		extent_err(leaf, slot,
			   "invalid extent item size, padding bytes found");
		return -EUCLEAN;
	}

	/* Finally, check the inline refs against total refs */
	if (inline_refs > total_refs) {
		extent_err(leaf, slot,
			"invalid extent refs, have %llu expect >= inline %llu",
			   total_refs, inline_refs);
		return -EUCLEAN;
	}
	return 0;
}

1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181
static int check_simple_keyed_refs(struct extent_buffer *leaf,
				   struct btrfs_key *key, int slot)
{
	u32 expect_item_size = 0;

	if (key->type == BTRFS_SHARED_DATA_REF_KEY)
		expect_item_size = sizeof(struct btrfs_shared_data_ref);

	if (btrfs_item_size_nr(leaf, slot) != expect_item_size) {
		generic_err(leaf, slot,
		"invalid item size, have %u expect %u for key type %u",
			    btrfs_item_size_nr(leaf, slot),
			    expect_item_size, key->type);
		return -EUCLEAN;
	}
	if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) {
		generic_err(leaf, slot,
"invalid key objectid for shared block ref, have %llu expect aligned to %u",
			    key->objectid, leaf->fs_info->sectorsize);
		return -EUCLEAN;
	}
	if (key->type != BTRFS_TREE_BLOCK_REF_KEY &&
	    !IS_ALIGNED(key->offset, leaf->fs_info->sectorsize)) {
		extent_err(leaf, slot,
		"invalid tree parent bytenr, have %llu expect aligned to %u",
			   key->offset, leaf->fs_info->sectorsize);
		return -EUCLEAN;
	}
	return 0;
}

1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226
static int check_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_key *key, int slot)
{
	struct btrfs_extent_data_ref *dref;
	unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
	const unsigned long end = ptr + btrfs_item_size_nr(leaf, slot);

	if (btrfs_item_size_nr(leaf, slot) % sizeof(*dref) != 0) {
		generic_err(leaf, slot,
	"invalid item size, have %u expect aligned to %zu for key type %u",
			    btrfs_item_size_nr(leaf, slot),
			    sizeof(*dref), key->type);
	}
	if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) {
		generic_err(leaf, slot,
"invalid key objectid for shared block ref, have %llu expect aligned to %u",
			    key->objectid, leaf->fs_info->sectorsize);
		return -EUCLEAN;
	}
	for (; ptr < end; ptr += sizeof(*dref)) {
		u64 root_objectid;
		u64 owner;
		u64 offset;
		u64 hash;

		dref = (struct btrfs_extent_data_ref *)ptr;
		root_objectid = btrfs_extent_data_ref_root(leaf, dref);
		owner = btrfs_extent_data_ref_objectid(leaf, dref);
		offset = btrfs_extent_data_ref_offset(leaf, dref);
		hash = hash_extent_data_ref(root_objectid, owner, offset);
		if (hash != key->offset) {
			extent_err(leaf, slot,
	"invalid extent data ref hash, item has 0x%016llx key has 0x%016llx",
				   hash, key->offset);
			return -EUCLEAN;
		}
		if (!IS_ALIGNED(offset, leaf->fs_info->sectorsize)) {
			extent_err(leaf, slot,
	"invalid extent data backref offset, have %llu expect aligned to %u",
				   offset, leaf->fs_info->sectorsize);
		}
	}
	return 0;
}

1227 1228 1229
/*
 * Common point to switch the item-specific validation.
 */
1230
static int check_leaf_item(struct extent_buffer *leaf,
1231 1232
			   struct btrfs_key *key, int slot,
			   struct btrfs_key *prev_key)
1233 1234
{
	int ret = 0;
1235
	struct btrfs_chunk *chunk;
1236 1237 1238

	switch (key->type) {
	case BTRFS_EXTENT_DATA_KEY:
1239
		ret = check_extent_data_item(leaf, key, slot, prev_key);
1240 1241
		break;
	case BTRFS_EXTENT_CSUM_KEY:
1242
		ret = check_csum_item(leaf, key, slot);
1243
		break;
1244 1245 1246
	case BTRFS_DIR_ITEM_KEY:
	case BTRFS_DIR_INDEX_KEY:
	case BTRFS_XATTR_ITEM_KEY:
1247
		ret = check_dir_item(leaf, key, slot);
1248
		break;
1249
	case BTRFS_BLOCK_GROUP_ITEM_KEY:
1250
		ret = check_block_group_item(leaf, key, slot);
1251
		break;
1252 1253
	case BTRFS_CHUNK_ITEM_KEY:
		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
1254
		ret = btrfs_check_chunk_valid(leaf, chunk, key->offset);
1255
		break;
Q
Qu Wenruo 已提交
1256
	case BTRFS_DEV_ITEM_KEY:
1257
		ret = check_dev_item(leaf, key, slot);
Q
Qu Wenruo 已提交
1258
		break;
1259
	case BTRFS_INODE_ITEM_KEY:
1260
		ret = check_inode_item(leaf, key, slot);
1261
		break;
1262 1263 1264
	case BTRFS_ROOT_ITEM_KEY:
		ret = check_root_item(leaf, key, slot);
		break;
1265 1266 1267 1268
	case BTRFS_EXTENT_ITEM_KEY:
	case BTRFS_METADATA_ITEM_KEY:
		ret = check_extent_item(leaf, key, slot);
		break;
1269 1270 1271 1272 1273
	case BTRFS_TREE_BLOCK_REF_KEY:
	case BTRFS_SHARED_DATA_REF_KEY:
	case BTRFS_SHARED_BLOCK_REF_KEY:
		ret = check_simple_keyed_refs(leaf, key, slot);
		break;
1274 1275 1276
	case BTRFS_EXTENT_DATA_REF_KEY:
		ret = check_extent_data_ref(leaf, key, slot);
		break;
1277 1278 1279 1280
	}
	return ret;
}

1281
static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
1282
{
1283
	struct btrfs_fs_info *fs_info = leaf->fs_info;
1284 1285 1286 1287 1288 1289
	/* No valid key type is 0, so all key should be larger than this key */
	struct btrfs_key prev_key = {0, 0, 0};
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(leaf);
	int slot;

1290
	if (btrfs_header_level(leaf) != 0) {
1291
		generic_err(leaf, 0,
1292 1293 1294 1295 1296
			"invalid level for leaf, have %d expect 0",
			btrfs_header_level(leaf));
		return -EUCLEAN;
	}

1297 1298 1299 1300 1301 1302 1303 1304 1305
	/*
	 * Extent buffers from a relocation tree have a owner field that
	 * corresponds to the subvolume tree they are based on. So just from an
	 * extent buffer alone we can not find out what is the id of the
	 * corresponding subvolume tree, so we can not figure out if the extent
	 * buffer corresponds to the root of the relocation tree or not. So
	 * skip this check for relocation trees.
	 */
	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
1306
		u64 owner = btrfs_header_owner(leaf);
1307

1308 1309 1310 1311 1312 1313 1314
		/* These trees must never be empty */
		if (owner == BTRFS_ROOT_TREE_OBJECTID ||
		    owner == BTRFS_CHUNK_TREE_OBJECTID ||
		    owner == BTRFS_EXTENT_TREE_OBJECTID ||
		    owner == BTRFS_DEV_TREE_OBJECTID ||
		    owner == BTRFS_FS_TREE_OBJECTID ||
		    owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
1315
			generic_err(leaf, 0,
1316 1317 1318 1319
			"invalid root, root %llu must never be empty",
				    owner);
			return -EUCLEAN;
		}
1320 1321 1322 1323 1324 1325
		/* Unknown tree */
		if (owner == 0) {
			generic_err(leaf, 0,
				"invalid owner, root 0 is not defined");
			return -EUCLEAN;
		}
1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350
		return 0;
	}

	if (nritems == 0)
		return 0;

	/*
	 * Check the following things to make sure this is a good leaf, and
	 * leaf users won't need to bother with similar sanity checks:
	 *
	 * 1) key ordering
	 * 2) item offset and size
	 *    No overlap, no hole, all inside the leaf.
	 * 3) item content
	 *    If possible, do comprehensive sanity check.
	 *    NOTE: All checks must only rely on the item data itself.
	 */
	for (slot = 0; slot < nritems; slot++) {
		u32 item_end_expected;
		int ret;

		btrfs_item_key_to_cpu(leaf, &key, slot);

		/* Make sure the keys are in the right order */
		if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
1351
			generic_err(leaf, slot,
1352 1353 1354 1355
	"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
				prev_key.objectid, prev_key.type,
				prev_key.offset, key.objectid, key.type,
				key.offset);
1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369
			return -EUCLEAN;
		}

		/*
		 * Make sure the offset and ends are right, remember that the
		 * item data starts at the end of the leaf and grows towards the
		 * front.
		 */
		if (slot == 0)
			item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
		else
			item_end_expected = btrfs_item_offset_nr(leaf,
								 slot - 1);
		if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
1370
			generic_err(leaf, slot,
1371 1372 1373
				"unexpected item end, have %u expect %u",
				btrfs_item_end_nr(leaf, slot),
				item_end_expected);
1374 1375 1376 1377 1378 1379 1380 1381 1382 1383
			return -EUCLEAN;
		}

		/*
		 * Check to make sure that we don't point outside of the leaf,
		 * just in case all the items are consistent to each other, but
		 * all point outside of the leaf.
		 */
		if (btrfs_item_end_nr(leaf, slot) >
		    BTRFS_LEAF_DATA_SIZE(fs_info)) {
1384
			generic_err(leaf, slot,
1385 1386 1387
			"slot end outside of leaf, have %u expect range [0, %u]",
				btrfs_item_end_nr(leaf, slot),
				BTRFS_LEAF_DATA_SIZE(fs_info));
1388 1389 1390 1391 1392 1393
			return -EUCLEAN;
		}

		/* Also check if the item pointer overlaps with btrfs item. */
		if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
		    btrfs_item_ptr_offset(leaf, slot)) {
1394
			generic_err(leaf, slot,
1395 1396 1397 1398
		"slot overlaps with its data, item end %lu data start %lu",
				btrfs_item_nr_offset(slot) +
				sizeof(struct btrfs_item),
				btrfs_item_ptr_offset(leaf, slot));
1399 1400 1401
			return -EUCLEAN;
		}

1402 1403 1404 1405 1406
		if (check_item_data) {
			/*
			 * Check if the item size and content meet other
			 * criteria
			 */
1407
			ret = check_leaf_item(leaf, &key, slot, &prev_key);
1408 1409 1410
			if (ret < 0)
				return ret;
		}
1411 1412 1413 1414 1415 1416 1417 1418 1419

		prev_key.objectid = key.objectid;
		prev_key.type = key.type;
		prev_key.offset = key.offset;
	}

	return 0;
}

1420
int btrfs_check_leaf_full(struct extent_buffer *leaf)
1421
{
1422
	return check_leaf(leaf, true);
1423
}
1424
ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
1425

1426
int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
1427
{
1428
	return check_leaf(leaf, false);
1429 1430
}

1431
int btrfs_check_node(struct extent_buffer *node)
1432
{
1433
	struct btrfs_fs_info *fs_info = node->fs_info;
1434 1435 1436
	unsigned long nr = btrfs_header_nritems(node);
	struct btrfs_key key, next_key;
	int slot;
1437
	int level = btrfs_header_level(node);
1438 1439 1440
	u64 bytenr;
	int ret = 0;

1441
	if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
1442
		generic_err(node, 0,
1443 1444 1445 1446
			"invalid level for node, have %d expect [1, %d]",
			level, BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}
1447 1448
	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info)) {
		btrfs_crit(fs_info,
1449
"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
1450
			   btrfs_header_owner(node), node->start,
1451
			   nr == 0 ? "small" : "large", nr,
1452
			   BTRFS_NODEPTRS_PER_BLOCK(fs_info));
1453
		return -EUCLEAN;
1454 1455 1456 1457 1458 1459 1460 1461
	}

	for (slot = 0; slot < nr - 1; slot++) {
		bytenr = btrfs_node_blockptr(node, slot);
		btrfs_node_key_to_cpu(node, &key, slot);
		btrfs_node_key_to_cpu(node, &next_key, slot + 1);

		if (!bytenr) {
1462
			generic_err(node, slot,
1463 1464 1465 1466
				"invalid NULL node pointer");
			ret = -EUCLEAN;
			goto out;
		}
1467
		if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) {
1468
			generic_err(node, slot,
1469
			"unaligned pointer, have %llu should be aligned to %u",
1470
				bytenr, fs_info->sectorsize);
1471
			ret = -EUCLEAN;
1472 1473 1474 1475
			goto out;
		}

		if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
1476
			generic_err(node, slot,
1477 1478 1479 1480 1481
	"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
				key.objectid, key.type, key.offset,
				next_key.objectid, next_key.type,
				next_key.offset);
			ret = -EUCLEAN;
1482 1483 1484 1485 1486 1487
			goto out;
		}
	}
out:
	return ret;
}
1488
ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);