/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_EXTENT_IO_H
#define BTRFS_EXTENT_IO_H

#include <linux/rbtree.h>
#include <linux/refcount.h>
#include "ulist.h"

/* bits for the extent state */
#define EXTENT_DIRTY		(1U << 0)
#define EXTENT_WRITEBACK	(1U << 1)
#define EXTENT_UPTODATE		(1U << 2)
#define EXTENT_LOCKED		(1U << 3)
#define EXTENT_NEW		(1U << 4)
#define EXTENT_DELALLOC		(1U << 5)
#define EXTENT_DEFRAG		(1U << 6)
#define EXTENT_BOUNDARY		(1U << 9)
#define EXTENT_NODATASUM	(1U << 10)
#define EXTENT_CLEAR_META_RESV	(1U << 11)
#define EXTENT_FIRST_DELALLOC	(1U << 12)
#define EXTENT_NEED_WAIT	(1U << 13)
#define EXTENT_DAMAGED		(1U << 14)
#define EXTENT_NORESERVE	(1U << 15)
#define EXTENT_QGROUP_RESERVED	(1U << 16)
#define EXTENT_CLEAR_DATA_RESV	(1U << 17)
#define EXTENT_DELALLOC_NEW	(1U << 18)
/* bits that participate in IO state (locking and writeback) */
#define EXTENT_IOBITS		(EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_DO_ACCOUNTING    (EXTENT_CLEAR_META_RESV | \
				 EXTENT_CLEAR_DATA_RESV)
/* control bits that are masked off when reporting state to callers */
#define EXTENT_CTLBITS		(EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)

/*
 * flags for bio submission. The high bits indicate the compression
 * type for this bio
 */
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_FLAG_SHIFT 16

/* these are bit numbers for test/set bit */
#define EXTENT_BUFFER_UPTODATE 0
#define EXTENT_BUFFER_DIRTY 2
#define EXTENT_BUFFER_CORRUPT 3
#define EXTENT_BUFFER_READAHEAD 4	/* this got triggered by readahead */
#define EXTENT_BUFFER_TREE_REF 5
#define EXTENT_BUFFER_STALE 6
#define EXTENT_BUFFER_WRITEBACK 7
#define EXTENT_BUFFER_READ_ERR 8        /* read IO error */
#define EXTENT_BUFFER_UNMAPPED 9
#define EXTENT_BUFFER_IN_TREE 10
#define EXTENT_BUFFER_WRITE_ERR 11    /* write IO error */

/* these are flags for __process_pages_contig */
#define PAGE_UNLOCK		(1 << 0)
#define PAGE_CLEAR_DIRTY	(1 << 1)
#define PAGE_SET_WRITEBACK	(1 << 2)
#define PAGE_END_WRITEBACK	(1 << 3)
#define PAGE_SET_PRIVATE2	(1 << 4)
#define PAGE_SET_ERROR		(1 << 5)
#define PAGE_LOCK		(1 << 6)

/*
 * page->private values.  Every page that is controlled by the extent
 * map has page->private set to one.
 */
#define EXTENT_PAGE_PRIVATE 1

/*
 * The extent buffer bitmap operations are done with byte granularity instead of
 * word granularity for two reasons:
 * 1. The bitmaps must be little-endian on disk.
 * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
 *    single word in a bitmap may straddle two pages in the extent buffer.
 */
#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
#define BITMAP_FIRST_BYTE_MASK(start) \
	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
#define BITMAP_LAST_BYTE_MASK(nbits) \
	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))

/* forward declarations to keep this header's include list minimal */
struct extent_state;
struct btrfs_root;
struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;

88
typedef	blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio,
89 90
				       int mirror_num, unsigned long bio_flags,
				       u64 bio_offset);
91 92

typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
93
		struct bio *bio, u64 bio_offset);
94

95
struct extent_io_ops {
96 97 98 99
	/*
	 * The following callbacks must be allways defined, the function
	 * pointer will be called unconditionally.
	 */
100
	extent_submit_bio_hook_t *submit_bio_hook;
101 102 103
	int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
				    struct page *page, u64 start, u64 end,
				    int mirror);
104
	int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
105 106 107 108
};

struct extent_io_tree {
	struct rb_root state;
109
	void *private_data;
110
	u64 dirty_bytes;
111
	int track_uptodate;
112
	spinlock_t lock;
113
	const struct extent_io_ops *ops;
114 115 116 117 118 119
};

struct extent_state {
	u64 start;
	u64 end; /* inclusive */
	struct rb_node rb_node;
J
Josef Bacik 已提交
120 121

	/* ADD NEW ELEMENTS AFTER THIS */
122
	wait_queue_head_t wq;
123
	refcount_t refs;
124
	unsigned state;
125

126
	struct io_failure_record *failrec;
127

128
#ifdef CONFIG_BTRFS_DEBUG
129
	struct list_head leak_list;
130
#endif
131 132
};

133
#define INLINE_EXTENT_BUFFER_PAGES 16
134
#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
135 136 137
struct extent_buffer {
	u64 start;
	unsigned long len;
138
	unsigned long bflags;
139
	struct btrfs_fs_info *fs_info;
140
	spinlock_t refs_lock;
141
	atomic_t refs;
142
	atomic_t io_pages;
143
	int read_mirror;
144
	struct rcu_head rcu_head;
145
	pid_t lock_owner;
146

147 148 149 150 151 152 153
	/* count of read lock holders on the extent buffer */
	atomic_t write_locks;
	atomic_t read_locks;
	atomic_t blocking_writers;
	atomic_t blocking_readers;
	atomic_t spinning_readers;
	atomic_t spinning_writers;
154 155 156
	short lock_nested;
	/* >= 0 if eb belongs to a log tree, -1 otherwise */
	short log_index;
157 158 159 160 161 162 163 164

	/* protects write locks */
	rwlock_t lock;

	/* readers use lock_wq while they wait for the write
	 * lock holders to unlock
	 */
	wait_queue_head_t write_lock_wq;
165

166 167
	/* writers use read_lock_wq while they wait for readers
	 * to unlock
168
	 */
169
	wait_queue_head_t read_lock_wq;
170
	struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
171 172 173
#ifdef CONFIG_BTRFS_DEBUG
	struct list_head leak_list;
#endif
174 175
};

176 177 178 179 180
/*
 * Structure to record how many bytes and which ranges are set/cleared
 */
struct extent_changeset {
	/* How many bytes are set/cleared in this operation */
181
	unsigned int bytes_changed;
182 183

	/* Changed ranges */
184
	struct ulist range_changed;
185 186
};

187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
static inline void extent_changeset_init(struct extent_changeset *changeset)
{
	changeset->bytes_changed = 0;
	ulist_init(&changeset->range_changed);
}

/*
 * Allocate and initialize a changeset.  Returns NULL if the allocation
 * fails; the caller owns the result and frees it with extent_changeset_free().
 */
static inline struct extent_changeset *extent_changeset_alloc(void)
{
	struct extent_changeset *cs = kmalloc(sizeof(*cs), GFP_KERNEL);

	if (cs)
		extent_changeset_init(cs);
	return cs;
}

/*
 * Drop the recorded ranges and byte count but keep the changeset itself
 * usable.  Accepts NULL as a no-op.
 */
static inline void extent_changeset_release(struct extent_changeset *changeset)
{
	if (changeset) {
		changeset->bytes_changed = 0;
		ulist_release(&changeset->range_changed);
	}
}

/* Release a changeset's contents and free the structure; NULL is a no-op. */
static inline void extent_changeset_free(struct extent_changeset *changeset)
{
	if (changeset) {
		extent_changeset_release(changeset);
		kfree(changeset);
	}
}

221 222 223 224 225 226 227 228 229 230 231
static inline void extent_set_compress_type(unsigned long *bio_flags,
					    int compress_type)
{
	*bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
}

/* Recover the compression type stored in the high bits of the bio flags. */
static inline int extent_compress_type(unsigned long bio_flags)
{
	return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
}

232 233
struct extent_map_tree;

234
typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
235
					  struct page *page,
236
					  size_t pg_offset,
237 238 239
					  u64 start, u64 len,
					  int create);

240
void extent_io_tree_init(struct extent_io_tree *tree, void *private_data);
241
int try_release_extent_mapping(struct page *page, gfp_t mask);
242
int try_release_extent_buffer(struct page *page);
243
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
244
		     struct extent_state **cached);
245 246 247 248 249 250

/* Lock [start, end] without keeping a cached extent_state for the caller. */
static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
	return lock_extent_bits(tree, start, end, NULL);
}

251
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
252
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
253
			  get_extent_t *get_extent, int mirror_num);
254
int __init extent_io_init(void);
255
void __cold extent_io_exit(void);
256 257 258

u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end,
259
		     u64 max_bytes, unsigned bits, int contig);
260

261
void free_extent_state(struct extent_state *state);
262
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
263
		   unsigned bits, int filled,
264
		   struct extent_state *cached_state);
265
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
266
		unsigned bits, struct extent_changeset *changeset);
267
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
268
		     unsigned bits, int wake, int delete,
269
		     struct extent_state **cached);
270 271 272 273
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		     unsigned bits, int wake, int delete,
		     struct extent_state **cached, gfp_t mask,
		     struct extent_changeset *changeset);
274

275 276
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
277
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
278 279 280
}

/* Unlock [start, end], consuming the caller's cached extent_state. */
static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
		u64 end, struct extent_state **cached)
{
	return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
				GFP_NOFS, NULL);
}

287 288
static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
		u64 start, u64 end, struct extent_state **cached)
289
{
290 291
	return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
				GFP_ATOMIC, NULL);
292 293 294
}

/* Clear the given bits on [start, end]; wakes waiters if a lock bit drops. */
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
		u64 end, unsigned bits)
{
	int wake = 0;

	if (bits & EXTENT_LOCKED)
		wake = 1;

	return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
}

305
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
306
			   unsigned bits, struct extent_changeset *changeset);
307
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
308
		   unsigned bits, u64 *failed_start,
309
		   struct extent_state **cached_state, gfp_t mask);
310 311

/* Set the given bits on [start, end] with GFP_NOFS allocations. */
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
		u64 end, unsigned bits)
{
	return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS);
}

317
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
318
		u64 end, struct extent_state **cached_state)
319
{
320
	return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
321
				cached_state, GFP_NOFS, NULL);
322
}
323 324 325 326 327 328 329 330

static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
		u64 end, gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
			      NULL, mask);
}

331
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
332
		u64 end)
333 334 335
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC |
336
				EXTENT_DO_ACCOUNTING, 0, 0, NULL);
337 338
}

J
Josef Bacik 已提交
339
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
340
		       unsigned bits, unsigned clear_bits,
341
		       struct extent_state **cached_state);
342 343

/* Mark [start, end] as delalloc (plus any extra bits the caller needs). */
static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
				      u64 end, unsigned int extra_bits,
				      struct extent_state **cached_state)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
			      NULL, cached_state, GFP_NOFS);
}

/* Like set_extent_delalloc() but also tags the range for defrag. */
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
		u64 end, struct extent_state **cached_state)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
			      NULL, cached_state, GFP_NOFS);
}

/* Mark [start, end] as newly allocated. */
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
		u64 end)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL,
			GFP_NOFS);
}

/* Mark [start, end] as up to date, using the caller's allocation mask. */
static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
		u64 end, struct extent_state **cached_state, gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
			      cached_state, mask);
}

374
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
375
			  u64 *start_ret, u64 *end_ret, unsigned bits,
376
			  struct extent_state **cached_state);
377 378
int extent_invalidatepage(struct extent_io_tree *tree,
			  struct page *page, unsigned long offset);
379
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
380
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
381
			      int mode);
382
int extent_writepages(struct address_space *mapping,
383
		      struct writeback_control *wbc);
384 385
int btree_write_cache_pages(struct address_space *mapping,
			    struct writeback_control *wbc);
386 387
int extent_readpages(struct address_space *mapping, struct list_head *pages,
		     unsigned nr_pages);
Y
Yehuda Sadeh 已提交
388
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
389
		__u64 start, __u64 len);
390 391
void set_page_extent_mapped(struct page *page);

392
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
393
					  u64 start);
394 395
struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						  u64 start, unsigned long len);
396
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
397
						u64 start);
398
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
399
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
400
					 u64 start);
401
void free_extent_buffer(struct extent_buffer *eb);
402
void free_extent_buffer_stale(struct extent_buffer *eb);
403 404 405
#define WAIT_NONE	0
#define WAIT_COMPLETE	1
#define WAIT_PAGE_LOCK	2
406
int read_extent_buffer_pages(struct extent_io_tree *tree,
407
			     struct extent_buffer *eb, int wait,
408
			     int mirror_num);
409
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
410

411
static inline int num_extent_pages(const struct extent_buffer *eb)
412
{
413 414
	return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
	       (eb->start >> PAGE_SHIFT);
415 416
}

417 418 419 420 421
static inline void extent_buffer_get(struct extent_buffer *eb)
{
	atomic_inc(&eb->refs);
}

422 423 424 425 426
static inline int extent_buffer_uptodate(struct extent_buffer *eb)
{
	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}

427 428 429
int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len);
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
430 431
			unsigned long start,
			unsigned long len);
432 433
int read_extent_buffer_to_user(const struct extent_buffer *eb,
			       void __user *dst, unsigned long start,
434
			       unsigned long len);
435 436 437
void write_extent_buffer_fsid(struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
		const void *src);
438 439
void write_extent_buffer(struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len);
440 441
void copy_extent_buffer_full(struct extent_buffer *dst,
			     struct extent_buffer *src);
442 443 444 445 446 447 448
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len);
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len);
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len);
449 450
void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
			   unsigned long len);
451 452 453 454 455 456
int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
			   unsigned long pos);
void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len);
void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
				unsigned long pos, unsigned long len);
457
void clear_extent_buffer_dirty(struct extent_buffer *eb);
458
bool set_extent_buffer_dirty(struct extent_buffer *eb);
459
void set_extent_buffer_uptodate(struct extent_buffer *eb);
460
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
461
int extent_buffer_under_io(struct extent_buffer *eb);
462 463 464 465
int map_private_extent_buffer(const struct extent_buffer *eb,
			      unsigned long offset, unsigned long min_len,
			      char **map, unsigned long *map_start,
			      unsigned long *map_len);
466
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
467
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
468
void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
469
				 u64 delalloc_end, struct page *locked_page,
470
				 unsigned bits_to_clear,
471
				 unsigned long page_ops);
472
struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte);
473
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
474
struct bio *btrfs_bio_clone(struct bio *bio);
475
struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size);
476

477
struct btrfs_fs_info;
478
struct btrfs_inode;
479

480 481 482
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
		      u64 length, u64 logical, struct page *page,
		      unsigned int pg_offset, int mirror_num);
483 484 485 486
int clean_io_failure(struct btrfs_fs_info *fs_info,
		     struct extent_io_tree *failure_tree,
		     struct extent_io_tree *io_tree, u64 start,
		     struct page *page, u64 ino, unsigned int pg_offset);
487
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
488 489
int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
			 struct extent_buffer *eb, int mirror_num);
490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509

/*
 * When IO fails, either with EIO or csum verification fails, we
 * try other mirrors that might have a good copy of the data.  This
 * io_failure_record is used to record state as we go through all the
 * mirrors.  If another mirror has good data, the page is set up to date
 * and things continue.  If a good mirror can't be found, the original
 * bio end_io callback is called to indicate things have failed.
 */
struct io_failure_record {
	struct page *page;
	u64 start;
	u64 len;
	u64 logical;
	unsigned long bio_flags;
	int this_mirror;
	int failed_mirror;
	int in_validation;
};

510

511 512
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
		u64 end);
513 514
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
				struct io_failure_record **failrec_ret);
515
bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
516
			    struct io_failure_record *failrec, int fail_mirror);
517 518 519
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
				    struct io_failure_record *failrec,
				    struct page *page, int pg_offset, int icsum,
520
				    bio_end_io_t *endio_func, void *data);
521 522 523
int free_io_failure(struct extent_io_tree *failure_tree,
		    struct extent_io_tree *io_tree,
		    struct io_failure_record *rec);
524
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
525 526 527
u64 find_lock_delalloc_range(struct inode *inode, struct extent_io_tree *tree,
			     struct page *locked_page, u64 *start,
			     u64 *end);
528
#endif
529
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
530
					       u64 start);
531

532
#endif