/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#ifndef BTRFS_CTREE_H
#define BTRFS_CTREE_H

#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/highmem.h>
#include <linux/fs.h>
#include <linux/rwsem.h>
#include <linux/semaphore.h>
#include <linux/completion.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
#include <linux/slab.h>
#include <trace/events/btrfs.h>
#include <asm/unaligned.h>
#include <linux/pagemap.h>
#include <linux/btrfs.h>
#include <linux/btrfs_tree.h>
#include <linux/workqueue.h>
#include <linux/security.h>
#include <linux/sizes.h>
#include <linux/dynamic_debug.h>
#include <linux/refcount.h>
#include <linux/crc32c.h>
#include <linux/iomap.h>
#include "extent-io-tree.h"
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
#include "block-rsv.h"
#include "locking.h"
#include "misc.h"

struct btrfs_trans_handle;
struct btrfs_transaction;
struct btrfs_pending_snapshot;
struct btrfs_delayed_ref_root;
struct btrfs_space_info;
struct btrfs_block_group;
struct btrfs_ordered_sum;
struct btrfs_ref;
struct btrfs_bio;
struct btrfs_ioctl_encoded_io_args;
struct btrfs_device;
struct btrfs_fs_devices;
struct btrfs_balance_control;
struct btrfs_delayed_root;
struct reloc_control;

struct btrfs_map_token;

#define BTRFS_OLDEST_GENERATION	0ULL

#define BTRFS_EMPTY_DIR_SIZE 0

#define BTRFS_DIRTY_METADATA_THRESH	SZ_32M

#define BTRFS_MAX_EXTENT_SIZE SZ_128M

static inline unsigned long btrfs_chunk_item_size(int num_stripes)
{
	BUG_ON(num_stripes == 0);
	return sizeof(struct btrfs_chunk) +
		sizeof(struct btrfs_stripe) * (num_stripes - 1);
}
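
/*
 * Worked example (illustrative, not from the original source): the chunk
 * item embeds one struct btrfs_stripe, so a three-stripe chunk costs the
 * base structure plus two extra stripe entries:
 *
 *	btrfs_chunk_item_size(3)
 *		== sizeof(struct btrfs_chunk) + 2 * sizeof(struct btrfs_stripe)
 */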

#define BTRFS_SUPER_INFO_OFFSET			SZ_64K
#define BTRFS_SUPER_INFO_SIZE			4096
static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);

/*
 * The reserved space at the beginning of each device.
 * It covers the primary super block and leaves space for potential use by other
 * tools like bootloaders or to lower potential damage of accidental overwrite.
 */
#define BTRFS_DEVICE_RANGE_RESERVED			(SZ_1M)

/* Read ahead values for struct btrfs_path.reada */
enum {
	READA_NONE,
	READA_BACK,
	READA_FORWARD,
	/*
	 * Similar to READA_FORWARD but unlike it:
	 *
	 * 1) It will trigger readahead even for leaves that are not close to
	 *    each other on disk;
	 * 2) It also triggers readahead for nodes;
	 * 3) During a search, even when a node or leaf is already in memory, it
	 *    will still trigger readahead for other nodes and leaves that follow
	 *    it.
	 *
	 * This is meant to be used only when we know we are iterating over the
	 * entire tree or a very large part of it.
	 */
	READA_FORWARD_ALWAYS,
};

/*
 * btrfs_paths remember the path taken from the root down to the leaf.
 * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
 * to any other levels that are present.
 *
 * The slots array records the index of the item or block pointer
 * used while walking the tree.
 */
struct btrfs_path {
	struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
	int slots[BTRFS_MAX_LEVEL];
	/* if there is real range locking, this locks field will change */
	u8 locks[BTRFS_MAX_LEVEL];
	u8 reada;
	/* keep some upper locks as we walk down */
	u8 lowest_level;

	/*
	 * set by btrfs_split_item, tells search_slot to keep all locks
	 * and to force calls to keep space in the nodes
	 */
	unsigned int search_for_split:1;
	unsigned int keep_locks:1;
	unsigned int skip_locking:1;
	unsigned int search_commit_root:1;
	unsigned int need_commit_sem:1;
	unsigned int skip_release_on_error:1;
	/*
	 * Indicate that new item (btrfs_search_slot) is extending already
	 * existing item and ins_len contains only the data size and not item
	 * header (ie. sizeof(struct btrfs_item) is not included).
	 */
	unsigned int search_for_extension:1;
	/* Stop search if any locks need to be taken (for read) */
	unsigned int nowait:1;
};
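
/*
 * Minimal read-only lookup sketch (illustrative only, error handling
 * trimmed), using the path helpers declared later in this header:
 *
 *	struct btrfs_path *path = btrfs_alloc_path();
 *
 *	if (!path)
 *		return -ENOMEM;
 *	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 *	if (ret == 0) {
 *		// item found at path->nodes[0], slot path->slots[0]
 *	}
 *	btrfs_free_path(path);	// drops locks and extent buffer references
 */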

struct btrfs_dev_replace {
	u64 replace_state;	/* see #define above */
	time64_t time_started;	/* seconds since 1-Jan-1970 */
	time64_t time_stopped;	/* seconds since 1-Jan-1970 */
	atomic64_t num_write_errors;
	atomic64_t num_uncorrectable_read_errors;

	u64 cursor_left;
	u64 committed_cursor_left;
	u64 cursor_left_last_write_of_item;
	u64 cursor_right;

	u64 cont_reading_from_srcdev_mode;	/* see #define above */

	int is_valid;
	int item_needs_writeback;
	struct btrfs_device *srcdev;
	struct btrfs_device *tgtdev;

	struct mutex lock_finishing_cancel_unmount;
	struct rw_semaphore rwsem;

	struct btrfs_scrub_progress scrub_progress;

	struct percpu_counter bio_counter;
	wait_queue_head_t replace_wait;
};

/*
 * free clusters are used to claim free space in relatively large chunks,
 * allowing us to do less seeky writes. They are used for all metadata
 * allocations. In ssd_spread mode they are also used for data allocations.
 */
struct btrfs_free_cluster {
	spinlock_t lock;
	spinlock_t refill_lock;
	struct rb_root root;

	/* largest extent in this cluster */
	u64 max_size;

	/* first extent starting offset */
	u64 window_start;

	/* We did a full search and couldn't create a cluster */
	bool fragmented;

	struct btrfs_block_group *block_group;
	/*
	 * when a cluster is allocated from a block group, we put the
	 * cluster onto a list in the block group so that it can
	 * be freed before the block group is freed.
	 */
	struct list_head block_group_list;
};

/*
 * Discard control.
 *
 * Async discard uses multiple lists to differentiate the discard filter
 * parameters.  Index 0 is for completely free block groups where we need to
 * ensure the entire block group is trimmed without being lossy.  Indices
 * afterwards represent monotonically decreasing discard filter sizes to
 * prioritize what should be discarded next.
 */
#define BTRFS_NR_DISCARD_LISTS		3
#define BTRFS_DISCARD_INDEX_UNUSED	0
#define BTRFS_DISCARD_INDEX_START	1

struct btrfs_discard_ctl {
	struct workqueue_struct *discard_workers;
	struct delayed_work work;
	spinlock_t lock;
	struct btrfs_block_group *block_group;
	struct list_head discard_list[BTRFS_NR_DISCARD_LISTS];
	u64 prev_discard;
	u64 prev_discard_time;
	atomic_t discardable_extents;
	atomic64_t discardable_bytes;
	u64 max_discard_size;
	u64 delay_ms;
	u32 iops_limit;
	u32 kbps_limit;
	u64 discard_extent_bytes;
	u64 discard_bitmap_bytes;
	atomic64_t discard_bytes_saved;
};

/*
 * Exclusive operations (device replace, resize, device add/remove, balance)
 */
enum btrfs_exclusive_operation {
	BTRFS_EXCLOP_NONE,
	BTRFS_EXCLOP_BALANCE_PAUSED,
	BTRFS_EXCLOP_BALANCE,
	BTRFS_EXCLOP_DEV_ADD,
	BTRFS_EXCLOP_DEV_REMOVE,
	BTRFS_EXCLOP_DEV_REPLACE,
	BTRFS_EXCLOP_RESIZE,
	BTRFS_EXCLOP_SWAP_ACTIVATE,
};

/* Store data about transaction commits, exported via sysfs. */
struct btrfs_commit_stats {
	/* Total number of commits */
	u64 commit_count;
	/* The maximum commit duration so far in ns */
	u64 max_commit_dur;
	/* The last commit duration in ns */
	u64 last_commit_dur;
	/* The total commit duration in ns */
	u64 total_commit_dur;
};

struct btrfs_fs_info {
	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
	unsigned long flags;
	struct btrfs_root *tree_root;
	struct btrfs_root *chunk_root;
	struct btrfs_root *dev_root;
	struct btrfs_root *fs_root;
	struct btrfs_root *quota_root;
	struct btrfs_root *uuid_root;
	struct btrfs_root *data_reloc_root;
	struct btrfs_root *block_group_root;

	/* the log root tree is a directory of all the other log roots */
	struct btrfs_root *log_root_tree;

	/* The tree that holds the global roots (csum, extent, etc) */
	rwlock_t global_root_lock;
	struct rb_root global_root_tree;

	spinlock_t fs_roots_radix_lock;
	struct radix_tree_root fs_roots_radix;

	/* block group cache stuff */
	rwlock_t block_group_cache_lock;
	struct rb_root_cached block_group_cache_tree;

	/* keep track of unallocated space */
	atomic64_t free_chunk_space;

	/* Track ranges which are used by log trees blocks/logged data extents */
	struct extent_io_tree excluded_extents;

	/* logical->physical extent mapping */
	struct extent_map_tree mapping_tree;

	/*
	 * block reservation for extent, checksum, root tree and
	 * delayed dir index item
	 */
	struct btrfs_block_rsv global_block_rsv;
	/* block reservation for metadata operations */
	struct btrfs_block_rsv trans_block_rsv;
	/* block reservation for chunk tree */
	struct btrfs_block_rsv chunk_block_rsv;
	/* block reservation for delayed operations */
	struct btrfs_block_rsv delayed_block_rsv;
	/* block reservation for delayed refs */
	struct btrfs_block_rsv delayed_refs_rsv;

	struct btrfs_block_rsv empty_block_rsv;

	u64 generation;
	u64 last_trans_committed;
	/*
	 * Generation of the last transaction used for block group relocation
	 * since the filesystem was last mounted (or 0 if none happened yet).
	 * Must be written and read while holding btrfs_fs_info::commit_root_sem.
	 */
	u64 last_reloc_trans;
	u64 avg_delayed_ref_runtime;

	/*
	 * this is updated to the current trans every time a full commit
	 * is required instead of the faster short fsync log commits
	 */
	u64 last_trans_log_full_commit;
	unsigned long mount_opt;

	unsigned long compress_type:4;
	unsigned int compress_level;
	u32 commit_interval;
	/*
	 * This is an advisory number only. The read side is safe even if it
	 * sees a stale value, because we will write out the data into a
	 * regular extent anyway. The write side (mount/remount) is under
	 * ->s_umount lock, so it is also safe.
	 */
	u64 max_inline;

	struct btrfs_transaction *running_transaction;
	wait_queue_head_t transaction_throttle;
	wait_queue_head_t transaction_wait;
	wait_queue_head_t transaction_blocked_wait;
	wait_queue_head_t async_submit_wait;

	/*
	 * Used to protect the incompat_flags, compat_flags, compat_ro_flags
	 * when they are updated.
	 *
	 * Because we never clear these flags, we don't need to take the lock
	 * on the read side.
	 *
	 * We also don't need the lock at mount time, because there is no
	 * other task that could be updating the flags then.
	 */
	spinlock_t super_lock;
	struct btrfs_super_block *super_copy;
	struct btrfs_super_block *super_for_commit;
	struct super_block *sb;
	struct inode *btree_inode;
	struct mutex tree_log_mutex;
	struct mutex transaction_kthread_mutex;
	struct mutex cleaner_mutex;
	struct mutex chunk_mutex;

	/*
	 * this is taken to make sure we don't set block groups ro after
	 * the free space cache has been allocated on them
	 */
	struct mutex ro_block_group_mutex;

	/* this is used during read/modify/write to make sure
	 * no two ios are trying to mod the same stripe at the same
	 * time
	 */
	struct btrfs_stripe_hash_table *stripe_hash_table;

	/*
	 * this protects the ordered operations list only while we are
	 * processing all of the entries on it.  This way we make
	 * sure the commit code doesn't find the list temporarily empty
	 * because another function happens to be doing non-waiting preflush
	 * before jumping into the main commit.
	 */
	struct mutex ordered_operations_mutex;

	struct rw_semaphore commit_root_sem;

	struct rw_semaphore cleanup_work_sem;

	struct rw_semaphore subvol_sem;

	spinlock_t trans_lock;
	/*
	 * the reloc mutex goes with the trans lock, it is taken
	 * during commit to protect us from the relocation code
	 */
	struct mutex reloc_mutex;

	struct list_head trans_list;
	struct list_head dead_roots;
	struct list_head caching_block_groups;

	spinlock_t delayed_iput_lock;
	struct list_head delayed_iputs;
	atomic_t nr_delayed_iputs;
	wait_queue_head_t delayed_iputs_wait;

	atomic64_t tree_mod_seq;

	/* this protects tree_mod_log and tree_mod_seq_list */
	rwlock_t tree_mod_log_lock;
	struct rb_root tree_mod_log;
	struct list_head tree_mod_seq_list;

	atomic_t async_delalloc_pages;

	/*
	 * this is used to protect the following list -- ordered_roots.
	 */
	spinlock_t ordered_root_lock;

	/*
	 * all fs/file tree roots in which there are data=ordered extents
	 * pending writeback are added into this list.
	 *
	 * these can span multiple transactions and basically include
	 * every dirty data page that isn't from nodatacow
	 */
	struct list_head ordered_roots;

	struct mutex delalloc_root_mutex;
	spinlock_t delalloc_root_lock;
	/* all fs/file tree roots that have delalloc inodes. */
	struct list_head delalloc_roots;

	/*
	 * there is a pool of worker threads for checksumming during writes
	 * and a pool for checksumming after reads.  This is because readers
	 * can run with FS locks held, and the writers may be waiting for
	 * those locks.  We don't want ordering in the pending list to cause
	 * deadlocks, and so the two are serviced separately.
	 *
	 * A third pool does submit_bio to avoid deadlocking with the other
	 * two
	 */
	struct btrfs_workqueue *workers;
	struct btrfs_workqueue *hipri_workers;
	struct btrfs_workqueue *delalloc_workers;
	struct btrfs_workqueue *flush_workers;
	struct workqueue_struct *endio_workers;
	struct workqueue_struct *endio_meta_workers;
	struct workqueue_struct *endio_raid56_workers;
	struct workqueue_struct *rmw_workers;
	struct workqueue_struct *compressed_write_workers;
	struct btrfs_workqueue *endio_write_workers;
	struct btrfs_workqueue *endio_freespace_worker;
	struct btrfs_workqueue *caching_workers;

	/*
	 * fixup workers take dirty pages that didn't properly go through
	 * the cow mechanism and make them safe to write.  It happens
	 * for the sys_munmap function call path
	 */
	struct btrfs_workqueue *fixup_workers;
	struct btrfs_workqueue *delayed_workers;

	struct task_struct *transaction_kthread;
	struct task_struct *cleaner_kthread;
	u32 thread_pool_size;

	struct kobject *space_info_kobj;
	struct kobject *qgroups_kobj;
	struct kobject *discard_kobj;

	/* used to keep from writing metadata until there is a nice batch */
	struct percpu_counter dirty_metadata_bytes;
	struct percpu_counter delalloc_bytes;
	struct percpu_counter ordered_bytes;
	s32 dirty_metadata_batch;
	s32 delalloc_batch;

	struct list_head dirty_cowonly_roots;

	struct btrfs_fs_devices *fs_devices;

	/*
	 * The space_info list is effectively read only after initial
	 * setup.  It is populated at mount time and cleaned up after
	 * all block groups are removed.  RCU is used to protect it.
	 */
	struct list_head space_info;

	struct btrfs_space_info *data_sinfo;

	struct reloc_control *reloc_ctl;

	/* data_alloc_cluster is only used in ssd_spread mode */
	struct btrfs_free_cluster data_alloc_cluster;

	/* all metadata allocations go through this cluster */
	struct btrfs_free_cluster meta_alloc_cluster;

	/* auto defrag inodes go here */
	spinlock_t defrag_inodes_lock;
	struct rb_root defrag_inodes;
	atomic_t defrag_running;

	/* Used to protect avail_{data, metadata, system}_alloc_bits */
	seqlock_t profiles_lock;
	/*
	 * these three are in extended format (availability of single
	 * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
	 * types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits)
	 */
	u64 avail_data_alloc_bits;
	u64 avail_metadata_alloc_bits;
	u64 avail_system_alloc_bits;

	/* restriper state */
	spinlock_t balance_lock;
	struct mutex balance_mutex;
	atomic_t balance_pause_req;
	atomic_t balance_cancel_req;
	struct btrfs_balance_control *balance_ctl;
	wait_queue_head_t balance_wait_q;

	/* Cancellation requests for chunk relocation */
	atomic_t reloc_cancel_req;

	u32 data_chunk_allocations;
	u32 metadata_ratio;

	void *bdev_holder;

	/* private scrub information */
	struct mutex scrub_lock;
	atomic_t scrubs_running;
	atomic_t scrub_pause_req;
	atomic_t scrubs_paused;
	atomic_t scrub_cancel_req;
	wait_queue_head_t scrub_pause_wait;
	/*
	 * The worker pointers are NULL iff the refcount is 0, ie. scrub is not
	 * running.
	 */
	refcount_t scrub_workers_refcnt;
	struct workqueue_struct *scrub_workers;
	struct workqueue_struct *scrub_wr_completion_workers;
	struct workqueue_struct *scrub_parity_workers;
	struct btrfs_subpage_info *subpage_info;

	struct btrfs_discard_ctl discard_ctl;

#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
	u32 check_integrity_print_mask;
#endif
	/* is qgroup tracking in a consistent state? */
	u64 qgroup_flags;

	/* holds configuration and tracking. Protected by qgroup_lock */
	struct rb_root qgroup_tree;
	spinlock_t qgroup_lock;

	/*
	 * used to avoid frequently calling ulist_alloc()/ulist_free()
	 * when doing qgroup accounting, it must be protected by qgroup_lock.
	 */
	struct ulist *qgroup_ulist;

	/*
	 * Protect user change for quota operations. If a transaction is needed,
	 * it must be started before locking this lock.
	 */
	struct mutex qgroup_ioctl_lock;

	/* list of dirty qgroups to be written at next commit */
	struct list_head dirty_qgroups;

	/* used by qgroup for an efficient tree traversal */
	u64 qgroup_seq;

	/* qgroup rescan items */
	struct mutex qgroup_rescan_lock; /* protects the progress item */
	struct btrfs_key qgroup_rescan_progress;
	struct btrfs_workqueue *qgroup_rescan_workers;
	struct completion qgroup_rescan_completion;
	struct btrfs_work qgroup_rescan_work;
	bool qgroup_rescan_running;	/* protected by qgroup_rescan_lock */
	u8 qgroup_drop_subtree_thres;

	/* filesystem state */
	unsigned long fs_state;

	struct btrfs_delayed_root *delayed_root;

	/* Extent buffer radix tree */
	spinlock_t buffer_lock;
	/* Entries are eb->start / sectorsize */
	struct radix_tree_root buffer_radix;

	/* next backup root to be overwritten */
	int backup_root_index;

	/* device replace state */
	struct btrfs_dev_replace dev_replace;

	struct semaphore uuid_tree_rescan_sem;

	/* Used to reclaim the metadata space in the background. */
	struct work_struct async_reclaim_work;
	struct work_struct async_data_reclaim_work;
	struct work_struct preempt_reclaim_work;

	/* Reclaim partially filled block groups in the background */
	struct work_struct reclaim_bgs_work;
	struct list_head reclaim_bgs;
	int bg_reclaim_threshold;

	spinlock_t unused_bgs_lock;
	struct list_head unused_bgs;
	struct mutex unused_bg_unpin_mutex;
	/* Protect block groups that are going to be deleted */
	struct mutex reclaim_bgs_lock;

	/* Cached block sizes */
	u32 nodesize;
	u32 sectorsize;
	/* ilog2 of sectorsize, use to avoid 64bit division */
	u32 sectorsize_bits;
	u32 csum_size;
	u32 csums_per_leaf;
	u32 stripesize;

	/*
	 * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
	 * filesystem, on zoned it depends on the device constraints.
	 */
	u64 max_extent_size;

	/* Block groups and devices containing active swapfiles. */
	spinlock_t swapfile_pins_lock;
	struct rb_root swapfile_pins;

	struct crypto_shash *csum_shash;

	/* Type of exclusive operation running, protected by super_lock */
	enum btrfs_exclusive_operation exclusive_operation;

	/*
	 * Zone size > 0 when in ZONED mode, otherwise it's used for a check
	 * if the mode is enabled
	 */
	u64 zone_size;

	/* Max size to emit ZONE_APPEND write command */
	u64 max_zone_append_size;
	struct mutex zoned_meta_io_lock;
	spinlock_t treelog_bg_lock;
	u64 treelog_bg;

	/*
	 * Start of the dedicated data relocation block group, protected by
	 * relocation_bg_lock.
	 */
	spinlock_t relocation_bg_lock;
	u64 data_reloc_bg;
	struct mutex zoned_data_reloc_io_lock;

	u64 nr_global_roots;

	spinlock_t zone_active_bgs_lock;
	struct list_head zone_active_bgs;

	/* Updates are not protected by any lock */
	struct btrfs_commit_stats commit_stats;

	/*
	 * Last generation where we dropped a non-relocation root.
	 * Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen()
	 * to change it and to read it, respectively.
	 */
	u64 last_root_drop_gen;

	/*
	 * Annotations for transaction events (structures are empty when
	 * compiled without lockdep).
	 */
	struct lockdep_map btrfs_trans_num_writers_map;
	struct lockdep_map btrfs_trans_num_extwriters_map;
	struct lockdep_map btrfs_state_change_map[4];
	struct lockdep_map btrfs_trans_pending_ordered_map;
	struct lockdep_map btrfs_ordered_extent_map;

#ifdef CONFIG_BTRFS_FS_REF_VERIFY
	spinlock_t ref_verify_lock;
	struct rb_root block_tree;
#endif

#ifdef CONFIG_BTRFS_DEBUG
	struct kobject *debug_kobj;
	struct list_head allocated_roots;

	spinlock_t eb_leak_lock;
	struct list_head allocated_ebs;
#endif
};

static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info,
						u64 gen)
{
	WRITE_ONCE(fs_info->last_root_drop_gen, gen);
}

static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info)
{
	return READ_ONCE(fs_info->last_root_drop_gen);
}

static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
	return sb->s_fs_info;
}

/*
 * Take the number of bytes to be checksummed and figure out how many leaves
 * it would require to store the csums for that many bytes.
 */
static inline u64 btrfs_csum_bytes_to_leaves(
			const struct btrfs_fs_info *fs_info, u64 csum_bytes)
{
	const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits;

	return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf);
}

/*
 * Use this if we would be adding new items, as we could split nodes as we cow
 * down the tree.
 */
static inline u64 btrfs_calc_insert_metadata_size(struct btrfs_fs_info *fs_info,
						  unsigned num_items)
{
	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
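
/*
 * Worked example (illustrative): with the default 16KiB nodesize and
 * BTRFS_MAX_LEVEL == 8, one item reserves 16384 * 8 * 2 == 256KiB, enough
 * to COW a full path and split every node on the way down.
 */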

/*
 * Doing a truncate or a modification won't result in new nodes or leaves, just
 * what we need for COW.
 */
static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info,
						 unsigned num_items)
{
	return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
}

#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
					sizeof(struct btrfs_item))

static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
{
	return fs_info->zone_size > 0;
}

/*
 * Count how many fs_info->max_extent_size units are needed to cover @size.
 */
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	if (!fs_info)
		return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
#endif

	return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
}
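
/*
 * Example (illustrative): on a regular (non-zoned) filesystem
 * fs_info->max_extent_size is BTRFS_MAX_EXTENT_SIZE (128MiB), so
 * count_max_extents(fs_info, SZ_1G) == 8.
 */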

bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
			enum btrfs_exclusive_operation type);
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
				 enum btrfs_exclusive_operation type);
void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
			  enum btrfs_exclusive_operation op);
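
/*
 * Typical exclusive-operation pattern (a sketch only; real callers do the
 * full error handling in the ioctl code):
 *
 *	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_RESIZE))
 *		return -EBUSY;	// another exclusive operation is running
 *	// ... perform the resize ...
 *	btrfs_exclop_finish(fs_info);
 */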

/*
 * The state of btrfs root
 */
enum {
	/*
	 * btrfs_record_root_in_trans is a multi-step process, and it can race
	 * with the balancing code.  But the race is very small, and only the
	 * first time the root is added to each transaction.  So IN_TRANS_SETUP
	 * is used to tell us when more checks are required
	 */
	BTRFS_ROOT_IN_TRANS_SETUP,

	/*
	 * Set if tree blocks of this root can be shared by other roots.
	 * Only subvolume trees and their reloc trees have this bit set.
	 * Conflicts with TRACK_DIRTY bit.
	 *
	 * This affects two things:
	 *
	 * - How balance works
	 *   For shareable roots, we need to use reloc tree and do path
	 *   replacement for balance, and need various pre/post hooks for
	 *   snapshot creation to handle them.
	 *
	 *   While for non-shareable trees, we just simply do a tree search
	 *   with COW.
	 *
	 * - How dirty roots are tracked
	 *   For shareable roots, btrfs_record_root_in_trans() is needed to
	 *   track them, while non-subvolume roots have TRACK_DIRTY bit, they
	 *   don't need to set this manually.
	 */
	BTRFS_ROOT_SHAREABLE,
	BTRFS_ROOT_TRACK_DIRTY,
	BTRFS_ROOT_IN_RADIX,
	BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
	BTRFS_ROOT_DEFRAG_RUNNING,
	BTRFS_ROOT_FORCE_COW,
	BTRFS_ROOT_MULTI_LOG_TASKS,
	BTRFS_ROOT_DIRTY,
	BTRFS_ROOT_DELETING,

	/*
	 * Reloc tree is orphan, only kept here for qgroup delayed subtree scan
	 *
	 * Set for the subvolume tree owning the reloc tree.
	 */
	BTRFS_ROOT_DEAD_RELOC_TREE,
	/* Mark dead root stored on device whose cleanup needs to be resumed */
	BTRFS_ROOT_DEAD_TREE,
	/* The root has a log tree. Used for subvolume roots and the tree root. */
	BTRFS_ROOT_HAS_LOG_TREE,
	/* Qgroup flushing is in progress */
	BTRFS_ROOT_QGROUP_FLUSHING,
	/* We started the orphan cleanup for this root. */
	BTRFS_ROOT_ORPHAN_CLEANUP,
	/* This root has a drop operation that was started previously. */
	BTRFS_ROOT_UNFINISHED_DROP,
	/* This reloc root needs to have its buffers lockdep class reset. */
	BTRFS_ROOT_RESET_LOCKDEP_CLASS,
};

enum btrfs_lockdep_trans_states {
	BTRFS_LOCKDEP_TRANS_COMMIT_START,
	BTRFS_LOCKDEP_TRANS_UNBLOCKED,
	BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
	BTRFS_LOCKDEP_TRANS_COMPLETED,
};

/*
 * Lockdep annotation for wait events.
 *
 * @owner:  The struct where the lockdep map is defined
 * @lock:   The lockdep map corresponding to a wait event
 *
 * This macro is used to annotate a wait event. In this case a thread acquires
 * the lockdep map as writer (exclusive lock) because it has to block until all
 * the threads that hold the lock as readers signal the condition for the wait
 * event and release their locks.
 */
#define btrfs_might_wait_for_event(owner, lock)					\
	do {									\
		rwsem_acquire(&owner->lock##_map, 0, 0, _THIS_IP_);		\
		rwsem_release(&owner->lock##_map, _THIS_IP_);			\
	} while (0)

/*
 * Protection for the resource/condition of a wait event.
 *
 * @owner:  The struct where the lockdep map is defined
 * @lock:   The lockdep map corresponding to a wait event
 *
 * Many threads can modify the condition for the wait event at the same time
 * and signal the threads that block on the wait event. The threads that modify
 * the condition and do the signaling acquire the lock as readers (shared
 * lock).
 */
#define btrfs_lockdep_acquire(owner, lock)					\
	rwsem_acquire_read(&owner->lock##_map, 0, 0, _THIS_IP_)

/*
 * Used after signaling the condition for a wait event to release the lockdep
 * map held by a reader thread.
 */
#define btrfs_lockdep_release(owner, lock)					\
	rwsem_release(&owner->lock##_map, _THIS_IP_)
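
/*
 * Example pairing (illustrative sketch), using the transaction writer
 * count annotated via fs_info->btrfs_trans_num_writers_map defined above:
 *
 *	// thread that must block until all writers are gone:
 *	btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
 *	wait_event(...);
 *
 *	// thread holding a writer reference:
 *	btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
 *	// ... do the work, drop the reference, wake up the waiter ...
 *	btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
 */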

/*
 * Macros for the transaction states wait events, similar to the generic wait
 * event macros.
 */
#define btrfs_might_wait_for_state(owner, i)					\
	do {									\
		rwsem_acquire(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_); \
		rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_);	\
	} while (0)

#define btrfs_trans_state_lockdep_acquire(owner, i)				\
	rwsem_acquire_read(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_)

#define btrfs_trans_state_lockdep_release(owner, i)				\
	rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_)

/* Initialization of the lockdep map */
#define btrfs_lockdep_init_map(owner, lock)					\
	do {									\
		static struct lock_class_key lock##_key;			\
		lockdep_init_map(&owner->lock##_map, #lock, &lock##_key, 0);	\
	} while (0)

/* Initialization of the transaction states lockdep maps. */
#define btrfs_state_lockdep_init_map(owner, lock, state)			\
	do {									\
		static struct lock_class_key lock##_key;			\
		lockdep_init_map(&owner->btrfs_state_change_map[state], #lock,	\
				 &lock##_key, 0);				\
	} while (0)

/*
 * Record swapped tree blocks of a subvolume tree for delayed subtree trace
 * code. For detail check comment in fs/btrfs/qgroup.c.
 */
struct btrfs_qgroup_swapped_blocks {
	spinlock_t lock;
	/* RB_EMPTY_ROOT() of above blocks[] */
	bool swapped;
	struct rb_root blocks[BTRFS_MAX_LEVEL];
};

/*
 * In-memory representation of the tree.  extent_root is used for all
 * allocations and for the extent tree extent_root root.
 */
struct btrfs_root {
	struct rb_node rb_node;

	struct extent_buffer *node;

	struct extent_buffer *commit_root;
	struct btrfs_root *log_root;
	struct btrfs_root *reloc_root;

	unsigned long state;
	struct btrfs_root_item root_item;
	struct btrfs_key root_key;
	struct btrfs_fs_info *fs_info;
	struct extent_io_tree dirty_log_pages;

	struct mutex objectid_mutex;

	spinlock_t accounting_lock;
	struct btrfs_block_rsv *block_rsv;

	struct mutex log_mutex;
	wait_queue_head_t log_writer_wait;
	wait_queue_head_t log_commit_wait[2];
	struct list_head log_ctxs[2];
	/* Used only for log trees of subvolumes, not for the log root tree */
	atomic_t log_writers;
	atomic_t log_commit[2];
	/* Used only for log trees of subvolumes, not for the log root tree */
	atomic_t log_batch;
	int log_transid;
	/* No matter whether the commit succeeds or not */
	int log_transid_committed;
	/* Only updated when the commit succeeds. */
	int last_log_commit;
	pid_t log_start_pid;

	u64 last_trans;

	u32 type;

	u64 free_objectid;

	struct btrfs_key defrag_progress;
	struct btrfs_key defrag_max;

	/* The dirty list is only used by non-shareable roots */
	struct list_head dirty_list;

	struct list_head root_list;

	spinlock_t log_extents_lock[2];
	struct list_head logged_list[2];

	spinlock_t inode_lock;
	/* red-black tree that keeps track of in-memory inodes */
	struct rb_root inode_tree;

	/*
	 * radix tree that keeps track of delayed nodes of every inode,
	 * protected by inode_lock
	 */
	struct radix_tree_root delayed_nodes_tree;
	/*
	 * right now this just gets used so that a root has its own devid
	 * for stat.  It may be used for more later
	 */
	dev_t anon_dev;

	spinlock_t root_item_lock;
	refcount_t refs;

	struct mutex delalloc_mutex;
	spinlock_t delalloc_lock;
	/*
	 * all of the inodes that have delalloc bytes.  It is possible for
	 * this list to be empty even when there is still dirty data=ordered
	 * extents waiting to finish IO.
	 */
	struct list_head delalloc_inodes;
	struct list_head delalloc_root;
	u64 nr_delalloc_inodes;

	struct mutex ordered_extent_mutex;
	/*
	 * this is used by the balancing code to wait for all the pending
	 * ordered extents
	 */
	spinlock_t ordered_extent_lock;

	/*
	 * all of the data=ordered extents pending writeback
	 * these can span multiple transactions and basically include
	 * every dirty data page that isn't from nodatacow
	 */
	struct list_head ordered_extents;
	struct list_head ordered_root;
	u64 nr_ordered_extents;

	/*
	 * Not empty if this subvolume root has gone through tree block swap
	 * (relocation)
	 *
	 * Will be used by reloc_control::dirty_subvol_roots.
	 */
	struct list_head reloc_dirty_list;

	/*
	 * Number of currently running SEND ioctls to prevent
	 * manipulation with the read-only status via SUBVOL_SETFLAGS
	 */
	int send_in_progress;
	/*
	 * Number of currently running deduplication operations that have a
	 * destination inode belonging to this root. Protected by the lock
	 * root_item_lock.
	 */
	int dedupe_in_progress;
	/* For exclusion of snapshot creation and nocow writes */
	struct btrfs_drew_lock snapshot_lock;

	atomic_t snapshot_force_cow;

	/* For qgroup metadata reserved space */
	spinlock_t qgroup_meta_rsv_lock;
	u64 qgroup_meta_rsv_pertrans;
	u64 qgroup_meta_rsv_prealloc;
	wait_queue_head_t qgroup_flush_wait;

	/* Number of active swapfiles */
	atomic_t nr_swapfiles;

	/* Record pairs of swapped blocks for qgroup */
	struct btrfs_qgroup_swapped_blocks swapped_blocks;

	/* Used only by log trees, when logging csum items */
	struct extent_io_tree log_csum_range;

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	u64 alloc_bytenr;
#endif

#ifdef CONFIG_BTRFS_DEBUG
	struct list_head leak_list;
#endif
};

/*
 * Structure that conveys information about an extent that is going to replace
 * all the extents in a file range.
 */
struct btrfs_replace_extent_info {
	u64 disk_offset;
	u64 disk_len;
	u64 data_offset;
	u64 data_len;
	u64 file_offset;
	/* Pointer to a file extent item of type regular or prealloc. */
	char *extent_buf;
	/*
	 * Set to true when attempting to replace a file range with a new extent
	 * described by this structure, set to false when attempting to clone an
	 * existing extent into a file range.
	 */
	bool is_new_extent;
	/* Indicate if we should update the inode's mtime and ctime. */
	bool update_times;
	/* Meaningful only if is_new_extent is true. */
	int qgroup_reserved;
	/*
	 * Meaningful only if is_new_extent is true.
	 * Used to track how many extent items we have already inserted in a
	 * subvolume tree that refer to the extent described by this structure,
	 * so that we know when to create a new delayed ref or update an existing
	 * one.
	 */
	int insertions;
};

/* Arguments for btrfs_drop_extents() */
struct btrfs_drop_extents_args {
	/* Input parameters */

	/*
	 * If NULL, btrfs_drop_extents() will allocate and free its own path.
	 * If 'replace_extent' is true, this must not be NULL. Also the path
	 * is always released except if 'replace_extent' is true and
	 * btrfs_drop_extents() sets 'extent_inserted' to true, in which case
	 * the path is kept locked.
	 */
	struct btrfs_path *path;
	/* Start offset of the range to drop extents from */
	u64 start;
	/* End (exclusive, last byte + 1) of the range to drop extents from */
	u64 end;
	/* If true drop all the extent maps in the range */
	bool drop_cache;
	/*
	 * If true it means we want to insert a new extent after dropping all
	 * the extents in the range. If this is true, the 'extent_item_size'
	 * parameter must be set as well and the 'extent_inserted' field will
	 * be set to true by btrfs_drop_extents() if it could insert the new
	 * extent.
	 * Note: when this is set to true the path must not be NULL.
	 */
	bool replace_extent;
	/*
	 * Used if 'replace_extent' is true. Size of the file extent item to
	 * insert after dropping all existing extents in the range
	 */
	u32 extent_item_size;

	/* Output parameters */

	/*
	 * Set to the minimum between the input parameter 'end' and the end
	 * (exclusive, last byte + 1) of the last dropped extent. This is always
	 * set even if btrfs_drop_extents() returns an error.
	 */
	u64 drop_end;
	/*
	 * The number of allocated bytes found in the range. This can be smaller
	 * than the range's length when there are holes in the range.
	 */
	u64 bytes_found;
	/*
	 * Only set if 'replace_extent' is true. Set to true if we were able
	 * to insert a replacement extent after dropping all extents in the
	 * range, otherwise set to false by btrfs_drop_extents().
	 * Also, if btrfs_drop_extents() has set this to true it means it
	 * returned with the path locked, otherwise if it has set this to
	 * false it has returned with the path released.
	 */
	bool extent_inserted;
};
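
/*
 * Hypothetical usage sketch (not from the original source; assumes
 * btrfs_drop_extents() takes this struct as its argument block): dropping
 * all extents in a range while also dropping the cached extent maps:
 *
 *	struct btrfs_drop_extents_args args = { 0 };
 *
 *	args.start = start;
 *	args.end = start + len;	// exclusive
 *	args.drop_cache = true;
 *	ret = btrfs_drop_extents(trans, root, inode, &args);
 */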

struct btrfs_file_private {
	void *filldir_buf;
};


static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
{
	return info->nodesize - sizeof(struct btrfs_header);
}
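
/*
 * Worked example (illustrative): with the default 16KiB nodesize a leaf
 * offers 16384 - sizeof(struct btrfs_header) bytes for items, and each
 * item additionally costs sizeof(struct btrfs_item) of that space (see
 * BTRFS_MAX_ITEM_SIZE below).
 */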

static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info)
{
	return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item);
}

static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_fs_info *info)
{
	return BTRFS_LEAF_DATA_SIZE(info) / sizeof(struct btrfs_key_ptr);
}

#define BTRFS_FILE_EXTENT_INLINE_DATA_START		\
		(offsetof(struct btrfs_file_extent_item, disk_bytenr))
static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_fs_info *info)
{
	return BTRFS_MAX_ITEM_SIZE(info) -
	       BTRFS_FILE_EXTENT_INLINE_DATA_START;
}

static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
{
	return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item);
}

#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
				((bytes) >> (fs_info)->sectorsize_bits)

static inline u32 btrfs_crc32c(u32 crc, const void *address, unsigned length)
{
	return crc32c(crc, address, length);
}

static inline void btrfs_crc32c_final(u32 crc, u8 *result)
{
	put_unaligned_le32(~crc, result);
}

static inline u64 btrfs_name_hash(const char *name, int len)
{
	return crc32c((u32)~1, name, len);
}

/*
 * Figure the key offset of an extended inode ref
 */
static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
				    int len)
{
	return (u64)crc32c(parent_objectid, name, len);
}
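
/*
 * Example (illustrative): a directory entry for the name "foo" is keyed
 * at offset btrfs_name_hash("foo", 3) under BTRFS_DIR_ITEM_KEY, while
 * extended inode refs seed the crc with the parent objectid via
 * btrfs_extref_hash().
 */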

static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
{
	return mapping_gfp_constraint(mapping, ~__GFP_FS);
}

/* extent-tree.c */

enum btrfs_inline_ref_type {
	BTRFS_REF_TYPE_INVALID,
	BTRFS_REF_TYPE_BLOCK,
	BTRFS_REF_TYPE_DATA,
	BTRFS_REF_TYPE_ANY,
};

int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
				     struct btrfs_extent_inline_ref *iref,
				     enum btrfs_inline_ref_type is_data);
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);

int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
			      u64 start, u64 num_bytes);
void btrfs_free_excluded_extents(struct btrfs_block_group *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			   unsigned long count);
void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
				  struct btrfs_delayed_ref_root *delayed_refs,
				  struct btrfs_delayed_ref_head *head);
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 offset, int metadata, u64 *refs, u64 *flags);
int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
		     int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
				    u64 bytenr, u64 num_bytes);
int btrfs_exclude_logged_extents(struct extent_buffer *eb);
int btrfs_cross_ref_exist(struct btrfs_root *root,
			  u64 objectid, u64 offset, u64 bytenr, bool strict,
			  struct btrfs_path *path);
struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u64 parent, u64 root_objectid,
					     const struct btrfs_disk_key *key,
					     int level, u64 hint,
					     u64 empty_size,
					     enum btrfs_lock_nesting nest);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   u64 root_id,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref);
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 owner,
				     u64 offset, u64 ram_bytes,
				     struct btrfs_key *ins);
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
				   u64 root_objectid, u64 owner, u64 offset,
				   struct btrfs_key *ins);
int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
			 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
			 struct btrfs_key *ins, int is_data, int delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
				struct extent_buffer *eb, u64 flags, int level);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);

int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
			       u64 start, u64 len, int delalloc);
int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
			      u64 len);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_ref *generic_ref);

void btrfs_clear_space_info_full(struct btrfs_fs_info *info);

int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv,
				     int nitems, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_root *root,
				      struct btrfs_block_rsv *rsv);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);

int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
				    u64 disk_num_bytes, bool noflush);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
				   u64 start, u64 end);
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes);
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);

int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
					 struct btrfs_fs_info *fs_info);
int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);

/* ctree.c */
int __init btrfs_ctree_init(void);
void __cold btrfs_ctree_exit(void);
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
		     int *slot);
int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
			struct btrfs_path *path, u64 min_objectid,
			int type);
int btrfs_previous_extent_item(struct btrfs_root *root,
			struct btrfs_path *path, u64 min_objectid);
void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
			     struct btrfs_path *path,
			     const struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
			struct btrfs_key *key, int lowest_level,
			u64 min_trans);
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_path *path,
			 u64 min_trans);
struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
					   int slot);

int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret,
		    enum btrfs_lock_nesting nest);
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_block_can_be_shared(struct btrfs_root *root,
			      struct extent_buffer *buf);
void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root,
		     struct btrfs_path *path,
		     const struct btrfs_key *new_key,
		     unsigned long split_offset);
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path,
			 const struct btrfs_key *new_key);
int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
		u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      const struct btrfs_key *key, struct btrfs_path *p,
		      int ins_len, int cow);
int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
			  struct btrfs_path *p, u64 time_seq);
int btrfs_search_slot_for_read(struct btrfs_root *root,
			       const struct btrfs_key *key,
			       struct btrfs_path *p, int find_higher,
			       int return_any);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct extent_buffer *parent,
		       int start_slot, u64 *last_ret,
		       struct btrfs_key *progress);
void btrfs_release_path(struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);

int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int slot, int nr);
static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path)
{
	return btrfs_del_items(trans, root, path, path->slots[0], 1);
}

/*
 * Describes a batch of items to insert in a btree. This is used by
 * btrfs_insert_empty_items().
 */
 */
struct btrfs_item_batch {
	/*
	 * Pointer to an array containing the keys of the items to insert (in
	 * sorted order).
	 */
	const struct btrfs_key *keys;
	/* Pointer to an array containing the data size for each item to insert. */
	const u32 *data_sizes;
	/*
	 * The sum of data sizes for all items. The caller can compute this while
	 * setting up the data_sizes array, so it ends up being more efficient
	 * than having btrfs_insert_empty_items() or setup_item_for_insert()
	 * doing it, as it would avoid an extra loop over a potentially large
	 * array, and in the case of setup_item_for_insert(), we would be doing
	 * it while holding a write lock on a leaf and often on upper level nodes
	 * too, unnecessarily increasing the size of a critical section.
	 */
	u32 total_data_size;
	/* Size of the keys and data_sizes arrays (number of items in the batch). */
	int nr;
};
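
/*
 * Illustrative two-item batch insert (a sketch; sizes and keys are
 * hypothetical, error handling trimmed):
 *
 *	struct btrfs_item_batch batch;
 *	struct btrfs_key keys[2];
 *	u32 sizes[2] = { first_size, second_size };
 *
 *	// ... fill keys[] in sorted order ...
 *	batch.keys = keys;
 *	batch.data_sizes = sizes;
 *	batch.total_data_size = first_size + second_size;
 *	batch.nr = 2;
 *	ret = btrfs_insert_empty_items(trans, root, path, &batch);
 */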

void btrfs_setup_item_for_insert(struct btrfs_root *root,
				 struct btrfs_path *path,
				 const struct btrfs_key *key,
				 u32 data_size);
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      const struct btrfs_key *key, void *data, u32 data_size);
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     const struct btrfs_item_batch *batch);

static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  const struct btrfs_key *key,
					  u32 data_size)
{
	struct btrfs_item_batch batch;

	batch.keys = key;
	batch.data_sizes = &data_size;
	batch.total_data_size = data_size;
	batch.nr = 1;

	return btrfs_insert_empty_items(trans, root, path, &batch);
}

int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
			u64 time_seq);

int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
			   struct btrfs_path *path);

int btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
			      struct btrfs_path *path);

/*
 * Search in @root for a given @key, and store the slot found in @found_key.
 *
 * @root:	The root node of the tree.
 * @key:	The key we are looking for.
 * @found_key:	Will hold the found item.
 * @path:	Holds the current slot/leaf.
 * @iter_ret:	Contains the value returned from btrfs_search_slot or
 * 		btrfs_get_next_valid_item, whichever was executed last.
 *
 * The @iter_ret is an output variable that will contain the return value of
 * btrfs_search_slot, if it encountered an error, or the value returned from
 * btrfs_get_next_valid_item otherwise. That return value can be 0, if a valid
 * slot was found, 1 if there were no more leaves, and <0 if there was an error.
 *
 * It's recommended to use a separate variable for iter_ret and then use it to
 * set the function return value so there's no confusion of the 0/1/errno
 * values stemming from btrfs_search_slot.
 */
#define btrfs_for_each_slot(root, key, found_key, path, iter_ret)		\
	for (iter_ret = btrfs_search_slot(NULL, (root), (key), (path), 0, 0);	\
		(iter_ret) >= 0 &&						\
		(iter_ret = btrfs_get_next_valid_item((root), (found_key), (path))) == 0; \
		(path)->slots[0]++						\
	)
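
/*
 * Illustrative iteration (a sketch, error handling trimmed):
 *
 *	struct btrfs_key key = { 0 };
 *	struct btrfs_key found_key;
 *	int iter_ret = 0;
 *
 *	btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
 *		// ... process the item at path->slots[0] ...
 *	}
 *	// 1 means no more leaves, < 0 means error
 *	if (iter_ret < 0)
 *		ret = iter_ret;
 */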

int btrfs_next_old_item(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq);

/*
 * Search the tree again to find a leaf with greater keys.
 *
 * Returns 0 if it found something or 1 if there are no greater leaves.
 * Returns < 0 on error.
 */
static inline int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	return btrfs_next_old_leaf(root, path, 0);
}

static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
	return btrfs_next_old_item(root, p, 0);
}
int btrfs_leaf_free_space(struct extent_buffer *leaf);
int __must_check btrfs_drop_snapshot(struct btrfs_root *root, int update_ref,
				     int for_reloc);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct extent_buffer *node,
			struct extent_buffer *parent);

C
Chris Mason 已提交
1519
/* root-item.c */
1520 1521 1522
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
		       u64 ref_id, u64 dirid, u64 sequence, const char *name,
		       int name_len);
int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
		       u64 ref_id, u64 dirid, u64 *sequence, const char *name,
		       int name_len);
int btrfs_del_root(struct btrfs_trans_handle *trans,
		   const struct btrfs_key *key);
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      const struct btrfs_key *key,
		      struct btrfs_root_item *item);
int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_key *key,
				   struct btrfs_root_item *item);
int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
		    struct btrfs_path *path, struct btrfs_root_item *root_item,
		    struct btrfs_key *root_key);
int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info);
void btrfs_set_root_node(struct btrfs_root_item *item,
			 struct extent_buffer *node);
void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
void btrfs_update_root_times(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root);

/* uuid-tree.c */
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
			u64 subid);
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
			u64 subid);
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info);

/* dir-item.c */
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
			  const char *name, int name_len);
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
			  int name_len, struct btrfs_inode *dir,
			  struct btrfs_key *location, u8 type, u64 index);
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     struct btrfs_path *path, u64 dir,
					     const char *name, int name_len,
					     int mod);
struct btrfs_dir_item *
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    struct btrfs_path *path, u64 dir,
			    u64 index, const char *name, int name_len,
			    int mod);
struct btrfs_dir_item *
btrfs_search_dir_index_item(struct btrfs_root *root,
			    struct btrfs_path *path, u64 dirid,
			    const char *name, int name_len);
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct btrfs_path *path,
			      struct btrfs_dir_item *di);
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    struct btrfs_path *path, u64 objectid,
			    const char *name, u16 name_len,
			    const void *data, u16 data_len);
struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 dir,
					  const char *name, u16 name_len,
					  int mod);
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
						 struct btrfs_path *path,
						 const char *name,
						 int name_len);

/* orphan.c */
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 offset);
int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, u64 offset);
int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);

/* file-item.c */
int btrfs_del_csums(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, u64 bytenr, u64 len);
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst);
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 objectid, u64 pos,
			     u64 num_bytes);
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path, u64 objectid,
			     u64 bytenr, int mod);
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_ordered_sum *sums);
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
				u64 offset, bool one_ordered);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
			     struct list_head *list, int search_commit,
			     bool nowait);
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
				     const struct btrfs_path *path,
				     struct btrfs_file_extent_item *fi,
				     const bool new_inline,
				     struct extent_map *em);
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
					u64 len);
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
				      u64 len);
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size);
u64 btrfs_file_extent_end(const struct btrfs_path *path);

/* inode.c */
void btrfs_submit_data_write_bio(struct inode *inode, struct bio *bio, int mirror_num);
void btrfs_submit_data_read_bio(struct inode *inode, struct bio *bio,
			int mirror_num, enum btrfs_compression_type compress_type);
int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
			    u32 pgoff, u8 *csum, const u8 * const csum_expected);
int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
			  u32 bio_offset, struct page *page, u32 pgoff);
unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
				    u32 bio_offset, struct page *page,
				    u64 start, u64 end);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes, bool nowait, bool strict);

void __btrfs_del_delalloc_inode(struct btrfs_root *root,
				struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
		       struct btrfs_inode *dir, struct btrfs_inode *inode,
		       const char *name, int name_len);
int btrfs_add_link(struct btrfs_trans_handle *trans,
		   struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
		   const char *name, int name_len, int add_backref, u64 index);
int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
			 int front);

int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
			       bool in_reclaim_context);
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
			      unsigned int extra_bits,
			      struct extent_state **cached_state);
struct btrfs_new_inode_args {
	/* Input */
	struct inode *dir;
	struct dentry *dentry;
	struct inode *inode;
	bool orphan;
	bool subvol;

	/*
	 * Output from btrfs_new_inode_prepare(), input to
	 * btrfs_create_new_inode().
	 */
	struct posix_acl *default_acl;
	struct posix_acl *acl;
};
int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
			    unsigned int *trans_num_items);
int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
			   struct btrfs_new_inode_args *args);
void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args);
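
/*
 * Sketch of the expected call sequence (illustrative; error handling and
 * cleanup on failure are elided, and dir/dentry/inode/root are assumed to
 * be provided by the caller):
 *
 *	struct btrfs_new_inode_args args = {
 *		.dir = dir,
 *		.dentry = dentry,
 *		.inode = inode,
 *	};
 *	unsigned int trans_num_items;
 *	struct btrfs_trans_handle *trans;
 *
 *	ret = btrfs_new_inode_prepare(&args, &trans_num_items);
 *	trans = btrfs_start_transaction(root, trans_num_items);
 *	ret = btrfs_create_new_inode(trans, &args);
 *	btrfs_new_inode_args_destroy(&args);
 */
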
struct inode *btrfs_new_subvol_inode(struct user_namespace *mnt_userns,
				     struct inode *dir);
void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
			       u32 bits);
void btrfs_clear_delalloc_extent(struct inode *inode,
				 struct extent_state *state, u32 bits);
void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
				 struct extent_state *other);
void btrfs_split_delalloc_extent(struct inode *inode,
				 struct extent_state *orig, u64 split);
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
void btrfs_evict_inode(struct inode *inode);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
void btrfs_free_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void);
void __cold btrfs_destroy_cachep(void);
struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
			      struct btrfs_root *root, struct btrfs_path *path);
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
				    struct page *page, size_t pg_offset,
				    u64 start, u64 end);
int btrfs_update_inode(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_inode *inode);
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct btrfs_inode *inode);
int btrfs_orphan_add(struct btrfs_trans_handle *trans,
		struct btrfs_inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info);
int btrfs_prealloc_file_range(struct inode *inode, int mode,
			      u64 start, u64 num_bytes, u64 min_size,
			      loff_t actual_len, u64 *alloc_hint);
int btrfs_prealloc_file_range_trans(struct inode *inode,
				    struct btrfs_trans_handle *trans, int mode,
				    u64 start, u64 num_bytes, u64 min_size,
				    loff_t actual_len, u64 *alloc_hint);
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
		u64 start, u64 end, int *page_started, unsigned long *nr_written,
		struct writeback_control *wbc);
int btrfs_writepage_cow_fixup(struct page *page);
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
					  struct page *page, u64 start,
					  u64 end, bool uptodate);
int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
					     int compress_type);
int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
					  u64 file_offset, u64 disk_bytenr,
					  u64 disk_io_size,
					  struct page **pages);
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
			   struct btrfs_ioctl_encoded_io_args *encoded);
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
			       const struct btrfs_ioctl_encoded_io_args *encoded);

ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
		       size_t done_before);
struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
				  size_t done_before);

extern const struct dentry_operations btrfs_dentry_operations;

/* Inode locking type flags, by default the exclusive lock is taken */
enum btrfs_ilock_type {
	ENUM_BIT(BTRFS_ILOCK_SHARED),
	ENUM_BIT(BTRFS_ILOCK_TRY),
	ENUM_BIT(BTRFS_ILOCK_MMAP),
};

int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags);
void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags);
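
/*
 * Example (a sketch): taking the shared inode lock without blocking, as a
 * nowait read path would. btrfs_inode_lock() returns 0 on success or a
 * negative errno such as -EAGAIN when BTRFS_ILOCK_TRY cannot take the lock
 * immediately:
 *
 *	ret = btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED | BTRFS_ILOCK_TRY);
 *	if (ret)
 *		return ret;
 *	do the read
 *	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
 */
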
void btrfs_update_inode_bytes(struct btrfs_inode *inode,
			      const u64 add_bytes,
			      const u64 del_bytes);
void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end);

/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
int btrfs_fileattr_set(struct user_namespace *mnt_userns,
		       struct dentry *dentry, struct fileattr *fa);
int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int __pure btrfs_is_empty_uuid(u8 *uuid);
int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
		      struct btrfs_ioctl_defrag_range_args *range,
		      u64 newer_than, unsigned long max_to_defrag);
void btrfs_get_block_group_info(struct list_head *groups_list,
				struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
			       struct btrfs_ioctl_balance_args *bargs);

/* file.c */
int __init btrfs_auto_defrag_init(void);
void __cold btrfs_auto_defrag_exit(void);
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
			   struct btrfs_inode *inode, u32 extent_thresh);
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
extern const struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_inode *inode,
		       struct btrfs_drop_extents_args *args);
int btrfs_replace_file_extents(struct btrfs_inode *inode,
			   struct btrfs_path *path, const u64 start,
			   const u64 end,
			   struct btrfs_replace_extent_info *extent_info,
			   struct btrfs_trans_handle **trans_out);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct btrfs_inode *inode, u64 start, u64 end);
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
			    const struct btrfs_ioctl_encoded_io_args *encoded);
int btrfs_release_file(struct inode *inode, struct file *file);
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
		      size_t num_pages, loff_t pos, size_t write_bytes,
		      struct extent_state **cached, bool noreserve);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
			   size_t *write_bytes, bool nowait);
void btrfs_check_nocow_unlock(struct btrfs_inode *inode);
bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
				  u64 *delalloc_start_ret, u64 *delalloc_end_ret);

/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
			struct btrfs_root *root);

/* super.c */
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
			unsigned long new_flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
					  u64 subvol_objectid);

#if BITS_PER_LONG == 32
#define BTRFS_32BIT_MAX_FILE_SIZE (((u64)ULONG_MAX + 1) << PAGE_SHIFT)
/*
 * The warning threshold is 5/8th of the MAX_LFS_FILESIZE that limits the
 * logical addresses of extents.
 *
 * For a 4K page size the limit is 2^32 * 4K = 16T, so the threshold is about
 * 10T; for 64K pages it is 2^32 * 64K = 256T, giving about 160T.
 */
#define BTRFS_32BIT_EARLY_WARN_THRESHOLD (BTRFS_32BIT_MAX_FILE_SIZE * 5 / 8)
void btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info);
void btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info);
#endif

/*
 * Get the correct offset inside the page of extent buffer.
 *
 * @eb:		target extent buffer
 * @offset:	offset inside the extent buffer
 *
 * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases.
 */
static inline size_t get_eb_offset_in_page(const struct extent_buffer *eb,
					   unsigned long offset)
{
	/*
	 * For sectorsize == PAGE_SIZE case, eb->start will always be aligned
	 * to PAGE_SIZE, thus adding it won't cause any difference.
	 *
	 * For sectorsize < PAGE_SIZE, we must only read the data that belongs
	 * to the eb, thus we have to take the eb->start into consideration.
	 */
	return offset_in_page(offset + eb->start);
}

static inline unsigned long get_eb_page_index(unsigned long offset)
{
	/*
	 * For sectorsize == PAGE_SIZE case, plain >> PAGE_SHIFT is enough.
	 *
	 * For sectorsize < PAGE_SIZE case, we only support 64K PAGE_SIZE,
	 * and have ensured that all tree blocks are contained in one page,
	 * thus we always get index == 0.
	 */
	return offset >> PAGE_SHIFT;
}
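
/*
 * The two helpers above are typically combined like this when copying data
 * out of an extent buffer (a sketch only, not a declaration from this file):
 *
 *	unsigned long i = get_eb_page_index(offset);
 *	size_t off = get_eb_offset_in_page(eb, offset);
 *	char *kaddr = page_address(eb->pages[i]);
 *
 *	memcpy(dst, kaddr + off, len);
 *
 * With sectorsize < PAGE_SIZE the index is always 0 and the eb->start offset
 * inside the single page is accounted for by the offset helper.
 */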

/*
 * Use this for functions that are conditionally exported for sanity tests but
 * otherwise static.
 */
#ifndef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
#define EXPORT_FOR_TESTS static
#else
#define EXPORT_FOR_TESTS
#endif
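
/*
 * Example (illustrative; btrfs_some_helper is a hypothetical name): a helper
 * shared with the self tests is defined as
 *
 *	EXPORT_FOR_TESTS int btrfs_some_helper(...);
 *
 * so it is static in normal builds and globally visible when
 * CONFIG_BTRFS_FS_RUN_SANITY_TESTS is set.
 */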

/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu);
int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
		  struct posix_acl *acl, int type);
int __btrfs_set_acl(struct btrfs_trans_handle *trans, struct inode *inode,
		    struct posix_acl *acl, int type);
#else
#define btrfs_get_acl NULL
#define btrfs_set_acl NULL
static inline int __btrfs_set_acl(struct btrfs_trans_handle *trans,
				  struct inode *inode, struct posix_acl *acl,
				  int type)
{
	return -EOPNOTSUPP;
}
#endif

/* relocation.c */
int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start);
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root);
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *buf,
			  struct extent_buffer *cow);
void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
			      u64 *bytes_to_reserve);
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
			      struct btrfs_pending_snapshot *pending);
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info);
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info,
				   u64 bytenr);
int btrfs_should_ignore_reloc_root(struct btrfs_root *root);

/* scrub.c */
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
		    u64 end, struct btrfs_scrub_progress *progress,
		    int readonly, int is_dev_replace);
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
			 struct btrfs_scrub_progress *progress);

/* dev-replace.c */
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount);

static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
{
	btrfs_bio_counter_sub(fs_info, 1);
}

static inline int is_fstree(u64 rootid)
{
	if (rootid == BTRFS_FS_TREE_OBJECTID ||
	    ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID &&
	      !btrfs_qgroup_level(rootid)))
		return 1;
	return 0;
}
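
/*
 * For example (behavior of the helper above): the default subvolume
 * (BTRFS_FS_TREE_OBJECTID) and any subvolume with an objectid at or above
 * BTRFS_FIRST_FREE_OBJECTID count as fs trees, while internal trees like the
 * extent tree and virtual roots like the log tree (whose objectid is negative
 * when cast to s64) do not.
 */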

static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
{
	return signal_pending(current);
}

/* verity.c */
#ifdef CONFIG_FS_VERITY

extern const struct fsverity_operations btrfs_verityops;
int btrfs_drop_verity_items(struct btrfs_inode *inode);
int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size);

#else

static inline int btrfs_drop_verity_items(struct btrfs_inode *inode)
{
	return 0;
}

static inline int btrfs_get_verity_descriptor(struct inode *inode, void *buf,
					      size_t buf_size)
{
	return -EPERM;
}

#endif

/* Sanity test specific functions */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_destroy_inode(struct inode *inode);
#endif

static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
{
	return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
}

/*
 * We use page status Private2 to indicate there is an ordered extent with
 * unfinished IO.
 *
 * Rename the Private2 accessors to Ordered, to improve readability.
 */
#define PageOrdered(page)		PagePrivate2(page)
#define SetPageOrdered(page)		SetPagePrivate2(page)
#define ClearPageOrdered(page)		ClearPagePrivate2(page)
#define folio_test_ordered(folio)	folio_test_private_2(folio)
#define folio_set_ordered(folio)	folio_set_private_2(folio)
#define folio_clear_ordered(folio)	folio_clear_private_2(folio)

#endif