提交 97d0bf96 编写于 作者: L Linus Torvalds

Merge tag 'for-5.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "User visible changes:
   - new block group profiles: RAID1 with 3- and 4- copies
       - RAID1 in btrfs has always 2 copies, now add support for 3 and 4
       - this is an incompat feature (named RAID1C34)
       - recommended use of RAID1C3 is replacement of RAID6 profile on
         metadata, this brings a more reliable resiliency against 2
         device loss/damage

   - support for new checksums
       - per-filesystem, set at mkfs time
       - fast hash (crc32c successor): xxhash, 64bit digest
       - strong hashes (both 256bit): sha256 (slower, FIPS), blake2b
         (faster)
       - the blake2b module goes via the crypto tree, btrfs.ko has a
         soft dependency

   - speed up lseek, don't take inode locks unnecessarily, this can
     speed up parallel SEEK_CUR/SEEK_SET/SEEK_END by 80%

   - send:
       - allow clone operations within the same file
       - limit maximum number of sent clone references to avoid slow
         backref walking

   - error message improvements: device scan prints process name and PID

  Core changes:
   - cleanups
       - remove unique workqueue helpers, used to provide a way to avoid
         deadlocks in the workqueue code, now done in a simpler way
       - remove lots of indirect function calls in compression code
       - extent IO tree code moved out of extent_io.c
       - cleanup backup superblock handling at mount time
       - transaction life cycle documentation and cleanups
       - locking code cleanups, annotations and documentation
       - add more cold, const, pure function attributes
       - removal of unused or redundant struct members or variables

   - new tree-checker sanity tests
       - try to detect missing INODE_ITEM, cross-reference checks of
         DIR_ITEM, DIR_INDEX, INODE_REF, and XATTR_* items

   - remove own bio scheduling code (used to avoid checksum submissions
     being stuck behind other IO), replaced by cgroup controller-based
     code to allow better control and avoid priority inversions in cases
     where the custom and cgroup scheduling disagreed

  Fixes:
   - avoid getting stuck during cyclic writebacks

   - fix trimming of ranges crossing block group boundaries

   - fix rename exchange on subvolumes, all involved subvolumes need to
     be recorded in the transaction"

* tag 'for-5.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (137 commits)
  btrfs: drop bdev argument from submit_extent_page
  btrfs: remove extent_map::bdev
  btrfs: drop bio_set_dev where not needed
  btrfs: get bdev directly from fs_devices in submit_extent_page
  btrfs: record all roots for rename exchange on a subvol
  Btrfs: fix block group remaining RO forever after error during device replace
  btrfs: scrub: Don't check free space before marking a block group RO
  btrfs: change btrfs_fs_devices::rotating to bool
  btrfs: change btrfs_fs_devices::seeding to bool
  btrfs: rename btrfs_block_group_cache
  btrfs: block-group: Reuse the item key from caller of read_one_block_group()
  btrfs: block-group: Refactor btrfs_read_block_groups()
  btrfs: document extent buffer locking
  btrfs: access eb::blocking_writers according to ACCESS_ONCE policies
  btrfs: set blocking_writers directly, no increment or decrement
  btrfs: merge blocking_writers branches in btrfs_tree_read_lock
  btrfs: drop incompat bit for raid1c34 after last block group is gone
  btrfs: add incompat for raid1 with 3, 4 copies
  btrfs: add support for 4-copy replication (raid1c4)
  btrfs: add support for 3-copy replication (raid1c3)
  ...
......@@ -5,6 +5,8 @@ config BTRFS_FS
select CRYPTO
select CRYPTO_CRC32C
select LIBCRC32C
select CRYPTO_XXHASH
select CRYPTO_SHA256
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
......
......@@ -53,24 +53,12 @@ struct btrfs_workqueue {
struct __btrfs_workqueue *high;
};
static void normal_work_helper(struct btrfs_work *work);
#define BTRFS_WORK_HELPER(name) \
noinline_for_stack void btrfs_##name(struct work_struct *arg) \
{ \
struct btrfs_work *work = container_of(arg, struct btrfs_work, \
normal_work); \
normal_work_helper(work); \
}
struct btrfs_fs_info *
btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
{
return wq->fs_info;
}
struct btrfs_fs_info *
btrfs_work_owner(const struct btrfs_work *work)
struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work)
{
return work->wq->fs_info;
}
......@@ -89,29 +77,6 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
}
BTRFS_WORK_HELPER(worker_helper);
BTRFS_WORK_HELPER(delalloc_helper);
BTRFS_WORK_HELPER(flush_delalloc_helper);
BTRFS_WORK_HELPER(cache_helper);
BTRFS_WORK_HELPER(submit_helper);
BTRFS_WORK_HELPER(fixup_helper);
BTRFS_WORK_HELPER(endio_helper);
BTRFS_WORK_HELPER(endio_meta_helper);
BTRFS_WORK_HELPER(endio_meta_write_helper);
BTRFS_WORK_HELPER(endio_raid56_helper);
BTRFS_WORK_HELPER(endio_repair_helper);
BTRFS_WORK_HELPER(rmw_helper);
BTRFS_WORK_HELPER(endio_write_helper);
BTRFS_WORK_HELPER(freespace_write_helper);
BTRFS_WORK_HELPER(delayed_meta_helper);
BTRFS_WORK_HELPER(readahead_helper);
BTRFS_WORK_HELPER(qgroup_rescan_helper);
BTRFS_WORK_HELPER(extent_refs_helper);
BTRFS_WORK_HELPER(scrub_helper);
BTRFS_WORK_HELPER(scrubwrc_helper);
BTRFS_WORK_HELPER(scrubnc_helper);
BTRFS_WORK_HELPER(scrubparity_helper);
static struct __btrfs_workqueue *
__btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
unsigned int flags, int limit_active, int thresh)
......@@ -252,16 +217,16 @@ static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
}
}
static void run_ordered_work(struct __btrfs_workqueue *wq)
static void run_ordered_work(struct __btrfs_workqueue *wq,
struct btrfs_work *self)
{
struct list_head *list = &wq->ordered_list;
struct btrfs_work *work;
spinlock_t *lock = &wq->list_lock;
unsigned long flags;
bool free_self = false;
while (1) {
void *wtag;
spin_lock_irqsave(lock, flags);
if (list_empty(list))
break;
......@@ -287,22 +252,53 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
list_del(&work->ordered_list);
spin_unlock_irqrestore(lock, flags);
/*
* We don't want to call the ordered free functions with the
* lock held though. Save the work as tag for the trace event,
* because the callback could free the structure.
*/
wtag = work;
work->ordered_free(work);
trace_btrfs_all_work_done(wq->fs_info, wtag);
if (work == self) {
/*
* This is the work item that the worker is currently
* executing.
*
* The kernel workqueue code guarantees non-reentrancy
* of work items. I.e., if a work item with the same
* address and work function is queued twice, the second
* execution is blocked until the first one finishes. A
* work item may be freed and recycled with the same
* work function; the workqueue code assumes that the
* original work item cannot depend on the recycled work
* item in that case (see find_worker_executing_work()).
*
* Note that different types of Btrfs work can depend on
* each other, and one type of work on one Btrfs
* filesystem may even depend on the same type of work
* on another Btrfs filesystem via, e.g., a loop device.
* Therefore, we must not allow the current work item to
* be recycled until we are really done, otherwise we
* break the above assumption and can deadlock.
*/
free_self = true;
} else {
/*
* We don't want to call the ordered free functions with
* the lock held.
*/
work->ordered_free(work);
/* NB: work must not be dereferenced past this point. */
trace_btrfs_all_work_done(wq->fs_info, work);
}
}
spin_unlock_irqrestore(lock, flags);
if (free_self) {
self->ordered_free(self);
/* NB: self must not be dereferenced past this point. */
trace_btrfs_all_work_done(wq->fs_info, self);
}
}
static void normal_work_helper(struct btrfs_work *work)
static void btrfs_work_helper(struct work_struct *normal_work)
{
struct btrfs_work *work = container_of(normal_work, struct btrfs_work,
normal_work);
struct __btrfs_workqueue *wq;
void *wtag;
int need_order = 0;
/*
......@@ -316,29 +312,26 @@ static void normal_work_helper(struct btrfs_work *work)
if (work->ordered_func)
need_order = 1;
wq = work->wq;
/* Safe for tracepoints in case work gets freed by the callback */
wtag = work;
trace_btrfs_work_sched(work);
thresh_exec_hook(wq);
work->func(work);
if (need_order) {
set_bit(WORK_DONE_BIT, &work->flags);
run_ordered_work(wq);
run_ordered_work(wq, work);
} else {
/* NB: work must not be dereferenced past this point. */
trace_btrfs_all_work_done(wq->fs_info, work);
}
if (!need_order)
trace_btrfs_all_work_done(wq->fs_info, wtag);
}
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
btrfs_func_t func,
btrfs_func_t ordered_func,
btrfs_func_t ordered_free)
void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
btrfs_func_t ordered_func, btrfs_func_t ordered_free)
{
work->func = func;
work->ordered_func = ordered_func;
work->ordered_free = ordered_free;
INIT_WORK(&work->normal_work, uniq_func);
INIT_WORK(&work->normal_work, btrfs_work_helper);
INIT_LIST_HEAD(&work->ordered_list);
work->flags = 0;
}
......
......@@ -29,49 +29,20 @@ struct btrfs_work {
unsigned long flags;
};
#define BTRFS_WORK_HELPER_PROTO(name) \
void btrfs_##name(struct work_struct *arg)
BTRFS_WORK_HELPER_PROTO(worker_helper);
BTRFS_WORK_HELPER_PROTO(delalloc_helper);
BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
BTRFS_WORK_HELPER_PROTO(cache_helper);
BTRFS_WORK_HELPER_PROTO(submit_helper);
BTRFS_WORK_HELPER_PROTO(fixup_helper);
BTRFS_WORK_HELPER_PROTO(endio_helper);
BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
BTRFS_WORK_HELPER_PROTO(endio_repair_helper);
BTRFS_WORK_HELPER_PROTO(rmw_helper);
BTRFS_WORK_HELPER_PROTO(endio_write_helper);
BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
BTRFS_WORK_HELPER_PROTO(readahead_helper);
BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
BTRFS_WORK_HELPER_PROTO(scrub_helper);
BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
const char *name,
unsigned int flags,
int limit_active,
int thresh);
void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
btrfs_func_t func,
btrfs_func_t ordered_func,
btrfs_func_t ordered_free);
void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
btrfs_func_t ordered_func, btrfs_func_t ordered_free);
void btrfs_queue_work(struct btrfs_workqueue *wq,
struct btrfs_work *work);
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
struct btrfs_fs_info *btrfs_work_owner(const struct btrfs_work *work);
struct btrfs_fs_info *btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
#endif
此差异已折叠。
......@@ -34,7 +34,7 @@ struct btrfs_caching_control {
struct mutex mutex;
wait_queue_head_t wait;
struct btrfs_work work;
struct btrfs_block_group_cache *block_group;
struct btrfs_block_group *block_group;
u64 progress;
refcount_t count;
};
......@@ -42,14 +42,15 @@ struct btrfs_caching_control {
/* Once caching_thread() finds this much free space, it will wake up waiters. */
#define CACHING_CTL_WAKE_UP SZ_2M
struct btrfs_block_group_cache {
struct btrfs_key key;
struct btrfs_block_group_item item;
struct btrfs_block_group {
struct btrfs_fs_info *fs_info;
struct inode *inode;
spinlock_t lock;
u64 start;
u64 length;
u64 pinned;
u64 reserved;
u64 used;
u64 delalloc_bytes;
u64 bytes_super;
u64 flags;
......@@ -159,7 +160,7 @@ struct btrfs_block_group_cache {
#ifdef CONFIG_BTRFS_DEBUG
static inline int btrfs_should_fragment_free_space(
struct btrfs_block_group_cache *block_group)
struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
......@@ -170,29 +171,29 @@ static inline int btrfs_should_fragment_free_space(
}
#endif
struct btrfs_block_group_cache *btrfs_lookup_first_block_group(
struct btrfs_block_group *btrfs_lookup_first_block_group(
struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_block_group *btrfs_lookup_block_group(
struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group_cache *btrfs_next_block_group(
struct btrfs_block_group_cache *cache);
void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
struct btrfs_block_group *btrfs_next_block_group(
struct btrfs_block_group *cache);
void btrfs_get_block_group(struct btrfs_block_group *cache);
void btrfs_put_block_group(struct btrfs_block_group *cache);
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
const u64 start);
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg);
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
u64 num_bytes);
int btrfs_wait_block_group_cache_done(struct btrfs_block_group_cache *cache);
int btrfs_cache_block_group(struct btrfs_block_group_cache *cache,
int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
int btrfs_cache_block_group(struct btrfs_block_group *cache,
int load_cache_only);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
struct btrfs_block_group_cache *cache);
u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_block_group *cache);
u64 add_new_free_space(struct btrfs_block_group *block_group,
u64 start, u64 end);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
......@@ -200,21 +201,22 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg);
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
u64 type, u64 chunk_offset, u64 size);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
bool do_chunk_alloc);
void btrfs_dec_block_group_ro(struct btrfs_block_group *cache);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc);
int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
u64 ram_bytes, u64 num_bytes, int delalloc);
void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
u64 num_bytes, int delalloc);
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
enum btrfs_chunk_alloc_enum force);
......@@ -239,8 +241,7 @@ static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}
static inline int btrfs_block_group_cache_done(
struct btrfs_block_group_cache *cache)
static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
{
smp_mb();
return cache->cached == BTRFS_CACHE_FINISHED ||
......
......@@ -63,9 +63,6 @@ struct btrfs_inode {
/* held while logging the inode in tree-log.c */
struct mutex log_mutex;
/* held while doing delalloc reservations */
struct mutex delalloc_mutex;
/* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;
......
......@@ -29,6 +29,41 @@
#include "extent_io.h"
#include "extent_map.h"
int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
u64 start, struct page **pages, unsigned long *out_pages,
unsigned long *total_in, unsigned long *total_out);
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zlib_decompress(struct list_head *ws, unsigned char *data_in,
struct page *dest_page, unsigned long start_byte, size_t srclen,
size_t destlen);
struct list_head *zlib_alloc_workspace(unsigned int level);
void zlib_free_workspace(struct list_head *ws);
struct list_head *zlib_get_workspace(unsigned int level);
int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
u64 start, struct page **pages, unsigned long *out_pages,
unsigned long *total_in, unsigned long *total_out);
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int lzo_decompress(struct list_head *ws, unsigned char *data_in,
struct page *dest_page, unsigned long start_byte, size_t srclen,
size_t destlen);
struct list_head *lzo_alloc_workspace(unsigned int level);
void lzo_free_workspace(struct list_head *ws);
int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
u64 start, struct page **pages, unsigned long *out_pages,
unsigned long *total_in, unsigned long *total_out);
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zstd_decompress(struct list_head *ws, unsigned char *data_in,
struct page *dest_page, unsigned long start_byte, size_t srclen,
size_t destlen);
void zstd_init_workspace_manager(void);
void zstd_cleanup_workspace_manager(void);
struct list_head *zstd_alloc_workspace(unsigned int level);
void zstd_free_workspace(struct list_head *ws);
struct list_head *zstd_get_workspace(unsigned int level);
void zstd_put_workspace(struct list_head *ws);
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
const char* btrfs_compress_type2str(enum btrfs_compression_type type)
......@@ -39,6 +74,8 @@ const char* btrfs_compress_type2str(enum btrfs_compression_type type)
case BTRFS_COMPRESS_ZSTD:
case BTRFS_COMPRESS_NONE:
return btrfs_compress_types[type];
default:
break;
}
return NULL;
......@@ -60,6 +97,70 @@ bool btrfs_compress_is_valid_type(const char *str, size_t len)
return false;
}
static int compression_compress_pages(int type, struct list_head *ws,
struct address_space *mapping, u64 start, struct page **pages,
unsigned long *out_pages, unsigned long *total_in,
unsigned long *total_out)
{
switch (type) {
case BTRFS_COMPRESS_ZLIB:
return zlib_compress_pages(ws, mapping, start, pages,
out_pages, total_in, total_out);
case BTRFS_COMPRESS_LZO:
return lzo_compress_pages(ws, mapping, start, pages,
out_pages, total_in, total_out);
case BTRFS_COMPRESS_ZSTD:
return zstd_compress_pages(ws, mapping, start, pages,
out_pages, total_in, total_out);
case BTRFS_COMPRESS_NONE:
default:
/*
* This can't happen, the type is validated several times
* before we get here. As a sane fallback, return what the
* callers will understand as 'no compression happened'.
*/
return -E2BIG;
}
}
static int compression_decompress_bio(int type, struct list_head *ws,
struct compressed_bio *cb)
{
switch (type) {
case BTRFS_COMPRESS_ZLIB: return zlib_decompress_bio(ws, cb);
case BTRFS_COMPRESS_LZO: return lzo_decompress_bio(ws, cb);
case BTRFS_COMPRESS_ZSTD: return zstd_decompress_bio(ws, cb);
case BTRFS_COMPRESS_NONE:
default:
/*
* This can't happen, the type is validated several times
* before we get here.
*/
BUG();
}
}
static int compression_decompress(int type, struct list_head *ws,
unsigned char *data_in, struct page *dest_page,
unsigned long start_byte, size_t srclen, size_t destlen)
{
switch (type) {
case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page,
start_byte, srclen, destlen);
case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_page,
start_byte, srclen, destlen);
case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page,
start_byte, srclen, destlen);
case BTRFS_COMPRESS_NONE:
default:
/*
* This can't happen, the type is validated several times
* before we get here.
*/
BUG();
}
}
static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
......@@ -311,7 +412,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages,
unsigned int write_flags)
unsigned int write_flags,
struct cgroup_subsys_state *blkcg_css)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct bio *bio = NULL;
......@@ -320,7 +422,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
int pg_index = 0;
struct page *page;
u64 first_byte = disk_start;
struct block_device *bdev;
blk_status_t ret;
int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
......@@ -339,13 +440,15 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;
bdev = fs_info->fs_devices->latest_bdev;
bio = btrfs_bio_alloc(first_byte);
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
if (blkcg_css) {
bio->bi_opf |= REQ_CGROUP_PUNT;
bio_associate_blkg_from_css(bio, blkcg_css);
}
refcount_set(&cb->pending_bios, 1);
/* create and submit bios for the compressed pages */
......@@ -378,14 +481,13 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(fs_info, bio, 0, 1);
ret = btrfs_map_bio(fs_info, bio, 0);
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
}
bio = btrfs_bio_alloc(first_byte);
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
......@@ -409,7 +511,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(fs_info, bio, 0, 1);
ret = btrfs_map_bio(fs_info, bio, 0);
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
......@@ -553,7 +655,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
unsigned long nr_pages;
unsigned long pg_index;
struct page *page;
struct block_device *bdev;
struct bio *comp_bio;
u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
u64 em_len;
......@@ -604,8 +705,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
if (!cb->compressed_pages)
goto fail1;
bdev = fs_info->fs_devices->latest_bdev;
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
__GFP_HIGHMEM);
......@@ -624,7 +723,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->len = bio->bi_iter.bi_size;
comp_bio = btrfs_bio_alloc(cur_disk_byte);
bio_set_dev(comp_bio, bdev);
comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
......@@ -668,14 +766,13 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
fs_info->sectorsize);
sums += csum_size * nr_sectors;
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
if (ret) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
comp_bio = btrfs_bio_alloc(cur_disk_byte);
bio_set_dev(comp_bio, bdev);
comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
......@@ -693,7 +790,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
if (ret) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
......@@ -764,26 +861,6 @@ struct heuristic_ws {
static struct workspace_manager heuristic_wsm;
static void heuristic_init_workspace_manager(void)
{
btrfs_init_workspace_manager(&heuristic_wsm, &btrfs_heuristic_compress);
}
static void heuristic_cleanup_workspace_manager(void)
{
btrfs_cleanup_workspace_manager(&heuristic_wsm);
}
static struct list_head *heuristic_get_workspace(unsigned int level)
{
return btrfs_get_workspace(&heuristic_wsm, level);
}
static void heuristic_put_workspace(struct list_head *ws)
{
btrfs_put_workspace(&heuristic_wsm, ws);
}
static void free_heuristic_ws(struct list_head *ws)
{
struct heuristic_ws *workspace;
......@@ -824,12 +901,7 @@ static struct list_head *alloc_heuristic_ws(unsigned int level)
}
const struct btrfs_compress_op btrfs_heuristic_compress = {
.init_workspace_manager = heuristic_init_workspace_manager,
.cleanup_workspace_manager = heuristic_cleanup_workspace_manager,
.get_workspace = heuristic_get_workspace,
.put_workspace = heuristic_put_workspace,
.alloc_workspace = alloc_heuristic_ws,
.free_workspace = free_heuristic_ws,
.workspace_manager = &heuristic_wsm,
};
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
......@@ -840,13 +912,44 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
&btrfs_zstd_compress,
};
void btrfs_init_workspace_manager(struct workspace_manager *wsm,
const struct btrfs_compress_op *ops)
static struct list_head *alloc_workspace(int type, unsigned int level)
{
struct list_head *workspace;
switch (type) {
case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(level);
case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level);
case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(level);
case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level);
default:
/*
* This can't happen, the type is validated several times
* before we get here.
*/
BUG();
}
}
wsm->ops = ops;
static void free_workspace(int type, struct list_head *ws)
{
switch (type) {
case BTRFS_COMPRESS_NONE: return free_heuristic_ws(ws);
case BTRFS_COMPRESS_ZLIB: return zlib_free_workspace(ws);
case BTRFS_COMPRESS_LZO: return lzo_free_workspace(ws);
case BTRFS_COMPRESS_ZSTD: return zstd_free_workspace(ws);
default:
/*
* This can't happen, the type is validated several times
* before we get here.
*/
BUG();
}
}
static void btrfs_init_workspace_manager(int type)
{
struct workspace_manager *wsm;
struct list_head *workspace;
wsm = btrfs_compress_op[type]->workspace_manager;
INIT_LIST_HEAD(&wsm->idle_ws);
spin_lock_init(&wsm->ws_lock);
atomic_set(&wsm->total_ws, 0);
......@@ -856,7 +959,7 @@ void btrfs_init_workspace_manager(struct workspace_manager *wsm,
* Preallocate one workspace for each compression type so we can
* guarantee forward progress in the worst case
*/
workspace = wsm->ops->alloc_workspace(0);
workspace = alloc_workspace(type, 0);
if (IS_ERR(workspace)) {
pr_warn(
"BTRFS: cannot preallocate compression workspace, will try later\n");
......@@ -867,14 +970,16 @@ void btrfs_init_workspace_manager(struct workspace_manager *wsm,
}
}
void btrfs_cleanup_workspace_manager(struct workspace_manager *wsman)
static void btrfs_cleanup_workspace_manager(int type)
{
struct workspace_manager *wsman;
struct list_head *ws;
wsman = btrfs_compress_op[type]->workspace_manager;
while (!list_empty(&wsman->idle_ws)) {
ws = wsman->idle_ws.next;
list_del(ws);
wsman->ops->free_workspace(ws);
free_workspace(type, ws);
atomic_dec(&wsman->total_ws);
}
}
......@@ -885,9 +990,9 @@ void btrfs_cleanup_workspace_manager(struct workspace_manager *wsman)
* Preallocation makes a forward progress guarantees and we do not return
* errors.
*/
struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
unsigned int level)
struct list_head *btrfs_get_workspace(int type, unsigned int level)
{
struct workspace_manager *wsm;
struct list_head *workspace;
int cpus = num_online_cpus();
unsigned nofs_flag;
......@@ -897,6 +1002,7 @@ struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
wait_queue_head_t *ws_wait;
int *free_ws;
wsm = btrfs_compress_op[type]->workspace_manager;
idle_ws = &wsm->idle_ws;
ws_lock = &wsm->ws_lock;
total_ws = &wsm->total_ws;
......@@ -932,7 +1038,7 @@ struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
* context of btrfs_compress_bio/btrfs_compress_pages
*/
nofs_flag = memalloc_nofs_save();
workspace = wsm->ops->alloc_workspace(level);
workspace = alloc_workspace(type, level);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(workspace)) {
......@@ -965,21 +1071,34 @@ struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
static struct list_head *get_workspace(int type, int level)
{
return btrfs_compress_op[type]->get_workspace(level);
switch (type) {
case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level);
case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level);
case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(type, level);
case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level);
default:
/*
* This can't happen, the type is validated several times
* before we get here.
*/
BUG();
}
}
/*
* put a workspace struct back on the list or free it if we have enough
* idle ones sitting around
*/
void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws)
void btrfs_put_workspace(int type, struct list_head *ws)
{
struct workspace_manager *wsm;
struct list_head *idle_ws;
spinlock_t *ws_lock;
atomic_t *total_ws;
wait_queue_head_t *ws_wait;
int *free_ws;
wsm = btrfs_compress_op[type]->workspace_manager;
idle_ws = &wsm->idle_ws;
ws_lock = &wsm->ws_lock;
total_ws = &wsm->total_ws;
......@@ -995,7 +1114,7 @@ void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws)
}
spin_unlock(ws_lock);
wsm->ops->free_workspace(ws);
free_workspace(type, ws);
atomic_dec(total_ws);
wake:
cond_wake_up(ws_wait);
......@@ -1003,7 +1122,18 @@ void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws)
static void put_workspace(int type, struct list_head *ws)
{
return btrfs_compress_op[type]->put_workspace(ws);
switch (type) {
case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws);
case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws);
case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(type, ws);
case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws);
default:
/*
* This can't happen, the type is validated several times
* before we get here.
*/
BUG();
}
}
/*
......@@ -1042,10 +1172,8 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
level = btrfs_compress_set_level(type, level);
workspace = get_workspace(type, level);
ret = btrfs_compress_op[type]->compress_pages(workspace, mapping,
start, pages,
out_pages,
total_in, total_out);
ret = compression_compress_pages(type, workspace, mapping, start, pages,
out_pages, total_in, total_out);
put_workspace(type, workspace);
return ret;
}
......@@ -1071,7 +1199,7 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
int type = cb->compress_type;
workspace = get_workspace(type, 0);
ret = btrfs_compress_op[type]->decompress_bio(workspace, cb);
ret = compression_decompress_bio(type, workspace, cb);
put_workspace(type, workspace);
return ret;
......@@ -1089,9 +1217,8 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
int ret;
workspace = get_workspace(type, 0);
ret = btrfs_compress_op[type]->decompress(workspace, data_in,
dest_page, start_byte,
srclen, destlen);
ret = compression_decompress(type, workspace, data_in, dest_page,
start_byte, srclen, destlen);
put_workspace(type, workspace);
return ret;
......@@ -1099,18 +1226,18 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
void __init btrfs_init_compress(void)
{
int i;
for (i = 0; i < BTRFS_NR_WORKSPACE_MANAGERS; i++)
btrfs_compress_op[i]->init_workspace_manager();
btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
zstd_init_workspace_manager();
}
void __cold btrfs_exit_compress(void)
{
int i;
for (i = 0; i < BTRFS_NR_WORKSPACE_MANAGERS; i++)
btrfs_compress_op[i]->cleanup_workspace_manager();
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
zstd_cleanup_workspace_manager();
}
/*
......
......@@ -93,7 +93,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages,
unsigned int write_flags);
unsigned int write_flags,
struct cgroup_subsys_state *blkcg_css);
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
......@@ -104,11 +105,10 @@ enum btrfs_compression_type {
BTRFS_COMPRESS_ZLIB = 1,
BTRFS_COMPRESS_LZO = 2,
BTRFS_COMPRESS_ZSTD = 3,
BTRFS_COMPRESS_TYPES = 3,
BTRFS_NR_COMPRESS_TYPES = 4,
};
struct workspace_manager {
const struct btrfs_compress_op *ops;
struct list_head idle_ws;
spinlock_t ws_lock;
/* Number of free workspaces */
......@@ -119,50 +119,18 @@ struct workspace_manager {
wait_queue_head_t ws_wait;
};
void btrfs_init_workspace_manager(struct workspace_manager *wsm,
const struct btrfs_compress_op *ops);
struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
unsigned int level);
void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws);
void btrfs_cleanup_workspace_manager(struct workspace_manager *wsm);
struct list_head *btrfs_get_workspace(int type, unsigned int level);
void btrfs_put_workspace(int type, struct list_head *ws);
struct btrfs_compress_op {
void (*init_workspace_manager)(void);
void (*cleanup_workspace_manager)(void);
struct list_head *(*get_workspace)(unsigned int level);
void (*put_workspace)(struct list_head *ws);
struct list_head *(*alloc_workspace)(unsigned int level);
void (*free_workspace)(struct list_head *workspace);
int (*compress_pages)(struct list_head *workspace,
struct address_space *mapping,
u64 start,
struct page **pages,
unsigned long *out_pages,
unsigned long *total_in,
unsigned long *total_out);
int (*decompress_bio)(struct list_head *workspace,
struct compressed_bio *cb);
int (*decompress)(struct list_head *workspace,
unsigned char *data_in,
struct page *dest_page,
unsigned long start_byte,
size_t srclen, size_t destlen);
struct workspace_manager *workspace_manager;
/* Maximum level supported by the compression algorithm */
unsigned int max_level;
unsigned int default_level;
};
/* The heuristic workspaces are managed via the 0th workspace manager */
#define BTRFS_NR_WORKSPACE_MANAGERS (BTRFS_COMPRESS_TYPES + 1)
#define BTRFS_NR_WORKSPACE_MANAGERS BTRFS_NR_COMPRESS_TYPES
extern const struct btrfs_compress_op btrfs_heuristic_compress;
extern const struct btrfs_compress_op btrfs_zlib_compress;
......
......@@ -32,8 +32,13 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
static const struct btrfs_csums {
u16 size;
const char *name;
const char *driver;
} btrfs_csums[] = {
[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
.driver = "blake2b-256" },
};
int btrfs_super_csum_size(const struct btrfs_super_block *s)
......@@ -51,34 +56,25 @@ const char *btrfs_super_csum_name(u16 csum_type)
return btrfs_csums[csum_type].name;
}
struct btrfs_path *btrfs_alloc_path(void)
/*
* Return driver name if defined, otherwise the name that's also a valid driver
* name
*/
const char *btrfs_super_csum_driver(u16 csum_type)
{
return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
/* csum type is validated at mount time */
return btrfs_csums[csum_type].driver ?:
btrfs_csums[csum_type].name;
}
/*
* set all locked nodes in the path to blocking locks. This should
* be done before scheduling
*/
noinline void btrfs_set_path_blocking(struct btrfs_path *p)
size_t __const btrfs_get_num_csums(void)
{
int i;
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
if (!p->nodes[i] || !p->locks[i])
continue;
/*
* If we currently have a spinning reader or writer lock this
* will bump the count of blocking holders and drop the
* spinlock.
*/
if (p->locks[i] == BTRFS_READ_LOCK) {
btrfs_set_lock_blocking_read(p->nodes[i]);
p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
} else if (p->locks[i] == BTRFS_WRITE_LOCK) {
btrfs_set_lock_blocking_write(p->nodes[i]);
p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
}
}
return ARRAY_SIZE(btrfs_csums);
}
struct btrfs_path *btrfs_alloc_path(void)
{
return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
}
/* this also releases the path */
......@@ -1125,7 +1121,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
parent_start = buf->start;
extent_buffer_get(cow);
atomic_inc(&cow->refs);
ret = tree_mod_log_insert_root(root->node, cow, 1);
BUG_ON(ret < 0);
rcu_assign_pointer(root->node, cow);
......@@ -1563,7 +1559,7 @@ static int comp_keys(const struct btrfs_disk_key *disk,
/*
* same as comp_keys only with two btrfs_key's
*/
int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
{
if (k1->objectid > k2->objectid)
return 1;
......@@ -2036,7 +2032,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* update the path */
if (left) {
if (btrfs_header_nritems(left) > orig_slot) {
extent_buffer_get(left);
atomic_inc(&left->refs);
/* left was locked after cow */
path->nodes[level] = left;
path->slots[level + 1] -= 1;
......@@ -2378,32 +2374,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
}
}
/*
* This releases any locks held in the path starting at level and
* going all the way up to the root.
*
* btrfs_search_slot will keep the lock held on higher nodes in a few
* corner cases, such as COW of the block at slot zero in the node. This
* ignores those rules, and it should only be called when there are no
* more updates to be done higher up in the tree.
*/
noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
int i;
if (path->keep_locks)
return;
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
if (!path->nodes[i])
continue;
if (!path->locks[i])
continue;
btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
path->locks[i] = 0;
}
}
/*
* helper function for btrfs_search_slot. The goal is to find a block
* in cache without setting the path to blocking. If we find the block
......@@ -2652,7 +2622,7 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
} else {
b = root->commit_root;
extent_buffer_get(b);
atomic_inc(&b->refs);
}
level = btrfs_header_level(b);
/*
......@@ -2785,12 +2755,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
while (b) {
int dec = 0;
level = btrfs_header_level(b);
/*
* setup the path here so we can release it under lock
* contention with the cow code
*/
if (cow) {
bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
......@@ -2861,73 +2829,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (ret < 0)
goto done;
if (level != 0) {
int dec = 0;
if (ret && slot > 0) {
dec = 1;
slot -= 1;
}
p->slots[level] = slot;
err = setup_nodes_for_search(trans, root, p, b, level,
ins_len, &write_lock_level);
if (err == -EAGAIN)
goto again;
if (err) {
ret = err;
goto done;
}
b = p->nodes[level];
slot = p->slots[level];
/*
* slot 0 is special, if we change the key
* we have to update the parent pointer
* which means we must have a write lock
* on the parent
*/
if (slot == 0 && ins_len &&
write_lock_level < level + 1) {
write_lock_level = level + 1;
btrfs_release_path(p);
goto again;
}
unlock_up(p, level, lowest_unlock,
min_write_lock_level, &write_lock_level);
if (level == lowest_level) {
if (dec)
p->slots[level]++;
goto done;
}
err = read_block_for_search(root, p, &b, level,
slot, key);
if (err == -EAGAIN)
goto again;
if (err) {
ret = err;
goto done;
}
if (!p->skip_locking) {
level = btrfs_header_level(b);
if (level <= write_lock_level) {
if (!btrfs_try_tree_write_lock(b)) {
btrfs_set_path_blocking(p);
btrfs_tree_lock(b);
}
p->locks[level] = BTRFS_WRITE_LOCK;
} else {
if (!btrfs_tree_read_lock_atomic(b)) {
btrfs_set_path_blocking(p);
btrfs_tree_read_lock(b);
}
p->locks[level] = BTRFS_READ_LOCK;
}
p->nodes[level] = b;
}
} else {
if (level == 0) {
p->slots[level] = slot;
if (ins_len > 0 &&
btrfs_leaf_free_space(b) < ins_len) {
......@@ -2952,6 +2854,67 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
min_write_lock_level, NULL);
goto done;
}
if (ret && slot > 0) {
dec = 1;
slot--;
}
p->slots[level] = slot;
err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
&write_lock_level);
if (err == -EAGAIN)
goto again;
if (err) {
ret = err;
goto done;
}
b = p->nodes[level];
slot = p->slots[level];
/*
* Slot 0 is special, if we change the key we have to update
* the parent pointer which means we must have a write lock on
* the parent
*/
if (slot == 0 && ins_len && write_lock_level < level + 1) {
write_lock_level = level + 1;
btrfs_release_path(p);
goto again;
}
unlock_up(p, level, lowest_unlock, min_write_lock_level,
&write_lock_level);
if (level == lowest_level) {
if (dec)
p->slots[level]++;
goto done;
}
err = read_block_for_search(root, p, &b, level, slot, key);
if (err == -EAGAIN)
goto again;
if (err) {
ret = err;
goto done;
}
if (!p->skip_locking) {
level = btrfs_header_level(b);
if (level <= write_lock_level) {
if (!btrfs_try_tree_write_lock(b)) {
btrfs_set_path_blocking(p);
btrfs_tree_lock(b);
}
p->locks[level] = BTRFS_WRITE_LOCK;
} else {
if (!btrfs_tree_read_lock_atomic(b)) {
btrfs_set_path_blocking(p);
btrfs_tree_read_lock(b);
}
p->locks[level] = BTRFS_READ_LOCK;
}
p->nodes[level] = b;
}
}
ret = 1;
done:
......@@ -3008,6 +2971,8 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
p->locks[level] = BTRFS_READ_LOCK;
while (b) {
int dec = 0;
level = btrfs_header_level(b);
p->nodes[level] = b;
......@@ -3028,47 +2993,45 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
if (ret < 0)
goto done;
if (level != 0) {
int dec = 0;
if (ret && slot > 0) {
dec = 1;
slot -= 1;
}
if (level == 0) {
p->slots[level] = slot;
unlock_up(p, level, lowest_unlock, 0, NULL);
goto done;
}
if (level == lowest_level) {
if (dec)
p->slots[level]++;
goto done;
}
if (ret && slot > 0) {
dec = 1;
slot--;
}
p->slots[level] = slot;
unlock_up(p, level, lowest_unlock, 0, NULL);
err = read_block_for_search(root, p, &b, level,
slot, key);
if (err == -EAGAIN)
goto again;
if (err) {
ret = err;
goto done;
}
if (level == lowest_level) {
if (dec)
p->slots[level]++;
goto done;
}
level = btrfs_header_level(b);
if (!btrfs_tree_read_lock_atomic(b)) {
btrfs_set_path_blocking(p);
btrfs_tree_read_lock(b);
}
b = tree_mod_log_rewind(fs_info, p, b, time_seq);
if (!b) {
ret = -ENOMEM;
goto done;
}
p->locks[level] = BTRFS_READ_LOCK;
p->nodes[level] = b;
} else {
p->slots[level] = slot;
unlock_up(p, level, lowest_unlock, 0, NULL);
err = read_block_for_search(root, p, &b, level, slot, key);
if (err == -EAGAIN)
goto again;
if (err) {
ret = err;
goto done;
}
level = btrfs_header_level(b);
if (!btrfs_tree_read_lock_atomic(b)) {
btrfs_set_path_blocking(p);
btrfs_tree_read_lock(b);
}
b = tree_mod_log_rewind(fs_info, p, b, time_seq);
if (!b) {
ret = -ENOMEM;
goto done;
}
p->locks[level] = BTRFS_READ_LOCK;
p->nodes[level] = b;
}
ret = 1;
done:
......@@ -3433,7 +3396,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
free_extent_buffer(old);
add_root_to_dirty_list(root);
extent_buffer_get(c);
atomic_inc(&c->refs);
path->nodes[level] = c;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->slots[level] = 0;
......@@ -4966,7 +4929,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used(root, leaf->len);
extent_buffer_get(leaf);
atomic_inc(&leaf->refs);
btrfs_free_tree_block(trans, root, leaf, 0, 1);
free_extent_buffer_stale(leaf);
}
......@@ -5047,7 +5010,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* for possible call to del_ptr below
*/
slot = path->slots[1];
extent_buffer_get(leaf);
atomic_inc(&leaf->refs);
btrfs_set_path_blocking(path);
wret = push_leaf_left(trans, root, path, 1, 1,
......
......@@ -28,6 +28,7 @@
#include <linux/dynamic_debug.h>
#include <linux/refcount.h>
#include <linux/crc32c.h>
#include "extent-io-tree.h"
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
......@@ -38,7 +39,7 @@ struct btrfs_transaction;
struct btrfs_pending_snapshot;
struct btrfs_delayed_ref_root;
struct btrfs_space_info;
struct btrfs_block_group_cache;
struct btrfs_block_group;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
......@@ -56,9 +57,9 @@ struct btrfs_ref;
* filesystem data as well that can be used to read data in order to repair
* read errors on other disks.
*
* Current value is derived from RAID1 with 2 copies.
* Current value is derived from RAID1C4 with 4 copies.
*/
#define BTRFS_MAX_MIRRORS (2 + 1)
#define BTRFS_MAX_MIRRORS (4 + 1)
#define BTRFS_MAX_LEVEL 8
......@@ -291,7 +292,8 @@ struct btrfs_super_block {
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
BTRFS_FEATURE_INCOMPAT_RAID1C34)
#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
......@@ -413,7 +415,7 @@ struct btrfs_free_cluster {
/* We did a full search and couldn't create a cluster */
bool fragmented;
struct btrfs_block_group_cache *block_group;
struct btrfs_block_group *block_group;
/*
* when a cluster is allocated from a block group, we put the
* cluster onto a list in the block group so that it can
......@@ -476,8 +478,8 @@ struct btrfs_swapfile_pin {
void *ptr;
struct inode *inode;
/*
* If true, ptr points to a struct btrfs_block_group_cache. Otherwise,
* ptr points to a struct btrfs_device.
* If true, ptr points to a struct btrfs_block_group. Otherwise, ptr
* points to a struct btrfs_device.
*/
bool is_block_group;
};
......@@ -722,7 +724,6 @@ struct btrfs_fs_info {
struct btrfs_workqueue *endio_meta_write_workers;
struct btrfs_workqueue *endio_write_workers;
struct btrfs_workqueue *endio_freespace_worker;
struct btrfs_workqueue *submit_workers;
struct btrfs_workqueue *caching_workers;
struct btrfs_workqueue *readahead_workers;
......@@ -1519,18 +1520,18 @@ static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
}
/* struct btrfs_block_group_item */
BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
BTRFS_SETGET_STACK_FUNCS(stack_block_group_used, struct btrfs_block_group_item,
used, 64);
BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
BTRFS_SETGET_FUNCS(block_group_used, struct btrfs_block_group_item,
used, 64);
BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
BTRFS_SETGET_STACK_FUNCS(stack_block_group_chunk_objectid,
struct btrfs_block_group_item, chunk_objectid, 64);
BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
BTRFS_SETGET_FUNCS(block_group_chunk_objectid,
struct btrfs_block_group_item, chunk_objectid, 64);
BTRFS_SETGET_FUNCS(disk_block_group_flags,
BTRFS_SETGET_FUNCS(block_group_flags,
struct btrfs_block_group_item, flags, 64);
BTRFS_SETGET_STACK_FUNCS(block_group_flags,
BTRFS_SETGET_STACK_FUNCS(stack_block_group_flags,
struct btrfs_block_group_item, flags, 64);
/* struct btrfs_free_space_info */
......@@ -2163,6 +2164,9 @@ BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
const char *btrfs_super_csum_driver(u16 csum_type);
size_t __const btrfs_get_num_csums(void);
/*
* The leaf data grows from end-to-front in the node.
......@@ -2397,7 +2401,7 @@ static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info,
int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 num_bytes);
void btrfs_free_excluded_extents(struct btrfs_block_group_cache *cache);
void btrfs_free_excluded_extents(struct btrfs_block_group *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
unsigned long count);
void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
......@@ -2453,8 +2457,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_ref *generic_ref);
int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_get_block_group_trimming(struct btrfs_block_group *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group *cache);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
enum btrfs_reserve_flush_enum {
......@@ -2507,7 +2511,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot);
int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
......@@ -2567,8 +2571,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
void btrfs_release_path(struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
void btrfs_set_path_blocking(struct btrfs_path *p);
void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int slot, int nr);
......@@ -2870,10 +2872,9 @@ int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void);
void __cold btrfs_destroy_cachep(void);
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *new,
struct btrfs_path *path);
struct btrfs_root *root, struct btrfs_path *path);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root, int *was_new);
struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset,
u64 start, u64 end, int create);
......@@ -2909,7 +2910,7 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int btrfs_is_empty_uuid(u8 *uuid);
int __pure btrfs_is_empty_uuid(u8 *uuid);
int btrfs_defrag_file(struct inode *inode, struct file *file,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_pages);
......@@ -3143,7 +3144,7 @@ __cold
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
const char *btrfs_decode_error(int errno);
const char * __attribute_const__ btrfs_decode_error(int errno);
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
......
......@@ -307,7 +307,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
bool delalloc_lock = true;
/*
* If we are a free space inode we need to not flush since we will be in
......@@ -320,7 +319,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
*/
if (btrfs_is_free_space_inode(inode)) {
flush = BTRFS_RESERVE_NO_FLUSH;
delalloc_lock = false;
} else {
if (current->journal_info)
flush = BTRFS_RESERVE_FLUSH_LIMIT;
......@@ -329,9 +327,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
schedule_timeout(1);
}
if (delalloc_lock)
mutex_lock(&inode->delalloc_mutex);
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
/*
......@@ -348,10 +343,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
&qgroup_reserve);
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
if (ret)
goto out_fail;
return ret;
ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
if (ret)
goto out_qgroup;
if (ret) {
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
return ret;
}
/*
* Now we need to update our outstanding extents and csum bytes _first_
......@@ -375,15 +372,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
block_rsv->qgroup_rsv_reserved += qgroup_reserve;
spin_unlock(&block_rsv->lock);
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
return 0;
out_qgroup:
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
out_fail:
if (delalloc_lock)
mutex_unlock(&inode->delalloc_mutex);
return ret;
}
/**
......
......@@ -12,6 +12,7 @@
#include "transaction.h"
#include "ctree.h"
#include "qgroup.h"
#include "locking.h"
#define BTRFS_DELAYED_WRITEBACK 512
#define BTRFS_DELAYED_BACKGROUND 128
......@@ -1367,8 +1368,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
return -ENOMEM;
async_work->delayed_root = delayed_root;
btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
btrfs_async_run_delayed_root, NULL, NULL);
btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
NULL);
async_work->nr = nr;
btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
......@@ -1949,12 +1950,19 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
}
inode_id = delayed_nodes[n - 1]->inode_id + 1;
for (i = 0; i < n; i++)
refcount_inc(&delayed_nodes[i]->refs);
for (i = 0; i < n; i++) {
/*
* Don't increase refs in case the node is dead and
* about to be removed from the tree in the loop below
*/
if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
delayed_nodes[i] = NULL;
}
spin_unlock(&root->inode_lock);
for (i = 0; i < n; i++) {
if (!delayed_nodes[i])
continue;
__btrfs_kill_delayed_node(delayed_nodes[i]);
btrfs_release_delayed_node(delayed_nodes[i]);
}
......
......@@ -986,7 +986,7 @@ static int btrfs_dev_replace_kthread(void *data)
return 0;
}
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
{
if (!dev_replace->is_valid)
return 0;
......
......@@ -17,6 +17,6 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
#endif
此差异已折叠。
......@@ -49,10 +49,10 @@ struct extent_buffer *btrfs_find_create_tree_block(
struct btrfs_fs_info *fs_info,
u64 bytenr);
void btrfs_clean_tree_block(struct extent_buffer *buf);
int open_ctree(struct super_block *sb,
int __cold open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
void close_ctree(struct btrfs_fs_info *fs_info);
void __cold close_ctree(struct btrfs_fs_info *fs_info);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
......
......@@ -87,7 +87,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(sb, &key, root, NULL);
inode = btrfs_iget(sb, &key, root);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail;
......@@ -214,7 +214,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
return d_obtain_alias(btrfs_iget(fs_info->sb, &key, root, NULL));
return d_obtain_alias(btrfs_iget(fs_info->sb, &key, root));
fail:
btrfs_free_path(path);
return ERR_PTR(ret);
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BTRFS_EXTENT_IO_TREE_H
#define BTRFS_EXTENT_IO_TREE_H
struct extent_changeset;
struct io_failure_record;
/* Bits for the extent state */
#define EXTENT_DIRTY (1U << 0)
#define EXTENT_UPTODATE (1U << 1)
#define EXTENT_LOCKED (1U << 2)
#define EXTENT_NEW (1U << 3)
#define EXTENT_DELALLOC (1U << 4)
#define EXTENT_DEFRAG (1U << 5)
#define EXTENT_BOUNDARY (1U << 6)
#define EXTENT_NODATASUM (1U << 7)
#define EXTENT_CLEAR_META_RESV (1U << 8)
#define EXTENT_NEED_WAIT (1U << 9)
#define EXTENT_DAMAGED (1U << 10)
#define EXTENT_NORESERVE (1U << 11)
#define EXTENT_QGROUP_RESERVED (1U << 12)
#define EXTENT_CLEAR_DATA_RESV (1U << 13)
#define EXTENT_DELALLOC_NEW (1U << 14)
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
EXTENT_CLEAR_DATA_RESV)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
/*
* Redefined bits above which are used only in the device allocation tree,
* shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV
* / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit
* manipulation functions
*/
#define CHUNK_ALLOCATED EXTENT_DIRTY
#define CHUNK_TRIMMED EXTENT_DEFRAG
enum {
IO_TREE_FS_INFO_FREED_EXTENTS0,
IO_TREE_FS_INFO_FREED_EXTENTS1,
IO_TREE_INODE_IO,
IO_TREE_INODE_IO_FAILURE,
IO_TREE_RELOC_BLOCKS,
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
IO_TREE_SELFTEST,
};
struct extent_io_tree {
struct rb_root state;
struct btrfs_fs_info *fs_info;
void *private_data;
u64 dirty_bytes;
bool track_uptodate;
/* Who owns this io tree, should be one of IO_TREE_* */
u8 owner;
spinlock_t lock;
const struct extent_io_ops *ops;
};
struct extent_state {
u64 start;
u64 end; /* inclusive */
struct rb_node rb_node;
/* ADD NEW ELEMENTS AFTER THIS */
wait_queue_head_t wq;
refcount_t refs;
unsigned state;
struct io_failure_record *failrec;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
};
int __init extent_state_cache_init(void);
void __cold extent_state_cache_exit(void);
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
struct extent_io_tree *tree, unsigned int owner,
void *private_data);
void extent_io_tree_release(struct extent_io_tree *tree);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached);
static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
return lock_extent_bits(tree, start, end, NULL);
}
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int __init extent_io_init(void);
void __cold extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
u64 max_bytes, unsigned bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int filled,
struct extent_state *cached_state);
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask,
struct extent_changeset *changeset);
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
}
static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
GFP_NOFS, NULL);
}
static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
u64 start, u64 end, struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
GFP_ATOMIC, NULL);
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, unsigned bits)
{
int wake = 0;
if (bits & EXTENT_LOCKED)
wake = 1;
return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
}
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits);
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, unsigned bits)
{
return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS);
}
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
NULL, mask);
}
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, cached);
}
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, unsigned clear_bits,
struct extent_state **cached_state);
static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
u64 end, unsigned int extra_bits,
struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
NULL, cached_state, GFP_NOFS);
}
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
NULL, cached_state, GFP_NOFS);
}
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
u64 end)
{
return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL,
GFP_NOFS);
}
static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
cached_state, mask);
}
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state);
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
u64 *end, u64 max_bytes,
struct extent_state **cached_state);
/* This should be reworked in the future and put elsewhere. */
int get_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record **failrec);
int set_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record *failrec);
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
u64 end);
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
struct io_failure_record **failrec_ret);
int free_io_failure(struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree,
struct io_failure_record *rec);
int clean_io_failure(struct btrfs_fs_info *fs_info,
struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree, u64 start,
struct page *page, u64 ino, unsigned int pg_offset);
#endif /* BTRFS_EXTENT_IO_TREE_H */
......@@ -54,7 +54,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
{
return (cache->flags & bits) == bits;
}
......@@ -70,13 +70,13 @@ int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
return 0;
}
void btrfs_free_excluded_extents(struct btrfs_block_group_cache *cache)
void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
u64 start, end;
start = cache->key.objectid;
end = start + cache->key.offset - 1;
start = cache->start;
end = start + cache->length - 1;
clear_extent_bits(&fs_info->freed_extents[0],
start, end, EXTENT_UPTODATE);
......@@ -1306,8 +1306,10 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes)
{
int ret;
int ret = 0;
u64 discarded_bytes = 0;
u64 end = bytenr + num_bytes;
u64 cur = bytenr;
struct btrfs_bio *bbio = NULL;
......@@ -1316,15 +1318,23 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
* associated to its stripes that don't go away while we are discarding.
*/
btrfs_bio_counter_inc_blocked(fs_info);
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
&bbio, 0);
/* Error condition is -ENOMEM */
if (!ret) {
struct btrfs_bio_stripe *stripe = bbio->stripes;
while (cur < end) {
struct btrfs_bio_stripe *stripe;
int i;
num_bytes = end - cur;
/* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
&num_bytes, &bbio, 0);
/*
* Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
* -EOPNOTSUPP. For any such error, @num_bytes is not updated,
* thus we can't continue anyway.
*/
if (ret < 0)
goto out;
stripe = bbio->stripes;
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
u64 bytes;
struct request_queue *req_q;
......@@ -1341,10 +1351,19 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
stripe->physical,
stripe->length,
&bytes);
if (!ret)
if (!ret) {
discarded_bytes += bytes;
else if (ret != -EOPNOTSUPP)
break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
} else if (ret != -EOPNOTSUPP) {
/*
* Logic errors or -ENOMEM, or -EIO, but
* unlikely to happen.
*
* And since there are two loops, explicitly
* go to out to avoid confusion.
*/
btrfs_put_bbio(bbio);
goto out;
}
/*
* Just in case we get back EOPNOTSUPP for some reason,
......@@ -1354,7 +1373,9 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = 0;
}
btrfs_put_bbio(bbio);
cur += num_bytes;
}
out:
btrfs_bio_counter_dec(fs_info);
if (actual_bytes)
......@@ -2516,7 +2537,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_block_group *block_group;
int readonly = 0;
block_group = btrfs_lookup_block_group(fs_info, bytenr);
......@@ -2546,7 +2567,7 @@ static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
{
struct btrfs_block_group_cache *cache;
struct btrfs_block_group *cache;
u64 bytenr;
spin_lock(&fs_info->block_group_cache_lock);
......@@ -2560,13 +2581,13 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
if (!cache)
return 0;
bytenr = cache->key.objectid;
bytenr = cache->start;
btrfs_put_block_group(cache);
return bytenr;
}
static int pin_down_extent(struct btrfs_block_group_cache *cache,
static int pin_down_extent(struct btrfs_block_group *cache,
u64 bytenr, u64 num_bytes, int reserved)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
......@@ -2590,13 +2611,12 @@ static int pin_down_extent(struct btrfs_block_group_cache *cache,
return 0;
}
/*
* this function must be called within transaction
*/
int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, int reserved)
{
struct btrfs_block_group_cache *cache;
struct btrfs_block_group *cache;
ASSERT(fs_info->running_transaction);
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache); /* Logic error */
......@@ -2613,7 +2633,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes)
{
struct btrfs_block_group_cache *cache;
struct btrfs_block_group *cache;
int ret;
cache = btrfs_lookup_block_group(fs_info, bytenr);
......@@ -2640,7 +2660,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 num_bytes)
{
int ret;
struct btrfs_block_group_cache *block_group;
struct btrfs_block_group *block_group;
struct btrfs_caching_control *caching_ctl;
block_group = btrfs_lookup_block_group(fs_info, start);
......@@ -2652,7 +2672,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
if (!caching_ctl) {
/* Logic error */
BUG_ON(!btrfs_block_group_cache_done(block_group));
BUG_ON(!btrfs_block_group_done(block_group));
ret = btrfs_remove_free_space(block_group, start, num_bytes);
} else {
mutex_lock(&caching_ctl->mutex);
......@@ -2717,7 +2737,7 @@ int btrfs_exclude_logged_extents(struct extent_buffer *eb)
}
static void
btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
{
atomic_inc(&bg->reservations);
}
......@@ -2726,14 +2746,14 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
{
struct btrfs_caching_control *next;
struct btrfs_caching_control *caching_ctl;
struct btrfs_block_group_cache *cache;
struct btrfs_block_group *cache;
down_write(&fs_info->commit_root_sem);
list_for_each_entry_safe(caching_ctl, next,
&fs_info->caching_block_groups, list) {
cache = caching_ctl->block_group;
if (btrfs_block_group_cache_done(cache)) {
if (btrfs_block_group_done(cache)) {
cache->last_byte_to_unpin = (u64)-1;
list_del_init(&caching_ctl->list);
btrfs_put_caching_control(caching_ctl);
......@@ -2785,7 +2805,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end,
const bool return_free_space)
{
struct btrfs_block_group_cache *cache = NULL;
struct btrfs_block_group *cache = NULL;
struct btrfs_space_info *space_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_free_cluster *cluster = NULL;
......@@ -2797,7 +2817,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
while (start <= end) {
readonly = false;
if (!cache ||
start >= cache->key.objectid + cache->key.offset) {
start >= cache->start + cache->length) {
if (cache)
btrfs_put_block_group(cache);
total_unpinned = 0;
......@@ -2810,7 +2830,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
empty_cluster <<= 1;
}
len = cache->key.objectid + cache->key.offset - start;
len = cache->start + cache->length - start;
len = min(len, end + 1 - start);
if (start < cache->last_byte_to_unpin) {
......@@ -2880,7 +2900,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group_cache *block_group, *tmp;
struct btrfs_block_group *block_group, *tmp;
struct list_head *deleted_bgs;
struct extent_io_tree *unpin;
u64 start;
......@@ -2926,8 +2946,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
ret = -EROFS;
if (!trans->aborted)
ret = btrfs_discard_extent(fs_info,
block_group->key.objectid,
block_group->key.offset,
block_group->start,
block_group->length,
&trimmed);
list_del_init(&block_group->bg_list);
......@@ -3262,7 +3282,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
if (last_ref && btrfs_header_generation(buf) == trans->transid) {
struct btrfs_block_group_cache *cache;
struct btrfs_block_group *cache;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = check_ref_cleanup(trans, buf->start);
......@@ -3349,15 +3369,14 @@ enum btrfs_loop_type {
};
static inline void
btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
btrfs_lock_block_group(struct btrfs_block_group *cache,
int delalloc)
{
if (delalloc)
down_read(&cache->data_rwsem);
}
static inline void
btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
int delalloc)
{
btrfs_get_block_group(cache);
......@@ -3365,12 +3384,12 @@ btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
down_read(&cache->data_rwsem);
}
static struct btrfs_block_group_cache *
btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
static struct btrfs_block_group *btrfs_lock_cluster(
struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
int delalloc)
{
struct btrfs_block_group_cache *used_bg = NULL;
struct btrfs_block_group *used_bg = NULL;
spin_lock(&cluster->refill_lock);
while (1) {
......@@ -3404,7 +3423,7 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
}
static inline void
btrfs_release_block_group(struct btrfs_block_group_cache *cache,
btrfs_release_block_group(struct btrfs_block_group *cache,
int delalloc)
{
if (delalloc)
......@@ -3475,12 +3494,12 @@ struct find_free_extent_ctl {
* Return >0 to inform caller that we find nothing
* Return 0 means we have found a location and set ffe_ctl->found_offset.
*/
static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
static int find_free_extent_clustered(struct btrfs_block_group *bg,
struct btrfs_free_cluster *last_ptr,
struct find_free_extent_ctl *ffe_ctl,
struct btrfs_block_group_cache **cluster_bg_ret)
struct btrfs_block_group **cluster_bg_ret)
{
struct btrfs_block_group_cache *cluster_bg;
struct btrfs_block_group *cluster_bg;
u64 aligned_cluster;
u64 offset;
int ret;
......@@ -3493,7 +3512,7 @@ static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
goto release_cluster;
offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
ffe_ctl->num_bytes, cluster_bg->key.objectid,
ffe_ctl->num_bytes, cluster_bg->start,
&ffe_ctl->max_extent_size);
if (offset) {
/* We have a block, we're done */
......@@ -3579,7 +3598,7 @@ static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
* Return 0 when we found an free extent and set ffe_ctrl->found_offset
* Return -EAGAIN to inform caller that we need to re-search this block group
*/
static int find_free_extent_unclustered(struct btrfs_block_group_cache *bg,
static int find_free_extent_unclustered(struct btrfs_block_group *bg,
struct btrfs_free_cluster *last_ptr,
struct find_free_extent_ctl *ffe_ctl)
{
......@@ -3781,7 +3800,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
{
int ret = 0;
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group_cache *block_group = NULL;
struct btrfs_block_group *block_group = NULL;
struct find_free_extent_ctl ffe_ctl = {0};
struct btrfs_space_info *space_info;
bool use_cluster = true;
......@@ -3904,7 +3923,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
continue;
btrfs_grab_block_group(block_group, delalloc);
ffe_ctl.search_start = block_group->key.objectid;
ffe_ctl.search_start = block_group->start;
/*
* this can happen if we end up cycling through all the
......@@ -3935,7 +3954,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
}
have_block_group:
ffe_ctl.cached = btrfs_block_group_cache_done(block_group);
ffe_ctl.cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl.cached)) {
ffe_ctl.have_caching_bg = true;
ret = btrfs_cache_block_group(block_group, 0);
......@@ -3951,7 +3970,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
* lets look there
*/
if (last_ptr && use_cluster) {
struct btrfs_block_group_cache *cluster_bg = NULL;
struct btrfs_block_group *cluster_bg = NULL;
ret = find_free_extent_clustered(block_group, last_ptr,
&ffe_ctl, &cluster_bg);
......@@ -3984,7 +4003,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
/* move on to the next group */
if (ffe_ctl.search_start + num_bytes >
block_group->key.objectid + block_group->key.offset) {
block_group->start + block_group->length) {
btrfs_add_free_space(block_group, ffe_ctl.found_offset,
num_bytes);
goto loop;
......@@ -4133,7 +4152,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len,
int pin, int delalloc)
{
struct btrfs_block_group_cache *cache;
struct btrfs_block_group *cache;
int ret = 0;
cache = btrfs_lookup_block_group(fs_info, start);
......@@ -4366,7 +4385,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_block_group_cache *block_group;
struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
/*
......@@ -5436,7 +5455,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(parent);
parent_level = btrfs_header_level(parent);
extent_buffer_get(parent);
atomic_inc(&parent->refs);
path->nodes[parent_level] = parent;
path->slots[parent_level] = btrfs_header_nritems(parent);
......@@ -5480,7 +5499,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
*/
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
{
struct btrfs_block_group_cache *block_group;
struct btrfs_block_group *block_group;
u64 free_bytes = 0;
int factor;
......@@ -5498,9 +5517,8 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
}
factor = btrfs_bg_type_to_factor(block_group->flags);
free_bytes += (block_group->key.offset -
btrfs_block_group_used(&block_group->item)) *
factor;
free_bytes += (block_group->length -
block_group->used) * factor;
spin_unlock(&block_group->lock);
}
......@@ -5623,7 +5641,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
*/
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
{
struct btrfs_block_group_cache *cache = NULL;
struct btrfs_block_group *cache = NULL;
struct btrfs_device *device;
struct list_head *devices;
u64 group_trimmed;
......@@ -5647,16 +5665,16 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
cache = btrfs_lookup_first_block_group(fs_info, range->start);
for (; cache; cache = btrfs_next_block_group(cache)) {
if (cache->key.objectid >= range_end) {
if (cache->start >= range_end) {
btrfs_put_block_group(cache);
break;
}
start = max(range->start, cache->key.objectid);
end = min(range_end, cache->key.objectid + cache->key.offset);
start = max(range->start, cache->start);
end = min(range_end, cache->start + cache->length);
if (end - start >= range->minlen) {
if (!btrfs_block_group_cache_done(cache)) {
if (!btrfs_block_group_done(cache)) {
ret = btrfs_cache_block_group(cache, 0);
if (ret) {
bg_failed++;
......
......@@ -14,6 +14,7 @@
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent-io-tree.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
......@@ -59,12 +60,23 @@ void btrfs_leak_debug_del(struct list_head *entry)
spin_unlock_irqrestore(&leak_lock, flags);
}
static inline
void btrfs_leak_debug_check(void)
static inline void btrfs_extent_buffer_leak_debug_check(void)
{
struct extent_state *state;
struct extent_buffer *eb;
while (!list_empty(&buffers)) {
eb = list_entry(buffers.next, struct extent_buffer, leak_list);
pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
list_del(&eb->leak_list);
kmem_cache_free(extent_buffer_cache, eb);
}
}
static inline void btrfs_extent_state_leak_debug_check(void)
{
struct extent_state *state;
while (!list_empty(&states)) {
state = list_entry(states.next, struct extent_state, leak_list);
pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
......@@ -74,14 +86,6 @@ void btrfs_leak_debug_check(void)
list_del(&state->leak_list);
kmem_cache_free(extent_state_cache, state);
}
while (!list_empty(&buffers)) {
eb = list_entry(buffers.next, struct extent_buffer, leak_list);
pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
list_del(&eb->leak_list);
kmem_cache_free(extent_buffer_cache, eb);
}
}
#define btrfs_debug_check_extent_io_range(tree, start, end) \
......@@ -105,7 +109,8 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
#else
#define btrfs_leak_debug_add(new, head) do {} while (0)
#define btrfs_leak_debug_del(entry) do {} while (0)
#define btrfs_leak_debug_check() do {} while (0)
#define btrfs_extent_buffer_leak_debug_check() do {} while (0)
#define btrfs_extent_state_leak_debug_check() do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
#endif
......@@ -196,19 +201,23 @@ static int __must_check flush_write_bio(struct extent_page_data *epd)
return ret;
}
int __init extent_io_init(void)
int __init extent_state_cache_init(void)
{
extent_state_cache = kmem_cache_create("btrfs_extent_state",
sizeof(struct extent_state), 0,
SLAB_MEM_SPREAD, NULL);
if (!extent_state_cache)
return -ENOMEM;
return 0;
}
int __init extent_io_init(void)
{
extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
sizeof(struct extent_buffer), 0,
SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
return -ENOMEM;
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
offsetof(struct btrfs_io_bio, bio),
......@@ -226,23 +235,24 @@ int __init extent_io_init(void)
free_buffer_cache:
kmem_cache_destroy(extent_buffer_cache);
extent_buffer_cache = NULL;
return -ENOMEM;
}
free_state_cache:
void __cold extent_state_cache_exit(void)
{
btrfs_extent_state_leak_debug_check();
kmem_cache_destroy(extent_state_cache);
extent_state_cache = NULL;
return -ENOMEM;
}
void __cold extent_io_exit(void)
{
btrfs_leak_debug_check();
btrfs_extent_buffer_leak_debug_check();
/*
* Make sure all delayed rcu free are flushed before we
* destroy caches.
*/
rcu_barrier();
kmem_cache_destroy(extent_state_cache);
kmem_cache_destroy(extent_buffer_cache);
bioset_exit(&btrfs_bioset);
}
......@@ -1676,9 +1686,9 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
*
* true is returned if we find something, false if nothing was in the tree
*/
static noinline bool find_delalloc_range(struct extent_io_tree *tree,
u64 *start, u64 *end, u64 max_bytes,
struct extent_state **cached_state)
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
u64 *end, u64 max_bytes,
struct extent_state **cached_state)
{
struct rb_node *node;
struct extent_state *state;
......@@ -1796,8 +1806,8 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
/* step one, find a bunch of delalloc bytes starting at start */
delalloc_start = *start;
delalloc_end = 0;
found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
max_bytes, &cached_state);
found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
max_bytes, &cached_state);
if (!found || delalloc_end <= *start) {
*start = delalloc_start;
*end = delalloc_end;
......@@ -1899,7 +1909,7 @@ static int __process_pages_contig(struct address_space *mapping,
if (page_ops & PAGE_SET_PRIVATE2)
SetPagePrivate2(pages[i]);
if (pages[i] == locked_page) {
if (locked_page && pages[i] == locked_page) {
put_page(pages[i]);
pages_locked++;
continue;
......@@ -2014,8 +2024,8 @@ u64 count_range_bits(struct extent_io_tree *tree,
* set the private field for a given byte offset in the tree. If there isn't
* an extent_state there already, this does nothing.
*/
static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record *failrec)
int set_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record *failrec)
{
struct rb_node *node;
struct extent_state *state;
......@@ -2042,8 +2052,8 @@ static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
return ret;
}
static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record **failrec)
int get_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record **failrec)
{
struct rb_node *node;
struct extent_state *state;
......@@ -2534,7 +2544,6 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
bio = btrfs_io_bio_alloc(1);
bio->bi_end_io = endio_func;
bio->bi_iter.bi_sector = failrec->logical >> 9;
bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
bio->bi_iter.bi_size = 0;
bio->bi_private = data;
......@@ -2920,7 +2929,6 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
* a contiguous page to the previous one
* @size: portion of page that we want to write
* @offset: starting offset in the page
* @bdev: attach newly created bios to this bdev
* @bio_ret: must be valid pointer, newly allocated bio will be stored there
* @end_io_func: end_io callback for new bio
* @mirror_num: desired mirror to read/write
......@@ -2931,7 +2939,6 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
struct writeback_control *wbc,
struct page *page, u64 offset,
size_t size, unsigned long pg_offset,
struct block_device *bdev,
struct bio **bio_ret,
bio_end_io_t end_io_func,
int mirror_num,
......@@ -2977,13 +2984,16 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
}
bio = btrfs_bio_alloc(offset);
bio_set_dev(bio, bdev);
bio_add_page(bio, page, page_size, pg_offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
bio->bi_write_hint = page->mapping->host->i_write_hint;
bio->bi_opf = opf;
if (wbc) {
struct block_device *bdev;
bdev = BTRFS_I(page->mapping->host)->root->fs_info->fs_devices->latest_bdev;
bio_set_dev(bio, bdev);
wbc_init_bio(wbc, bio);
wbc_account_cgroup_owner(wbc, page, page_size);
}
......@@ -3065,7 +3075,6 @@ static int __do_readpage(struct extent_io_tree *tree,
u64 block_start;
u64 cur_end;
struct extent_map *em;
struct block_device *bdev;
int ret = 0;
int nr = 0;
size_t pg_offset = 0;
......@@ -3142,7 +3151,6 @@ static int __do_readpage(struct extent_io_tree *tree,
offset = em->block_start + extent_offset;
disk_io_size = iosize;
}
bdev = em->bdev;
block_start = em->block_start;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
block_start = EXTENT_MAP_HOLE;
......@@ -3232,7 +3240,7 @@ static int __do_readpage(struct extent_io_tree *tree,
ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
page, offset, disk_io_size,
pg_offset, bdev, bio,
pg_offset, bio,
end_bio_extent_readpage, mirror_num,
*bio_flags,
this_bio_flag,
......@@ -3409,7 +3417,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
struct extent_page_data *epd,
loff_t i_size,
unsigned long nr_written,
unsigned int write_flags, int *nr_ret)
int *nr_ret)
{
struct extent_io_tree *tree = epd->tree;
u64 start = page_offset(page);
......@@ -3420,11 +3428,11 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
u64 block_start;
u64 iosize;
struct extent_map *em;
struct block_device *bdev;
size_t pg_offset = 0;
size_t blocksize;
int ret = 0;
int nr = 0;
const unsigned int write_flags = wbc_to_write_flags(wbc);
bool compressed;
ret = btrfs_writepage_cow_fixup(page, start, page_end);
......@@ -3478,7 +3486,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
iosize = min(em_end - cur, end - cur + 1);
iosize = ALIGN(iosize, blocksize);
offset = em->block_start + extent_offset;
bdev = em->bdev;
block_start = em->block_start;
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
free_extent_map(em);
......@@ -3520,7 +3527,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
page, offset, iosize, pg_offset,
bdev, &epd->bio,
&epd->bio,
end_bio_extent_writepage,
0, 0, 0, false);
if (ret) {
......@@ -3558,11 +3565,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
size_t pg_offset = 0;
loff_t i_size = i_size_read(inode);
unsigned long end_index = i_size >> PAGE_SHIFT;
unsigned int write_flags = 0;
unsigned long nr_written = 0;
write_flags = wbc_to_write_flags(wbc);
trace___extent_writepage(page, inode, wbc);
WARN_ON(!PageLocked(page));
......@@ -3600,7 +3604,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
}
ret = __extent_writepage_io(inode, page, wbc, epd,
i_size, nr_written, write_flags, &nr);
i_size, nr_written, &nr);
if (ret == 1)
goto done_unlocked;
......@@ -3849,7 +3853,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
struct extent_page_data *epd)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
struct block_device *bdev = fs_info->fs_devices->latest_bdev;
struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
u64 offset = eb->start;
u32 nritems;
......@@ -3884,7 +3887,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_page_dirty_for_io(p);
set_page_writeback(p);
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
p, offset, PAGE_SIZE, 0, bdev,
p, offset, PAGE_SIZE, 0,
&epd->bio,
end_bio_extent_buffer_writepage,
0, 0, 0, false);
......@@ -4121,7 +4124,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
done_index = page->index;
done_index = page->index + 1;
/*
* At this point we hold neither the i_pages lock nor
* the page lock: the page may be truncated or
......@@ -4156,16 +4159,6 @@ static int extent_write_cache_pages(struct address_space *mapping,
ret = __extent_writepage(page, wbc, epd);
if (ret < 0) {
/*
* done_index is set past this page,
* so media errors will not choke
* background writeout for the entire
* file. This has consequences for
* range_cyclic semantics (ie. it may
* not be suitable for data integrity
* writeout).
*/
done_index = page->index + 1;
done = 1;
break;
}
......@@ -4240,8 +4233,12 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
.nr_to_write = nr_pages * 2,
.range_start = start,
.range_end = end + 1,
/* We're called from an async helper function */
.punt_to_cgroup = 1,
.no_cgroup_owner = 1,
};
wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
while (start <= end) {
page = find_get_page(mapping, start >> PAGE_SHIFT);
if (clear_page_dirty_for_io(page))
......@@ -4256,11 +4253,12 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
}
ASSERT(ret <= 0);
if (ret < 0) {
if (ret == 0)
ret = flush_write_bio(&epd);
else
end_write_bio(&epd, ret);
return ret;
}
ret = flush_write_bio(&epd);
wbc_detach_inode(&wbc_writepages);
return ret;
}
......
......@@ -7,35 +7,6 @@
#include <linux/refcount.h>
#include "ulist.h"
/* bits for the extent state */
#define EXTENT_DIRTY (1U << 0)
#define EXTENT_UPTODATE (1U << 1)
#define EXTENT_LOCKED (1U << 2)
#define EXTENT_NEW (1U << 3)
#define EXTENT_DELALLOC (1U << 4)
#define EXTENT_DEFRAG (1U << 5)
#define EXTENT_BOUNDARY (1U << 6)
#define EXTENT_NODATASUM (1U << 7)
#define EXTENT_CLEAR_META_RESV (1U << 8)
#define EXTENT_NEED_WAIT (1U << 9)
#define EXTENT_DAMAGED (1U << 10)
#define EXTENT_NORESERVE (1U << 11)
#define EXTENT_QGROUP_RESERVED (1U << 12)
#define EXTENT_CLEAR_DATA_RESV (1U << 13)
#define EXTENT_DELALLOC_NEW (1U << 14)
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
EXTENT_CLEAR_DATA_RESV)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
/*
* Redefined bits above which are used only in the device allocation tree,
* shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV
* / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit
* manipulation functions
*/
#define CHUNK_ALLOCATED EXTENT_DIRTY
#define CHUNK_TRIMMED EXTENT_DEFRAG
/*
* flags for bio submission. The high bits indicate the compression
* type for this bio
......@@ -89,12 +60,11 @@ enum {
#define BITMAP_LAST_BYTE_MASK(nbits) \
(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
struct extent_state;
struct btrfs_root;
struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;
struct extent_io_tree;
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
......@@ -111,47 +81,6 @@ struct extent_io_ops {
int mirror);
};
enum {
IO_TREE_FS_INFO_FREED_EXTENTS0,
IO_TREE_FS_INFO_FREED_EXTENTS1,
IO_TREE_INODE_IO,
IO_TREE_INODE_IO_FAILURE,
IO_TREE_RELOC_BLOCKS,
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
IO_TREE_SELFTEST,
};
struct extent_io_tree {
struct rb_root state;
struct btrfs_fs_info *fs_info;
void *private_data;
u64 dirty_bytes;
bool track_uptodate;
/* Who owns this io tree, should be one of IO_TREE_* */
u8 owner;
spinlock_t lock;
const struct extent_io_ops *ops;
};
struct extent_state {
u64 start;
u64 end; /* inclusive */
struct rb_node rb_node;
/* ADD NEW ELEMENTS AFTER THIS */
wait_queue_head_t wq;
refcount_t refs;
unsigned state;
struct io_failure_record *failrec;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
};
#define INLINE_EXTENT_BUFFER_PAGES 16
#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
......@@ -259,152 +188,11 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
u64 start, u64 len,
int create);
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
struct extent_io_tree *tree, unsigned int owner,
void *private_data);
void extent_io_tree_release(struct extent_io_tree *tree);
int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached);
static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
return lock_extent_bits(tree, start, end, NULL);
}
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
void __cold extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
u64 max_bytes, unsigned bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int filled,
struct extent_state *cached_state);
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask,
struct extent_changeset *changeset);
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
}
static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
GFP_NOFS, NULL);
}
static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
u64 start, u64 end, struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
GFP_ATOMIC, NULL);
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, unsigned bits)
{
int wake = 0;
if (bits & EXTENT_LOCKED)
wake = 1;
return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
}
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits);
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, unsigned bits)
{
return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS);
}
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
NULL, mask);
}
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, cached);
}
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, unsigned clear_bits,
struct extent_state **cached_state);
static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
u64 end, unsigned int extra_bits,
struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
NULL, cached_state, GFP_NOFS);
}
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
NULL, cached_state, GFP_NOFS);
}
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
u64 end)
{
return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL,
GFP_NOFS);
}
static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
cached_state, mask);
}
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state);
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
int mode);
......@@ -442,11 +230,6 @@ static inline int num_extent_pages(const struct extent_buffer *eb)
(eb->start >> PAGE_SHIFT);
}
static inline void extent_buffer_get(struct extent_buffer *eb)
{
atomic_inc(&eb->refs);
}
static inline int extent_buffer_uptodate(struct extent_buffer *eb)
{
return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
......@@ -508,10 +291,6 @@ struct btrfs_inode;
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num);
int clean_io_failure(struct btrfs_fs_info *fs_info,
struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree, u64 start,
struct page *page, u64 ino, unsigned int pg_offset);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num);
......@@ -535,19 +314,12 @@ struct io_failure_record {
};
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
u64 end);
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
struct io_failure_record **failrec_ret);
bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
struct io_failure_record *failrec, int fail_mirror);
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func, void *data);
int free_io_failure(struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree,
struct io_failure_record *rec);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
......@@ -555,5 +327,4 @@ bool find_lock_delalloc_range(struct inode *inode,
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
#endif
......@@ -214,9 +214,13 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
ASSERT(next->block_start != EXTENT_MAP_DELALLOC &&
prev->block_start != EXTENT_MAP_DELALLOC);
if (prev->map_lookup || next->map_lookup)
ASSERT(test_bit(EXTENT_FLAG_FS_MAPPING, &prev->flags) &&
test_bit(EXTENT_FLAG_FS_MAPPING, &next->flags));
if (extent_map_end(prev) == next->start &&
prev->flags == next->flags &&
prev->bdev == next->bdev &&
prev->map_lookup == next->map_lookup &&
((next->block_start == EXTENT_MAP_HOLE &&
prev->block_start == EXTENT_MAP_HOLE) ||
(next->block_start == EXTENT_MAP_INLINE &&
......
......@@ -42,15 +42,8 @@ struct extent_map {
u64 block_len;
u64 generation;
unsigned long flags;
union {
struct block_device *bdev;
/*
* used for chunk mappings
* flags & EXTENT_FLAG_FS_MAPPING must be set
*/
struct map_lookup *map_lookup;
};
/* Used for chunk mappings, flag EXTENT_FLAG_FS_MAPPING must be set */
struct map_lookup *map_lookup;
refcount_t refs;
unsigned int compress_type;
struct list_head list;
......
......@@ -945,7 +945,6 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
u8 type = btrfs_file_extent_type(leaf, fi);
int compress_type = btrfs_file_extent_compression(leaf, fi);
em->bdev = fs_info->fs_devices->latest_bdev;
btrfs_item_key_to_cpu(leaf, &key, slot);
extent_start = key.offset;
......
......@@ -296,7 +296,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
key.objectid = defrag->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
inode = btrfs_iget(fs_info->sb, &key, inode_root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto cleanup;
......@@ -667,7 +667,6 @@ void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
}
split->generation = gen;
split->bdev = em->bdev;
split->flags = flags;
split->compress_type = em->compress_type;
replace_extent_mapping(em_tree, em, split, modified);
......@@ -680,7 +679,6 @@ void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
split->start = start + len;
split->len = em->start + em->len - (start + len);
split->bdev = em->bdev;
split->flags = flags;
split->compress_type = em->compress_type;
split->generation = gen;
......@@ -1636,6 +1634,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
break;
}
only_release_metadata = false;
sector_offset = pos & (fs_info->sectorsize - 1);
reserve_bytes = round_up(write_bytes + sector_offset,
fs_info->sectorsize);
......@@ -1791,7 +1790,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_NORESERVE, NULL,
NULL, GFP_NOFS);
only_release_metadata = false;
}
btrfs_drop_pages(pages, num_pages);
......@@ -1903,9 +1901,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
(iocb->ki_flags & IOCB_NOWAIT))
return -EOPNOTSUPP;
if (!inode_trylock(inode)) {
if (iocb->ki_flags & IOCB_NOWAIT)
if (iocb->ki_flags & IOCB_NOWAIT) {
if (!inode_trylock(inode))
return -EAGAIN;
} else {
inode_lock(inode);
}
......@@ -2359,7 +2358,6 @@ static int fill_holes(struct btrfs_trans_handle *trans,
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->orig_block_len = 0;
hole_em->bdev = fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = trans->transid;
......@@ -3350,29 +3348,30 @@ static long btrfs_fallocate(struct file *file, int mode,
return ret;
}
static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
static loff_t find_desired_extent(struct inode *inode, loff_t offset,
int whence)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
loff_t i_size = inode->i_size;
u64 lockstart;
u64 lockend;
u64 start;
u64 len;
int ret = 0;
if (inode->i_size == 0)
if (i_size == 0 || offset >= i_size)
return -ENXIO;
/*
* *offset can be negative, in this case we start finding DATA/HOLE from
* offset can be negative, in this case we start finding DATA/HOLE from
* the very start of the file.
*/
start = max_t(loff_t, 0, *offset);
start = max_t(loff_t, 0, offset);
lockstart = round_down(start, fs_info->sectorsize);
lockend = round_up(i_size_read(inode),
fs_info->sectorsize);
lockend = round_up(i_size, fs_info->sectorsize);
if (lockend <= lockstart)
lockend = lockstart + fs_info->sectorsize;
lockend--;
......@@ -3381,7 +3380,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
while (start < inode->i_size) {
while (start < i_size) {
em = btrfs_get_extent_fiemap(BTRFS_I(inode), start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
......@@ -3404,46 +3403,39 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
cond_resched();
}
free_extent_map(em);
if (!ret) {
if (whence == SEEK_DATA && start >= inode->i_size)
ret = -ENXIO;
else
*offset = min_t(loff_t, start, inode->i_size);
}
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
return ret;
if (ret) {
offset = ret;
} else {
if (whence == SEEK_DATA && start >= i_size)
offset = -ENXIO;
else
offset = min_t(loff_t, start, i_size);
}
return offset;
}
static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
int ret;
inode_lock(inode);
switch (whence) {
case SEEK_END:
case SEEK_CUR:
offset = generic_file_llseek(file, offset, whence);
goto out;
default:
return generic_file_llseek(file, offset, whence);
case SEEK_DATA:
case SEEK_HOLE:
if (offset >= i_size_read(inode)) {
inode_unlock(inode);
return -ENXIO;
}
ret = find_desired_extent(inode, &offset, whence);
if (ret) {
inode_unlock(inode);
return ret;
}
inode_lock_shared(inode);
offset = find_desired_extent(inode, offset, whence);
inode_unlock_shared(inode);
break;
}
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
inode_unlock(inode);
return offset;
if (offset < 0)
return offset;
return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}
static int btrfs_file_open(struct inode *inode, struct file *filp)
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -16,14 +16,14 @@ struct btrfs_caching_control;
#define BTRFS_FREE_SPACE_BITMAP_SIZE 256
#define BTRFS_FREE_SPACE_BITMAP_BITS (BTRFS_FREE_SPACE_BITMAP_SIZE * BITS_PER_BYTE)
void set_free_space_tree_thresholds(struct btrfs_block_group_cache *block_group);
void set_free_space_tree_thresholds(struct btrfs_block_group *block_group);
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
int add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group);
struct btrfs_block_group *block_group);
int remove_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group);
struct btrfs_block_group *block_group);
int add_to_free_space_tree(struct btrfs_trans_handle *trans,
u64 start, u64 size);
int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
......@@ -32,21 +32,21 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_free_space_info *
search_free_space_info(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_block_group *block_group,
struct btrfs_path *path, int cow);
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 start, u64 size);
int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 start, u64 size);
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_block_group *block_group,
struct btrfs_path *path);
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_block_group *block_group,
struct btrfs_path *path);
int free_space_test_bit(struct btrfs_block_group_cache *block_group,
int free_space_test_bit(struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 offset);
#endif
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册