提交 31466f3e 编写于 作者: Linus Torvalds

Merge tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "Features or user visible changes:

   - fallocate: implement zero range mode

   - avoid losing data raid profile when deleting a device

   - tree item checker: more checks for directory items and xattrs

  Notable fixes:

   - raid56 recovery: don't use cached stripes, that could be
     potentially changed and a later RMW or recovery would lead to
     corruptions or failures

   - let raid56 try harder to rebuild damaged data, reading from all
     stripes if necessary

   - fix scrub to repair raid56 in a similar way as in the case above

  Other:

   - cleanups: device freeing, removed some call indirections, redundant
     bio_put/_get, unused parameters, refactorings and renames

   - RCU list traversal fixups

   - simplify mount callchain, remove recursing back when mounting a
     subvolume

   - plug for fsync, may improve bio merging on multiple devices

   - compression heuristic: replace heap sort with radix sort, gains some
     performance

   - add extent map selftests, buffered write vs dio"

* tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (155 commits)
  btrfs: drop devid as device_list_add() arg
  btrfs: get device pointer from device_list_add()
  btrfs: set the total_devices in device_list_add()
  btrfs: move pr_info into device_list_add
  btrfs: make btrfs_free_stale_devices() to match the path
  btrfs: rename btrfs_free_stale_devices() arg to skip_dev
  btrfs: make btrfs_free_stale_devices() argument optional
  btrfs: make btrfs_free_stale_device() to iterate all stales
  btrfs: no need to check for btrfs_fs_devices::seeding
  btrfs: Use IS_ALIGNED in btrfs_truncate_block instead of opencoding it
  Btrfs: noinline merge_extent_mapping
  Btrfs: add WARN_ONCE to detect unexpected error from merge_extent_mapping
  Btrfs: extent map selftest: dio write vs dio read
  Btrfs: extent map selftest: buffered write vs dio read
  Btrfs: add extent map selftests
  Btrfs: move extent map specific code to extent_map.c
  Btrfs: add helper for em merge logic
  Btrfs: fix unexpected EEXIST from btrfs_get_extent
  Btrfs: fix incorrect block_len in merge_extent_mapping
  btrfs: Remove unused readahead spinlock
  ...
...@@ -19,4 +19,4 @@ btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o ...@@ -19,4 +19,4 @@ btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \ btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \ tests/extent-buffer-tests.o tests/btrfs-tests.o \
tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \ tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
tests/free-space-tree-tests.o tests/free-space-tree-tests.o tests/extent-map-tests.o
...@@ -216,7 +216,8 @@ static int prelim_ref_compare(struct prelim_ref *ref1, ...@@ -216,7 +216,8 @@ static int prelim_ref_compare(struct prelim_ref *ref1,
return 0; return 0;
} }
void update_share_count(struct share_check *sc, int oldcount, int newcount) static void update_share_count(struct share_check *sc, int oldcount,
int newcount)
{ {
if ((!sc) || (oldcount == 0 && newcount < 1)) if ((!sc) || (oldcount == 0 && newcount < 1))
return; return;
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#include <linux/bit_spinlock.h> #include <linux/bit_spinlock.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/sort.h>
#include <linux/log2.h> #include <linux/log2.h>
#include "ctree.h" #include "ctree.h"
#include "disk-io.h" #include "disk-io.h"
...@@ -45,6 +44,21 @@ ...@@ -45,6 +44,21 @@
#include "extent_io.h" #include "extent_io.h"
#include "extent_map.h" #include "extent_map.h"
/* Human-readable names of the supported compression algorithms, indexed by type */
static const char * const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };

/*
 * Map a compression type to its user-visible name.
 *
 * Returns a pointer to a static string for the known types (the empty string
 * for BTRFS_COMPRESS_NONE), or NULL for any unrecognized value.
 */
const char* btrfs_compress_type2str(enum btrfs_compression_type type)
{
	switch (type) {
	case BTRFS_COMPRESS_NONE:
	case BTRFS_COMPRESS_ZLIB:
	case BTRFS_COMPRESS_LZO:
	case BTRFS_COMPRESS_ZSTD:
		return btrfs_compress_types[type];
	default:
		return NULL;
	}
}
static int btrfs_decompress_bio(struct compressed_bio *cb); static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info, static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
...@@ -348,8 +362,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, ...@@ -348,8 +362,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
page->mapping = NULL; page->mapping = NULL;
if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) { PAGE_SIZE) {
bio_get(bio);
/* /*
* inc the count before we submit the bio so * inc the count before we submit the bio so
* we know the end IO handler won't happen before * we know the end IO handler won't happen before
...@@ -372,8 +384,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, ...@@ -372,8 +384,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio); bio_endio(bio);
} }
bio_put(bio);
bio = btrfs_bio_alloc(bdev, first_byte); bio = btrfs_bio_alloc(bdev, first_byte);
bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb; bio->bi_private = cb;
...@@ -389,7 +399,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, ...@@ -389,7 +399,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
first_byte += PAGE_SIZE; first_byte += PAGE_SIZE;
cond_resched(); cond_resched();
} }
bio_get(bio);
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */ BUG_ON(ret); /* -ENOMEM */
...@@ -405,7 +414,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, ...@@ -405,7 +414,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio); bio_endio(bio);
} }
bio_put(bio);
return 0; return 0;
} }
...@@ -638,8 +646,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ...@@ -638,8 +646,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->mapping = NULL; page->mapping = NULL;
if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) { PAGE_SIZE) {
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
BTRFS_WQ_ENDIO_DATA); BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */ BUG_ON(ret); /* -ENOMEM */
...@@ -666,8 +672,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ...@@ -666,8 +672,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_endio(comp_bio); bio_endio(comp_bio);
} }
bio_put(comp_bio);
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
bio_set_op_attrs(comp_bio, REQ_OP_READ, 0); bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb; comp_bio->bi_private = cb;
...@@ -677,7 +681,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ...@@ -677,7 +681,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
} }
cur_disk_byte += PAGE_SIZE; cur_disk_byte += PAGE_SIZE;
} }
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */ BUG_ON(ret); /* -ENOMEM */
...@@ -693,7 +696,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ...@@ -693,7 +696,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_endio(comp_bio); bio_endio(comp_bio);
} }
bio_put(comp_bio);
return 0; return 0;
fail2: fail2:
...@@ -752,6 +754,8 @@ struct heuristic_ws { ...@@ -752,6 +754,8 @@ struct heuristic_ws {
u32 sample_size; u32 sample_size;
/* Buckets store counters for each byte value */ /* Buckets store counters for each byte value */
struct bucket_item *bucket; struct bucket_item *bucket;
/* Sorting buffer */
struct bucket_item *bucket_b;
struct list_head list; struct list_head list;
}; };
...@@ -763,6 +767,7 @@ static void free_heuristic_ws(struct list_head *ws) ...@@ -763,6 +767,7 @@ static void free_heuristic_ws(struct list_head *ws)
kvfree(workspace->sample); kvfree(workspace->sample);
kfree(workspace->bucket); kfree(workspace->bucket);
kfree(workspace->bucket_b);
kfree(workspace); kfree(workspace);
} }
...@@ -782,6 +787,10 @@ static struct list_head *alloc_heuristic_ws(void) ...@@ -782,6 +787,10 @@ static struct list_head *alloc_heuristic_ws(void)
if (!ws->bucket) if (!ws->bucket)
goto fail; goto fail;
ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL);
if (!ws->bucket_b)
goto fail;
INIT_LIST_HEAD(&ws->list); INIT_LIST_HEAD(&ws->list);
return &ws->list; return &ws->list;
fail: fail:
...@@ -1278,13 +1287,103 @@ static u32 shannon_entropy(struct heuristic_ws *ws) ...@@ -1278,13 +1287,103 @@ static u32 shannon_entropy(struct heuristic_ws *ws)
return entropy_sum * 100 / entropy_max; return entropy_sum * 100 / entropy_max;
} }
/* Compare buckets by size, ascending */ #define RADIX_BASE 4U
static int bucket_comp_rev(const void *lv, const void *rv) #define COUNTERS_SIZE (1U << RADIX_BASE)
/*
 * Extract the 4-bit digit of @num at bit offset @shift, inverted so that a
 * larger digit maps to a smaller return value (the surrounding radix sort
 * uses this to produce a descending "reverse" order).
 *
 * @num:   value to take the digit from
 * @shift: bit offset of the digit, a multiple of RADIX_BASE
 */
static u8 get4bits(u64 num, int shift)
{
	u8 low4bits;

	num >>= shift;
	/* Reverse order: digit d becomes (COUNTERS_SIZE - 1) - d */
	low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE);
	return low4bits;
}
/*
* Use 4 bits as radix base
 * Use 16 u32 counters for calculating new position in buf array
*
* @array - array that will be sorted
* @array_buf - buffer array to store sorting results
* must be equal in size to @array
* @num - array size
*/
static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf,
int num)
{ {
const struct bucket_item *l = (const struct bucket_item *)lv; u64 max_num;
const struct bucket_item *r = (const struct bucket_item *)rv; u64 buf_num;
u32 counters[COUNTERS_SIZE];
u32 new_addr;
u32 addr;
int bitlen;
int shift;
int i;
return r->count - l->count; /*
 * Try to avoid useless loop iterations for small numbers stored in big
 * counters. Example: 48 33 4 ... in 64bit array
*/
max_num = array[0].count;
for (i = 1; i < num; i++) {
buf_num = array[i].count;
if (buf_num > max_num)
max_num = buf_num;
}
buf_num = ilog2(max_num);
bitlen = ALIGN(buf_num, RADIX_BASE * 2);
shift = 0;
while (shift < bitlen) {
memset(counters, 0, sizeof(counters));
for (i = 0; i < num; i++) {
buf_num = array[i].count;
addr = get4bits(buf_num, shift);
counters[addr]++;
}
for (i = 1; i < COUNTERS_SIZE; i++)
counters[i] += counters[i - 1];
for (i = num - 1; i >= 0; i--) {
buf_num = array[i].count;
addr = get4bits(buf_num, shift);
counters[addr]--;
new_addr = counters[addr];
array_buf[new_addr] = array[i];
}
shift += RADIX_BASE;
/*
* Normal radix expects to move data from a temporary array, to
* the main one. But that requires some CPU time. Avoid that
* by doing another sort iteration to original array instead of
* memcpy()
*/
memset(counters, 0, sizeof(counters));
for (i = 0; i < num; i ++) {
buf_num = array_buf[i].count;
addr = get4bits(buf_num, shift);
counters[addr]++;
}
for (i = 1; i < COUNTERS_SIZE; i++)
counters[i] += counters[i - 1];
for (i = num - 1; i >= 0; i--) {
buf_num = array_buf[i].count;
addr = get4bits(buf_num, shift);
counters[addr]--;
new_addr = counters[addr];
array[new_addr] = array_buf[i];
}
shift += RADIX_BASE;
}
} }
/* /*
...@@ -1314,7 +1413,7 @@ static int byte_core_set_size(struct heuristic_ws *ws) ...@@ -1314,7 +1413,7 @@ static int byte_core_set_size(struct heuristic_ws *ws)
struct bucket_item *bucket = ws->bucket; struct bucket_item *bucket = ws->bucket;
/* Sort in reverse order */ /* Sort in reverse order */
sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL); radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE);
for (i = 0; i < BYTE_CORE_SET_LOW; i++) for (i = 0; i < BYTE_CORE_SET_LOW; i++)
coreset_sum += bucket[i].count; coreset_sum += bucket[i].count;
......
...@@ -75,7 +75,7 @@ struct compressed_bio { ...@@ -75,7 +75,7 @@ struct compressed_bio {
u32 sums; u32 sums;
}; };
void btrfs_init_compress(void); void __init btrfs_init_compress(void);
void btrfs_exit_compress(void); void btrfs_exit_compress(void);
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
...@@ -137,6 +137,8 @@ extern const struct btrfs_compress_op btrfs_zlib_compress; ...@@ -137,6 +137,8 @@ extern const struct btrfs_compress_op btrfs_zlib_compress;
extern const struct btrfs_compress_op btrfs_lzo_compress; extern const struct btrfs_compress_op btrfs_lzo_compress;
extern const struct btrfs_compress_op btrfs_zstd_compress; extern const struct btrfs_compress_op btrfs_zstd_compress;
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end); int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
#endif #endif
...@@ -1807,8 +1807,8 @@ static noinline int generic_bin_search(struct extent_buffer *eb, ...@@ -1807,8 +1807,8 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
* simple bin_search frontend that does the right thing for * simple bin_search frontend that does the right thing for
* leaves vs nodes * leaves vs nodes
*/ */
static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key, int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot) int level, int *slot)
{ {
if (level == 0) if (level == 0)
return generic_bin_search(eb, return generic_bin_search(eb,
...@@ -1824,12 +1824,6 @@ static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key, ...@@ -1824,12 +1824,6 @@ static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
slot); slot);
} }
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot)
{
return bin_search(eb, key, level, slot);
}
static void root_add_used(struct btrfs_root *root, u32 size) static void root_add_used(struct btrfs_root *root, u32 size)
{ {
spin_lock(&root->accounting_lock); spin_lock(&root->accounting_lock);
...@@ -2614,7 +2608,7 @@ static int key_search(struct extent_buffer *b, const struct btrfs_key *key, ...@@ -2614,7 +2608,7 @@ static int key_search(struct extent_buffer *b, const struct btrfs_key *key,
int level, int *prev_cmp, int *slot) int level, int *prev_cmp, int *slot)
{ {
if (*prev_cmp != 0) { if (*prev_cmp != 0) {
*prev_cmp = bin_search(b, key, level, slot); *prev_cmp = btrfs_bin_search(b, key, level, slot);
return *prev_cmp; return *prev_cmp;
} }
...@@ -2660,17 +2654,29 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, ...@@ -2660,17 +2654,29 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
} }
/* /*
* look for key in the tree. path is filled in with nodes along the way * btrfs_search_slot - look for a key in a tree and perform necessary
* if key is found, we return zero and you can find the item in the leaf * modifications to preserve tree invariants.
* level of the path (level 0) *
* @trans: Handle of transaction, used when modifying the tree
* @p: Holds all btree nodes along the search path
* @root: The root node of the tree
* @key: The key we are looking for
* @ins_len: Indicates purpose of search, for inserts it is 1, for
* deletions it's -1. 0 for plain searches
* @cow: boolean should CoW operations be performed. Must always be 1
* when modifying the tree.
*
* If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
* If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
* *
* If the key isn't found, the path points to the slot where it should * If @key is found, 0 is returned and you can find the item in the leaf level
* be inserted, and 1 is returned. If there are other errors during the * of the path (level 0)
* search a negative error number is returned.
* *
* if ins_len > 0, nodes and leaves will be split as we walk down the * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
* tree. if ins_len < 0, nodes will be merged as we walk down the tree (if * points to the slot where it should be inserted
* possible) *
* If an error is encountered while searching the tree a negative error number
* is returned
*/ */
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key, struct btrfs_path *p, const struct btrfs_key *key, struct btrfs_path *p,
...@@ -2774,6 +2780,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, ...@@ -2774,6 +2780,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* contention with the cow code * contention with the cow code
*/ */
if (cow) { if (cow) {
bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
/* /*
* if we don't really need to cow this block * if we don't really need to cow this block
* then we don't want to set the path blocking, * then we don't want to set the path blocking,
...@@ -2798,9 +2806,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, ...@@ -2798,9 +2806,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
} }
btrfs_set_path_blocking(p); btrfs_set_path_blocking(p);
err = btrfs_cow_block(trans, root, b, if (last_level)
p->nodes[level + 1], err = btrfs_cow_block(trans, root, b, NULL, 0,
p->slots[level + 1], &b); &b);
else
err = btrfs_cow_block(trans, root, b,
p->nodes[level + 1],
p->slots[level + 1], &b);
if (err) { if (err) {
ret = err; ret = err;
goto done; goto done;
...@@ -5175,7 +5187,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, ...@@ -5175,7 +5187,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
while (1) { while (1) {
nritems = btrfs_header_nritems(cur); nritems = btrfs_header_nritems(cur);
level = btrfs_header_level(cur); level = btrfs_header_level(cur);
sret = bin_search(cur, min_key, level, &slot); sret = btrfs_bin_search(cur, min_key, level, &slot);
/* at the lowest level, we're done, setup the path and exit */ /* at the lowest level, we're done, setup the path and exit */
if (level == path->lowest_level) { if (level == path->lowest_level) {
......
...@@ -679,7 +679,6 @@ enum btrfs_orphan_cleanup_state { ...@@ -679,7 +679,6 @@ enum btrfs_orphan_cleanup_state {
/* used by the raid56 code to lock stripes for read/modify/write */ /* used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash { struct btrfs_stripe_hash {
struct list_head hash_list; struct list_head hash_list;
wait_queue_head_t wait;
spinlock_t lock; spinlock_t lock;
}; };
...@@ -3060,15 +3059,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, ...@@ -3060,15 +3059,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u64 dir, struct btrfs_path *path, u64 dir,
const char *name, u16 name_len, const char *name, u16 name_len,
int mod); int mod);
int verify_dir_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf, int slot,
struct btrfs_dir_item *dir_item);
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info, struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, struct btrfs_path *path,
const char *name, const char *name,
int name_len); int name_len);
bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
unsigned long start, u16 name_len);
/* orphan.c */ /* orphan.c */
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
...@@ -3197,7 +3191,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); ...@@ -3197,7 +3191,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
struct inode *btrfs_alloc_inode(struct super_block *sb); struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode); void btrfs_destroy_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode);
int btrfs_init_cachep(void); int __init btrfs_init_cachep(void);
void btrfs_destroy_cachep(void); void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file); long btrfs_ioctl_trans_end(struct file *file);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
...@@ -3248,7 +3242,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, ...@@ -3248,7 +3242,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
struct file *dst_file, u64 dst_loff); struct file *dst_file, u64 dst_loff);
/* file.c */ /* file.c */
int btrfs_auto_defrag_init(void); int __init btrfs_auto_defrag_init(void);
void btrfs_auto_defrag_exit(void); void btrfs_auto_defrag_exit(void);
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode); struct btrfs_inode *inode);
...@@ -3283,7 +3277,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, ...@@ -3283,7 +3277,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
struct btrfs_root *root); struct btrfs_root *root);
/* sysfs.c */ /* sysfs.c */
int btrfs_init_sysfs(void); int __init btrfs_init_sysfs(void);
void btrfs_exit_sysfs(void); void btrfs_exit_sysfs(void);
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info); int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info);
void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info); void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
......
...@@ -1303,40 +1303,42 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work) ...@@ -1303,40 +1303,42 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
if (!path) if (!path)
goto out; goto out;
again: do {
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2) if (atomic_read(&delayed_root->items) <
goto free_path; BTRFS_DELAYED_BACKGROUND / 2)
break;
delayed_node = btrfs_first_prepared_delayed_node(delayed_root); delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
if (!delayed_node) if (!delayed_node)
goto free_path; break;
path->leave_spinning = 1; path->leave_spinning = 1;
root = delayed_node->root; root = delayed_node->root;
trans = btrfs_join_transaction(root); trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) if (IS_ERR(trans)) {
goto release_path; btrfs_release_path(path);
btrfs_release_prepared_delayed_node(delayed_node);
total_done++;
continue;
}
block_rsv = trans->block_rsv; block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->delayed_block_rsv; trans->block_rsv = &root->fs_info->delayed_block_rsv;
__btrfs_commit_inode_delayed_items(trans, path, delayed_node); __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
trans->block_rsv = block_rsv; trans->block_rsv = block_rsv;
btrfs_end_transaction(trans); btrfs_end_transaction(trans);
btrfs_btree_balance_dirty_nodelay(root->fs_info); btrfs_btree_balance_dirty_nodelay(root->fs_info);
release_path: btrfs_release_path(path);
btrfs_release_path(path); btrfs_release_prepared_delayed_node(delayed_node);
total_done++; total_done++;
btrfs_release_prepared_delayed_node(delayed_node); } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) || || total_done < async_work->nr);
total_done < async_work->nr)
goto again;
free_path:
btrfs_free_path(path); btrfs_free_path(path);
out: out:
wake_up(&delayed_root->wait); wake_up(&delayed_root->wait);
...@@ -1349,10 +1351,6 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, ...@@ -1349,10 +1351,6 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
{ {
struct btrfs_async_delayed_work *async_work; struct btrfs_async_delayed_work *async_work;
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
btrfs_workqueue_normal_congested(fs_info->delayed_workers))
return 0;
async_work = kmalloc(sizeof(*async_work), GFP_NOFS); async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
if (!async_work) if (!async_work)
return -ENOMEM; return -ENOMEM;
...@@ -1388,7 +1386,8 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) ...@@ -1388,7 +1386,8 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
{ {
struct btrfs_delayed_root *delayed_root = fs_info->delayed_root; struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
btrfs_workqueue_normal_congested(fs_info->delayed_workers))
return; return;
if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) { if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
......
...@@ -937,7 +937,7 @@ void btrfs_delayed_ref_exit(void) ...@@ -937,7 +937,7 @@ void btrfs_delayed_ref_exit(void)
kmem_cache_destroy(btrfs_delayed_extent_op_cachep); kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
} }
int btrfs_delayed_ref_init(void) int __init btrfs_delayed_ref_init(void)
{ {
btrfs_delayed_ref_head_cachep = kmem_cache_create( btrfs_delayed_ref_head_cachep = kmem_cache_create(
"btrfs_delayed_ref_head", "btrfs_delayed_ref_head",
......
...@@ -203,7 +203,7 @@ extern struct kmem_cache *btrfs_delayed_tree_ref_cachep; ...@@ -203,7 +203,7 @@ extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
extern struct kmem_cache *btrfs_delayed_data_ref_cachep; extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
extern struct kmem_cache *btrfs_delayed_extent_op_cachep; extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int btrfs_delayed_ref_init(void); int __init btrfs_delayed_ref_init(void);
void btrfs_delayed_ref_exit(void); void btrfs_delayed_ref_exit(void);
static inline struct btrfs_delayed_extent_op * static inline struct btrfs_delayed_extent_op *
......
...@@ -172,7 +172,8 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) ...@@ -172,7 +172,8 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
dev_replace->tgtdev->commit_bytes_used = dev_replace->tgtdev->commit_bytes_used =
dev_replace->srcdev->commit_bytes_used; dev_replace->srcdev->commit_bytes_used;
} }
dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; set_bit(BTRFS_DEV_STATE_REPLACE_TGT,
&dev_replace->tgtdev->dev_state);
btrfs_init_dev_replace_tgtdev_for_resume(fs_info, btrfs_init_dev_replace_tgtdev_for_resume(fs_info,
dev_replace->tgtdev); dev_replace->tgtdev);
} }
...@@ -304,6 +305,14 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) ...@@ -304,6 +305,14 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
dev_replace->cursor_left_last_write_of_item; dev_replace->cursor_left_last_write_of_item;
} }
/*
 * Return the device's name for message printing, or the "<missing disk>"
 * placeholder when the device is flagged missing.
 * NOTE(review): the name is fetched via rcu_str_deref(), so callers
 * presumably need RCU read-side protection while using the result — confirm.
 */
static char* btrfs_dev_name(struct btrfs_device *device)
{
	if (!test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		return rcu_str_deref(device->name);

	return "<missing disk>";
}
int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
const char *tgtdev_name, u64 srcdevid, const char *srcdev_name, const char *tgtdev_name, u64 srcdevid, const char *srcdev_name,
int read_src) int read_src)
...@@ -363,8 +372,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, ...@@ -363,8 +372,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
btrfs_info_in_rcu(fs_info, btrfs_info_in_rcu(fs_info,
"dev_replace from %s (devid %llu) to %s started", "dev_replace from %s (devid %llu) to %s started",
src_device->missing ? "<missing disk>" : btrfs_dev_name(src_device),
rcu_str_deref(src_device->name),
src_device->devid, src_device->devid,
rcu_str_deref(tgt_device->name)); rcu_str_deref(tgt_device->name));
...@@ -538,8 +546,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ...@@ -538,8 +546,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
} else { } else {
btrfs_err_in_rcu(fs_info, btrfs_err_in_rcu(fs_info,
"btrfs_scrub_dev(%s, %llu, %s) failed %d", "btrfs_scrub_dev(%s, %llu, %s) failed %d",
src_device->missing ? "<missing disk>" : btrfs_dev_name(src_device),
rcu_str_deref(src_device->name),
src_device->devid, src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret); rcu_str_deref(tgt_device->name), scrub_ret);
btrfs_dev_replace_unlock(dev_replace, 1); btrfs_dev_replace_unlock(dev_replace, 1);
...@@ -557,11 +564,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ...@@ -557,11 +564,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
btrfs_info_in_rcu(fs_info, btrfs_info_in_rcu(fs_info,
"dev_replace from %s (devid %llu) to %s finished", "dev_replace from %s (devid %llu) to %s finished",
src_device->missing ? "<missing disk>" : btrfs_dev_name(src_device),
rcu_str_deref(src_device->name),
src_device->devid, src_device->devid,
rcu_str_deref(tgt_device->name)); rcu_str_deref(tgt_device->name));
tgt_device->is_tgtdev_for_dev_replace = 0; clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &tgt_device->dev_state);
tgt_device->devid = src_device->devid; tgt_device->devid = src_device->devid;
src_device->devid = BTRFS_DEV_REPLACE_DEVID; src_device->devid = BTRFS_DEV_REPLACE_DEVID;
memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp));
...@@ -814,12 +820,10 @@ static int btrfs_dev_replace_kthread(void *data) ...@@ -814,12 +820,10 @@ static int btrfs_dev_replace_kthread(void *data)
progress = btrfs_dev_replace_progress(fs_info); progress = btrfs_dev_replace_progress(fs_info);
progress = div_u64(progress, 10); progress = div_u64(progress, 10);
btrfs_info_in_rcu(fs_info, btrfs_info_in_rcu(fs_info,
"continuing dev_replace from %s (devid %llu) to %s @%u%%", "continuing dev_replace from %s (devid %llu) to target %s @%u%%",
dev_replace->srcdev->missing ? "<missing disk>" btrfs_dev_name(dev_replace->srcdev),
: rcu_str_deref(dev_replace->srcdev->name),
dev_replace->srcdev->devid, dev_replace->srcdev->devid,
dev_replace->tgtdev ? rcu_str_deref(dev_replace->tgtdev->name) btrfs_dev_name(dev_replace->tgtdev),
: "<missing target disk>",
(unsigned int)progress); (unsigned int)progress);
btrfs_dev_replace_continue_on_mount(fs_info); btrfs_dev_replace_continue_on_mount(fs_info);
......
...@@ -403,8 +403,6 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info, ...@@ -403,8 +403,6 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
btrfs_dir_data_len(leaf, dir_item); btrfs_dir_data_len(leaf, dir_item);
name_ptr = (unsigned long)(dir_item + 1); name_ptr = (unsigned long)(dir_item + 1);
if (verify_dir_item(fs_info, leaf, path->slots[0], dir_item))
return NULL;
if (btrfs_dir_name_len(leaf, dir_item) == name_len && if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
return dir_item; return dir_item;
...@@ -450,109 +448,3 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, ...@@ -450,109 +448,3 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
} }
return ret; return ret;
} }
int verify_dir_item(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf,
int slot,
struct btrfs_dir_item *dir_item)
{
u16 namelen = BTRFS_NAME_LEN;
int ret;
u8 type = btrfs_dir_type(leaf, dir_item);
if (type >= BTRFS_FT_MAX) {
btrfs_crit(fs_info, "invalid dir item type: %d", (int)type);
return 1;
}
if (type == BTRFS_FT_XATTR)
namelen = XATTR_NAME_MAX;
if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
btrfs_crit(fs_info, "invalid dir item name len: %u",
(unsigned)btrfs_dir_name_len(leaf, dir_item));
return 1;
}
namelen = btrfs_dir_name_len(leaf, dir_item);
ret = btrfs_is_name_len_valid(leaf, slot,
(unsigned long)(dir_item + 1), namelen);
if (!ret)
return 1;
/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
if ((btrfs_dir_data_len(leaf, dir_item) +
btrfs_dir_name_len(leaf, dir_item)) >
BTRFS_MAX_XATTR_SIZE(fs_info)) {
btrfs_crit(fs_info, "invalid dir item name + data len: %u + %u",
(unsigned)btrfs_dir_name_len(leaf, dir_item),
(unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
}
return 0;
}
bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
unsigned long start, u16 name_len)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_key key;
u32 read_start;
u32 read_end;
u32 item_start;
u32 item_end;
u32 size;
bool ret = true;
ASSERT(start > BTRFS_LEAF_DATA_OFFSET);
read_start = start - BTRFS_LEAF_DATA_OFFSET;
read_end = read_start + name_len;
item_start = btrfs_item_offset_nr(leaf, slot);
item_end = btrfs_item_end_nr(leaf, slot);
btrfs_item_key_to_cpu(leaf, &key, slot);
switch (key.type) {
case BTRFS_DIR_ITEM_KEY:
case BTRFS_XATTR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
size = sizeof(struct btrfs_dir_item);
break;
case BTRFS_INODE_REF_KEY:
size = sizeof(struct btrfs_inode_ref);
break;
case BTRFS_INODE_EXTREF_KEY:
size = sizeof(struct btrfs_inode_extref);
break;
case BTRFS_ROOT_REF_KEY:
case BTRFS_ROOT_BACKREF_KEY:
size = sizeof(struct btrfs_root_ref);
break;
default:
ret = false;
goto out;
}
if (read_start < item_start) {
ret = false;
goto out;
}
if (read_end > item_end) {
ret = false;
goto out;
}
/* there shall be item(s) before name */
if (read_start - item_start < size) {
ret = false;
goto out;
}
out:
if (!ret)
btrfs_crit(fs_info, "invalid dir item name len: %u",
(unsigned int)name_len);
return ret;
}
...@@ -61,7 +61,8 @@ ...@@ -61,7 +61,8 @@
BTRFS_HEADER_FLAG_RELOC |\ BTRFS_HEADER_FLAG_RELOC |\
BTRFS_SUPER_FLAG_ERROR |\ BTRFS_SUPER_FLAG_ERROR |\
BTRFS_SUPER_FLAG_SEEDING |\ BTRFS_SUPER_FLAG_SEEDING |\
BTRFS_SUPER_FLAG_METADUMP) BTRFS_SUPER_FLAG_METADUMP |\
BTRFS_SUPER_FLAG_METADUMP_V2)
static const struct extent_io_ops btree_extent_io_ops; static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work); static void end_workqueue_fn(struct btrfs_work *work);
...@@ -220,7 +221,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, ...@@ -220,7 +221,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
* extents on the btree inode are pretty simple, there's one extent * extents on the btree inode are pretty simple, there's one extent
* that covers the entire device * that covers the entire device
*/ */
static struct extent_map *btree_get_extent(struct btrfs_inode *inode, struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len, struct page *page, size_t pg_offset, u64 start, u64 len,
int create) int create)
{ {
...@@ -285,7 +286,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, ...@@ -285,7 +286,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
int verify) int verify)
{ {
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
char *result = NULL; char result[BTRFS_CSUM_SIZE];
unsigned long len; unsigned long len;
unsigned long cur_len; unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE; unsigned long offset = BTRFS_CSUM_SIZE;
...@@ -294,7 +295,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, ...@@ -294,7 +295,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
unsigned long map_len; unsigned long map_len;
int err; int err;
u32 crc = ~(u32)0; u32 crc = ~(u32)0;
unsigned long inline_result;
len = buf->len - offset; len = buf->len - offset;
while (len > 0) { while (len > 0) {
...@@ -308,13 +308,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, ...@@ -308,13 +308,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
len -= cur_len; len -= cur_len;
offset += cur_len; offset += cur_len;
} }
if (csum_size > sizeof(inline_result)) { memset(result, 0, BTRFS_CSUM_SIZE);
result = kzalloc(csum_size, GFP_NOFS);
if (!result)
return -ENOMEM;
} else {
result = (char *)&inline_result;
}
btrfs_csum_final(crc, result); btrfs_csum_final(crc, result);
...@@ -329,15 +323,12 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, ...@@ -329,15 +323,12 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
"%s checksum verify failed on %llu wanted %X found %X level %d", "%s checksum verify failed on %llu wanted %X found %X level %d",
fs_info->sb->s_id, buf->start, fs_info->sb->s_id, buf->start,
val, found, btrfs_header_level(buf)); val, found, btrfs_header_level(buf));
if (result != (char *)&inline_result)
kfree(result);
return -EUCLEAN; return -EUCLEAN;
} }
} else { } else {
write_extent_buffer(buf, result, 0, csum_size); write_extent_buffer(buf, result, 0, csum_size);
} }
if (result != (char *)&inline_result)
kfree(result);
return 0; return 0;
} }
...@@ -391,7 +382,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, ...@@ -391,7 +382,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
clear_extent_buffer_uptodate(eb); clear_extent_buffer_uptodate(eb);
out: out:
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state, GFP_NOFS); &cached_state);
if (need_lock) if (need_lock)
btrfs_tree_read_unlock_blocking(eb); btrfs_tree_read_unlock_blocking(eb);
return ret; return ret;
...@@ -455,7 +446,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, ...@@ -455,7 +446,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) { while (1) {
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
btree_get_extent, mirror_num); mirror_num);
if (!ret) { if (!ret) {
if (!verify_parent_transid(io_tree, eb, if (!verify_parent_transid(io_tree, eb,
parent_transid, 0)) parent_transid, 0))
...@@ -1012,7 +1003,7 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr) ...@@ -1012,7 +1003,7 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
if (IS_ERR(buf)) if (IS_ERR(buf))
return; return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, WAIT_NONE, btree_get_extent, 0); buf, WAIT_NONE, 0);
free_extent_buffer(buf); free_extent_buffer(buf);
} }
...@@ -1031,7 +1022,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, ...@@ -1031,7 +1022,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
btree_get_extent, mirror_num); mirror_num);
if (ret) { if (ret) {
free_extent_buffer(buf); free_extent_buffer(buf);
return ret; return ret;
...@@ -1243,7 +1234,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, ...@@ -1243,7 +1234,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root; struct btrfs_root *root;
struct btrfs_key key; struct btrfs_key key;
int ret = 0; int ret = 0;
uuid_le uuid; uuid_le uuid = NULL_UUID_LE;
root = btrfs_alloc_root(fs_info, GFP_KERNEL); root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root) if (!root)
...@@ -1284,7 +1275,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, ...@@ -1284,7 +1275,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_root_used(&root->root_item, leaf->len); btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0); btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0); btrfs_set_root_dirid(&root->root_item, 0);
uuid_le_gen(&uuid); if (is_fstree(objectid))
uuid_le_gen(&uuid);
memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE); memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
root->root_item.drop_level = 0; root->root_item.drop_level = 0;
...@@ -2875,7 +2867,7 @@ int open_ctree(struct super_block *sb, ...@@ -2875,7 +2867,7 @@ int open_ctree(struct super_block *sb,
goto fail_sysfs; goto fail_sysfs;
} }
if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) { if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info, btrfs_warn(fs_info,
"writeable mount is not allowed due to too many missing devices"); "writeable mount is not allowed due to too many missing devices");
goto fail_sysfs; goto fail_sysfs;
...@@ -3357,7 +3349,7 @@ static void write_dev_flush(struct btrfs_device *device) ...@@ -3357,7 +3349,7 @@ static void write_dev_flush(struct btrfs_device *device)
bio->bi_private = &device->flush_wait; bio->bi_private = &device->flush_wait;
btrfsic_submit_bio(bio); btrfsic_submit_bio(bio);
device->flush_bio_sent = 1; set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
} }
/* /*
...@@ -3367,10 +3359,10 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device) ...@@ -3367,10 +3359,10 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device)
{ {
struct bio *bio = device->flush_bio; struct bio *bio = device->flush_bio;
if (!device->flush_bio_sent) if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
return BLK_STS_OK; return BLK_STS_OK;
device->flush_bio_sent = 0; clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
wait_for_completion_io(&device->flush_wait); wait_for_completion_io(&device->flush_wait);
return bio->bi_status; return bio->bi_status;
...@@ -3378,7 +3370,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device) ...@@ -3378,7 +3370,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device)
static int check_barrier_error(struct btrfs_fs_info *fs_info) static int check_barrier_error(struct btrfs_fs_info *fs_info)
{ {
if (!btrfs_check_rw_degradable(fs_info)) if (!btrfs_check_rw_degradable(fs_info, NULL))
return -EIO; return -EIO;
return 0; return 0;
} }
...@@ -3394,14 +3386,16 @@ static int barrier_all_devices(struct btrfs_fs_info *info) ...@@ -3394,14 +3386,16 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
int errors_wait = 0; int errors_wait = 0;
blk_status_t ret; blk_status_t ret;
lockdep_assert_held(&info->fs_devices->device_list_mutex);
/* send down all the barriers */ /* send down all the barriers */
head = &info->fs_devices->devices; head = &info->fs_devices->devices;
list_for_each_entry_rcu(dev, head, dev_list) { list_for_each_entry(dev, head, dev_list) {
if (dev->missing) if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue; continue;
if (!dev->bdev) if (!dev->bdev)
continue; continue;
if (!dev->in_fs_metadata || !dev->writeable) if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue; continue;
write_dev_flush(dev); write_dev_flush(dev);
...@@ -3409,14 +3403,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info) ...@@ -3409,14 +3403,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
} }
/* wait for all the barriers */ /* wait for all the barriers */
list_for_each_entry_rcu(dev, head, dev_list) { list_for_each_entry(dev, head, dev_list) {
if (dev->missing) if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue; continue;
if (!dev->bdev) { if (!dev->bdev) {
errors_wait++; errors_wait++;
continue; continue;
} }
if (!dev->in_fs_metadata || !dev->writeable) if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue; continue;
ret = wait_dev_flush(dev); ret = wait_dev_flush(dev);
...@@ -3508,12 +3503,13 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) ...@@ -3508,12 +3503,13 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
} }
} }
list_for_each_entry_rcu(dev, head, dev_list) { list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev) { if (!dev->bdev) {
total_errors++; total_errors++;
continue; continue;
} }
if (!dev->in_fs_metadata || !dev->writeable) if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue; continue;
btrfs_set_stack_device_generation(dev_item, 0); btrfs_set_stack_device_generation(dev_item, 0);
...@@ -3549,10 +3545,11 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) ...@@ -3549,10 +3545,11 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
} }
total_errors = 0; total_errors = 0;
list_for_each_entry_rcu(dev, head, dev_list) { list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev) if (!dev->bdev)
continue; continue;
if (!dev->in_fs_metadata || !dev->writeable) if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
continue; continue;
ret = wait_dev_supers(dev, max_mirrors); ret = wait_dev_supers(dev, max_mirrors);
...@@ -3910,9 +3907,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info) ...@@ -3910,9 +3907,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "no valid FS found"); btrfs_err(fs_info, "no valid FS found");
ret = -EINVAL; ret = -EINVAL;
} }
if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
btrfs_warn(fs_info, "unrecognized super flag: %llu", btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
ret = -EINVAL;
}
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
btrfs_err(fs_info, "tree_root level too big: %d >= %d", btrfs_err(fs_info, "tree_root level too big: %d >= %d",
btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
......
...@@ -149,6 +149,9 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, ...@@ -149,6 +149,9 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
u64 objectid); u64 objectid);
int btree_lock_page_hook(struct page *page, void *data, int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *)); void (*flush_fn)(void *));
struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
int create);
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int __init btrfs_end_io_wq_init(void); int __init btrfs_end_io_wq_init(void);
void btrfs_end_io_wq_exit(void); void btrfs_end_io_wq_exit(void);
......
...@@ -283,11 +283,6 @@ static int btrfs_get_name(struct dentry *parent, char *name, ...@@ -283,11 +283,6 @@ static int btrfs_get_name(struct dentry *parent, char *name,
name_len = btrfs_inode_ref_name_len(leaf, iref); name_len = btrfs_inode_ref_name_len(leaf, iref);
} }
ret = btrfs_is_name_len_valid(leaf, path->slots[0], name_ptr, name_len);
if (!ret) {
btrfs_free_path(path);
return -EIO;
}
read_extent_buffer(leaf, name, name_ptr, name_len); read_extent_buffer(leaf, name, name_ptr, name_len);
btrfs_free_path(path); btrfs_free_path(path);
......
...@@ -2145,7 +2145,10 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, ...@@ -2145,7 +2145,10 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
for (i = 0; i < bbio->num_stripes; i++, stripe++) { for (i = 0; i < bbio->num_stripes; i++, stripe++) {
u64 bytes; u64 bytes;
if (!stripe->dev->can_discard) struct request_queue *req_q;
req_q = bdev_get_queue(stripe->dev->bdev);
if (!blk_queue_discard(req_q))
continue; continue;
ret = btrfs_issue_discard(stripe->dev->bdev, ret = btrfs_issue_discard(stripe->dev->bdev,
...@@ -2894,7 +2897,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, ...@@ -2894,7 +2897,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *global_rsv; struct btrfs_block_rsv *global_rsv;
u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
u64 csum_bytes = trans->transaction->delayed_refs.pending_csums; u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
u64 num_dirty_bgs = trans->transaction->num_dirty_bgs; unsigned int num_dirty_bgs = trans->transaction->num_dirty_bgs;
u64 num_bytes, num_dirty_bgs_bytes; u64 num_bytes, num_dirty_bgs_bytes;
int ret = 0; int ret = 0;
...@@ -4945,12 +4948,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, ...@@ -4945,12 +4948,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
bytes = 0; bytes = 0;
else else
bytes -= delayed_rsv->size; bytes -= delayed_rsv->size;
spin_unlock(&delayed_rsv->lock);
if (percpu_counter_compare(&space_info->total_bytes_pinned, if (percpu_counter_compare(&space_info->total_bytes_pinned,
bytes) < 0) { bytes) < 0) {
spin_unlock(&delayed_rsv->lock);
return -ENOSPC; return -ENOSPC;
} }
spin_unlock(&delayed_rsv->lock);
commit: commit:
trans = btrfs_join_transaction(fs_info->extent_root); trans = btrfs_join_transaction(fs_info->extent_root);
...@@ -5738,8 +5741,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, ...@@ -5738,8 +5741,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
* or return if we already have enough space. This will also handle the resreve * or return if we already have enough space. This will also handle the resreve
* tracepoint for the reserved amount. * tracepoint for the reserved amount.
*/ */
int btrfs_inode_rsv_refill(struct btrfs_inode *inode, static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
enum btrfs_reserve_flush_enum flush) enum btrfs_reserve_flush_enum flush)
{ {
struct btrfs_root *root = inode->root; struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv; struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
...@@ -5770,7 +5773,7 @@ int btrfs_inode_rsv_refill(struct btrfs_inode *inode, ...@@ -5770,7 +5773,7 @@ int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
* This is the same as btrfs_block_rsv_release, except that it handles the * This is the same as btrfs_block_rsv_release, except that it handles the
* tracepoint for the reservation. * tracepoint for the reservation.
*/ */
void btrfs_inode_rsv_release(struct btrfs_inode *inode) static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
{ {
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
...@@ -9690,7 +9693,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) ...@@ -9690,7 +9693,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
* space to fit our block group in. * space to fit our block group in.
*/ */
if (device->total_bytes > device->bytes_used + min_free && if (device->total_bytes > device->bytes_used + min_free &&
!device->is_tgtdev_for_dev_replace) { !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
ret = find_free_dev_extent(trans, device, min_free, ret = find_free_dev_extent(trans, device, min_free,
&dev_offset, NULL); &dev_offset, NULL);
if (!ret) if (!ret)
...@@ -10875,7 +10878,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, ...@@ -10875,7 +10878,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
*trimmed = 0; *trimmed = 0;
/* Not writeable = nothing to do. */ /* Not writeable = nothing to do. */
if (!device->writeable) if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
return 0; return 0;
/* No free space = nothing to do. */ /* No free space = nothing to do. */
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "locking.h" #include "locking.h"
#include "rcu-string.h" #include "rcu-string.h"
#include "backref.h" #include "backref.h"
#include "disk-io.h"
static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache; static struct kmem_cache *extent_buffer_cache;
...@@ -109,8 +110,6 @@ struct tree_entry { ...@@ -109,8 +110,6 @@ struct tree_entry {
struct extent_page_data { struct extent_page_data {
struct bio *bio; struct bio *bio;
struct extent_io_tree *tree; struct extent_io_tree *tree;
get_extent_t *get_extent;
/* tells writepage not to lock the state bits for this range /* tells writepage not to lock the state bits for this range
* it still does the unlocking * it still does the unlocking
*/ */
...@@ -139,7 +138,8 @@ static void add_extent_changeset(struct extent_state *state, unsigned bits, ...@@ -139,7 +138,8 @@ static void add_extent_changeset(struct extent_state *state, unsigned bits,
BUG_ON(ret < 0); BUG_ON(ret < 0);
} }
static noinline void flush_write_bio(void *data); static void flush_write_bio(struct extent_page_data *epd);
static inline struct btrfs_fs_info * static inline struct btrfs_fs_info *
tree_fs_info(struct extent_io_tree *tree) tree_fs_info(struct extent_io_tree *tree)
{ {
...@@ -581,7 +581,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err) ...@@ -581,7 +581,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
* *
* This takes the tree lock, and returns 0 on success and < 0 on error. * This takes the tree lock, and returns 0 on success and < 0 on error.
*/ */
static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete, unsigned bits, int wake, int delete,
struct extent_state **cached_state, struct extent_state **cached_state,
gfp_t mask, struct extent_changeset *changeset) gfp_t mask, struct extent_changeset *changeset)
...@@ -1295,10 +1295,10 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -1295,10 +1295,10 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete, unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask) struct extent_state **cached)
{ {
return __clear_extent_bit(tree, start, end, bits, wake, delete, return __clear_extent_bit(tree, start, end, bits, wake, delete,
cached, mask, NULL); cached, GFP_NOFS, NULL);
} }
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
...@@ -1348,7 +1348,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end) ...@@ -1348,7 +1348,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
if (err == -EEXIST) { if (err == -EEXIST) {
if (failed_start > start) if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1, clear_extent_bit(tree, start, failed_start - 1,
EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); EXTENT_LOCKED, 1, 0, NULL);
return 0; return 0;
} }
return 1; return 1;
...@@ -1648,7 +1648,7 @@ STATIC u64 find_lock_delalloc_range(struct inode *inode, ...@@ -1648,7 +1648,7 @@ STATIC u64 find_lock_delalloc_range(struct inode *inode,
EXTENT_DELALLOC, 1, cached_state); EXTENT_DELALLOC, 1, cached_state);
if (!ret) { if (!ret) {
unlock_extent_cached(tree, delalloc_start, delalloc_end, unlock_extent_cached(tree, delalloc_start, delalloc_end,
&cached_state, GFP_NOFS); &cached_state);
__unlock_for_delalloc(inode, locked_page, __unlock_for_delalloc(inode, locked_page,
delalloc_start, delalloc_end); delalloc_start, delalloc_end);
cond_resched(); cond_resched();
...@@ -1744,7 +1744,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, ...@@ -1744,7 +1744,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
unsigned long page_ops) unsigned long page_ops)
{ {
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0, clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
NULL, GFP_NOFS); NULL);
__process_pages_contig(inode->i_mapping, locked_page, __process_pages_contig(inode->i_mapping, locked_page,
start >> PAGE_SHIFT, end >> PAGE_SHIFT, start >> PAGE_SHIFT, end >> PAGE_SHIFT,
...@@ -2027,7 +2027,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, ...@@ -2027,7 +2027,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
bio->bi_iter.bi_sector = sector; bio->bi_iter.bi_sector = sector;
dev = bbio->stripes[bbio->mirror_num - 1].dev; dev = bbio->stripes[bbio->mirror_num - 1].dev;
btrfs_put_bbio(bbio); btrfs_put_bbio(bbio);
if (!dev || !dev->bdev || !dev->writeable) { if (!dev || !dev->bdev ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
btrfs_bio_counter_dec(fs_info); btrfs_bio_counter_dec(fs_info);
bio_put(bio); bio_put(bio);
return -EIO; return -EIO;
...@@ -2493,7 +2494,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, ...@@ -2493,7 +2494,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
if (uptodate && tree->track_uptodate) if (uptodate && tree->track_uptodate)
set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); unlock_extent_cached_atomic(tree, start, end, &cached);
} }
/* /*
...@@ -2733,7 +2734,6 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, ...@@ -2733,7 +2734,6 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
start = page_offset(page) + bvec->bv_offset; start = page_offset(page) + bvec->bv_offset;
bio->bi_private = NULL; bio->bi_private = NULL;
bio_get(bio);
if (tree->ops) if (tree->ops)
ret = tree->ops->submit_bio_hook(tree->private_data, bio, ret = tree->ops->submit_bio_hook(tree->private_data, bio,
...@@ -2741,7 +2741,6 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, ...@@ -2741,7 +2741,6 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
else else
btrfsic_submit_bio(bio); btrfsic_submit_bio(bio);
bio_put(bio);
return blk_status_to_errno(ret); return blk_status_to_errno(ret);
} }
...@@ -2943,8 +2942,7 @@ static int __do_readpage(struct extent_io_tree *tree, ...@@ -2943,8 +2942,7 @@ static int __do_readpage(struct extent_io_tree *tree,
set_extent_uptodate(tree, cur, cur + iosize - 1, set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS); &cached, GFP_NOFS);
unlock_extent_cached(tree, cur, unlock_extent_cached(tree, cur,
cur + iosize - 1, cur + iosize - 1, &cached);
&cached, GFP_NOFS);
break; break;
} }
em = __get_extent_map(inode, page, pg_offset, cur, em = __get_extent_map(inode, page, pg_offset, cur,
...@@ -3037,8 +3035,7 @@ static int __do_readpage(struct extent_io_tree *tree, ...@@ -3037,8 +3035,7 @@ static int __do_readpage(struct extent_io_tree *tree,
set_extent_uptodate(tree, cur, cur + iosize - 1, set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS); &cached, GFP_NOFS);
unlock_extent_cached(tree, cur, unlock_extent_cached(tree, cur,
cur + iosize - 1, cur + iosize - 1, &cached);
&cached, GFP_NOFS);
cur = cur + iosize; cur = cur + iosize;
pg_offset += iosize; pg_offset += iosize;
continue; continue;
...@@ -3093,9 +3090,8 @@ static int __do_readpage(struct extent_io_tree *tree, ...@@ -3093,9 +3090,8 @@ static int __do_readpage(struct extent_io_tree *tree,
static inline void __do_contiguous_readpages(struct extent_io_tree *tree, static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
struct page *pages[], int nr_pages, struct page *pages[], int nr_pages,
u64 start, u64 end, u64 start, u64 end,
get_extent_t *get_extent,
struct extent_map **em_cached, struct extent_map **em_cached,
struct bio **bio, int mirror_num, struct bio **bio,
unsigned long *bio_flags, unsigned long *bio_flags,
u64 *prev_em_start) u64 *prev_em_start)
{ {
...@@ -3116,18 +3112,17 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, ...@@ -3116,18 +3112,17 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
} }
for (index = 0; index < nr_pages; index++) { for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], get_extent, em_cached, bio, __do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
mirror_num, bio_flags, 0, prev_em_start); bio, 0, bio_flags, 0, prev_em_start);
put_page(pages[index]); put_page(pages[index]);
} }
} }
static void __extent_readpages(struct extent_io_tree *tree, static void __extent_readpages(struct extent_io_tree *tree,
struct page *pages[], struct page *pages[],
int nr_pages, get_extent_t *get_extent, int nr_pages,
struct extent_map **em_cached, struct extent_map **em_cached,
struct bio **bio, int mirror_num, struct bio **bio, unsigned long *bio_flags,
unsigned long *bio_flags,
u64 *prev_em_start) u64 *prev_em_start)
{ {
u64 start = 0; u64 start = 0;
...@@ -3147,8 +3142,8 @@ static void __extent_readpages(struct extent_io_tree *tree, ...@@ -3147,8 +3142,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
} else { } else {
__do_contiguous_readpages(tree, &pages[first_index], __do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start, index - first_index, start,
end, get_extent, em_cached, end, em_cached,
bio, mirror_num, bio_flags, bio, bio_flags,
prev_em_start); prev_em_start);
start = page_start; start = page_start;
end = start + PAGE_SIZE - 1; end = start + PAGE_SIZE - 1;
...@@ -3159,9 +3154,8 @@ static void __extent_readpages(struct extent_io_tree *tree, ...@@ -3159,9 +3154,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
if (end) if (end)
__do_contiguous_readpages(tree, &pages[first_index], __do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start, index - first_index, start,
end, get_extent, em_cached, bio, end, em_cached, bio,
mirror_num, bio_flags, bio_flags, prev_em_start);
prev_em_start);
} }
static int __extent_read_full_page(struct extent_io_tree *tree, static int __extent_read_full_page(struct extent_io_tree *tree,
...@@ -3376,7 +3370,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, ...@@ -3376,7 +3370,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
page_end, NULL, 1); page_end, NULL, 1);
break; break;
} }
em = epd->get_extent(BTRFS_I(inode), page, pg_offset, cur, em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
end - cur + 1, 1); end - cur + 1, 1);
if (IS_ERR_OR_NULL(em)) { if (IS_ERR_OR_NULL(em)) {
SetPageError(page); SetPageError(page);
...@@ -3459,10 +3453,9 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, ...@@ -3459,10 +3453,9 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
* and the end_io handler clears the writeback ranges * and the end_io handler clears the writeback ranges
*/ */
static int __extent_writepage(struct page *page, struct writeback_control *wbc, static int __extent_writepage(struct page *page, struct writeback_control *wbc,
void *data) struct extent_page_data *epd)
{ {
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
struct extent_page_data *epd = data;
u64 start = page_offset(page); u64 start = page_offset(page);
u64 page_end = start + PAGE_SIZE - 1; u64 page_end = start + PAGE_SIZE - 1;
int ret; int ret;
...@@ -3896,8 +3889,7 @@ int btree_write_cache_pages(struct address_space *mapping, ...@@ -3896,8 +3889,7 @@ int btree_write_cache_pages(struct address_space *mapping,
* write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
* @mapping: address space structure to write * @mapping: address space structure to write
* @wbc: subtract the number of written pages from *@wbc->nr_to_write * @wbc: subtract the number of written pages from *@wbc->nr_to_write
* @writepage: function called for each page * @data: data passed to __extent_writepage function
* @data: data passed to writepage function
* *
* If a page is already under I/O, write_cache_pages() skips it, even * If a page is already under I/O, write_cache_pages() skips it, even
* if it's dirty. This is desirable behaviour for memory-cleaning writeback, * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
...@@ -3909,8 +3901,7 @@ int btree_write_cache_pages(struct address_space *mapping, ...@@ -3909,8 +3901,7 @@ int btree_write_cache_pages(struct address_space *mapping,
*/ */
static int extent_write_cache_pages(struct address_space *mapping, static int extent_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, struct writeback_control *wbc,
writepage_t writepage, void *data, struct extent_page_data *epd)
void (*flush_fn)(void *))
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
int ret = 0; int ret = 0;
...@@ -3974,7 +3965,7 @@ static int extent_write_cache_pages(struct address_space *mapping, ...@@ -3974,7 +3965,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
* mapping * mapping
*/ */
if (!trylock_page(page)) { if (!trylock_page(page)) {
flush_fn(data); flush_write_bio(epd);
lock_page(page); lock_page(page);
} }
...@@ -3985,7 +3976,7 @@ static int extent_write_cache_pages(struct address_space *mapping, ...@@ -3985,7 +3976,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
if (wbc->sync_mode != WB_SYNC_NONE) { if (wbc->sync_mode != WB_SYNC_NONE) {
if (PageWriteback(page)) if (PageWriteback(page))
flush_fn(data); flush_write_bio(epd);
wait_on_page_writeback(page); wait_on_page_writeback(page);
} }
...@@ -3995,7 +3986,7 @@ static int extent_write_cache_pages(struct address_space *mapping, ...@@ -3995,7 +3986,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
continue; continue;
} }
ret = (*writepage)(page, wbc, data); ret = __extent_writepage(page, wbc, epd);
if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
unlock_page(page); unlock_page(page);
...@@ -4043,7 +4034,7 @@ static int extent_write_cache_pages(struct address_space *mapping, ...@@ -4043,7 +4034,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
return ret; return ret;
} }
static void flush_epd_write_bio(struct extent_page_data *epd) static void flush_write_bio(struct extent_page_data *epd)
{ {
if (epd->bio) { if (epd->bio) {
int ret; int ret;
...@@ -4054,37 +4045,28 @@ static void flush_epd_write_bio(struct extent_page_data *epd) ...@@ -4054,37 +4045,28 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
} }
} }
static noinline void flush_write_bio(void *data) int extent_write_full_page(struct page *page, struct writeback_control *wbc)
{
struct extent_page_data *epd = data;
flush_epd_write_bio(epd);
}
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc)
{ {
int ret; int ret;
struct extent_page_data epd = { struct extent_page_data epd = {
.bio = NULL, .bio = NULL,
.tree = tree, .tree = &BTRFS_I(page->mapping->host)->io_tree,
.get_extent = get_extent,
.extent_locked = 0, .extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL, .sync_io = wbc->sync_mode == WB_SYNC_ALL,
}; };
ret = __extent_writepage(page, wbc, &epd); ret = __extent_writepage(page, wbc, &epd);
flush_epd_write_bio(&epd); flush_write_bio(&epd);
return ret; return ret;
} }
int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
u64 start, u64 end, get_extent_t *get_extent,
int mode) int mode)
{ {
int ret = 0; int ret = 0;
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct page *page; struct page *page;
unsigned long nr_pages = (end - start + PAGE_SIZE) >> unsigned long nr_pages = (end - start + PAGE_SIZE) >>
PAGE_SHIFT; PAGE_SHIFT;
...@@ -4092,7 +4074,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, ...@@ -4092,7 +4074,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
struct extent_page_data epd = { struct extent_page_data epd = {
.bio = NULL, .bio = NULL,
.tree = tree, .tree = tree,
.get_extent = get_extent,
.extent_locked = 1, .extent_locked = 1,
.sync_io = mode == WB_SYNC_ALL, .sync_io = mode == WB_SYNC_ALL,
}; };
...@@ -4118,34 +4099,30 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, ...@@ -4118,34 +4099,30 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
start += PAGE_SIZE; start += PAGE_SIZE;
} }
flush_epd_write_bio(&epd); flush_write_bio(&epd);
return ret; return ret;
} }
int extent_writepages(struct extent_io_tree *tree, int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping, struct address_space *mapping,
get_extent_t *get_extent,
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
int ret = 0; int ret = 0;
struct extent_page_data epd = { struct extent_page_data epd = {
.bio = NULL, .bio = NULL,
.tree = tree, .tree = tree,
.get_extent = get_extent,
.extent_locked = 0, .extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL, .sync_io = wbc->sync_mode == WB_SYNC_ALL,
}; };
ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd, ret = extent_write_cache_pages(mapping, wbc, &epd);
flush_write_bio); flush_write_bio(&epd);
flush_epd_write_bio(&epd);
return ret; return ret;
} }
int extent_readpages(struct extent_io_tree *tree, int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages, struct list_head *pages, unsigned nr_pages)
get_extent_t get_extent)
{ {
struct bio *bio = NULL; struct bio *bio = NULL;
unsigned page_idx; unsigned page_idx;
...@@ -4171,13 +4148,13 @@ int extent_readpages(struct extent_io_tree *tree, ...@@ -4171,13 +4148,13 @@ int extent_readpages(struct extent_io_tree *tree,
pagepool[nr++] = page; pagepool[nr++] = page;
if (nr < ARRAY_SIZE(pagepool)) if (nr < ARRAY_SIZE(pagepool))
continue; continue;
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached, __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
&bio, 0, &bio_flags, &prev_em_start); &bio_flags, &prev_em_start);
nr = 0; nr = 0;
} }
if (nr) if (nr)
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached, __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
&bio, 0, &bio_flags, &prev_em_start); &bio_flags, &prev_em_start);
if (em_cached) if (em_cached)
free_extent_map(em_cached); free_extent_map(em_cached);
...@@ -4210,7 +4187,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, ...@@ -4210,7 +4187,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
clear_extent_bit(tree, start, end, clear_extent_bit(tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, EXTENT_DO_ACCOUNTING,
1, 1, &cached_state, GFP_NOFS); 1, 1, &cached_state);
return 0; return 0;
} }
...@@ -4235,9 +4212,9 @@ static int try_release_extent_state(struct extent_map_tree *map, ...@@ -4235,9 +4212,9 @@ static int try_release_extent_state(struct extent_map_tree *map,
* at this point we can safely clear everything except the * at this point we can safely clear everything except the
* locked bit and the nodatasum bit * locked bit and the nodatasum bit
*/ */
ret = clear_extent_bit(tree, start, end, ret = __clear_extent_bit(tree, start, end,
~(EXTENT_LOCKED | EXTENT_NODATASUM), ~(EXTENT_LOCKED | EXTENT_NODATASUM),
0, 0, NULL, mask); 0, 0, NULL, mask, NULL);
/* if clear_extent_bit failed for enomem reasons, /* if clear_extent_bit failed for enomem reasons,
* we can't allow the release to continue. * we can't allow the release to continue.
...@@ -4303,9 +4280,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, ...@@ -4303,9 +4280,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
* This maps until we find something past 'last' * This maps until we find something past 'last'
*/ */
static struct extent_map *get_extent_skip_holes(struct inode *inode, static struct extent_map *get_extent_skip_holes(struct inode *inode,
u64 offset, u64 offset, u64 last)
u64 last,
get_extent_t *get_extent)
{ {
u64 sectorsize = btrfs_inode_sectorsize(inode); u64 sectorsize = btrfs_inode_sectorsize(inode);
struct extent_map *em; struct extent_map *em;
...@@ -4319,15 +4294,14 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, ...@@ -4319,15 +4294,14 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
if (len == 0) if (len == 0)
break; break;
len = ALIGN(len, sectorsize); len = ALIGN(len, sectorsize);
em = get_extent(BTRFS_I(inode), NULL, 0, offset, len, 0); em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, offset,
len, 0);
if (IS_ERR_OR_NULL(em)) if (IS_ERR_OR_NULL(em))
return em; return em;
/* if this isn't a hole return it */ /* if this isn't a hole return it */
if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) && if (em->block_start != EXTENT_MAP_HOLE)
em->block_start != EXTENT_MAP_HOLE) {
return em; return em;
}
/* this is a hole, advance to the next extent */ /* this is a hole, advance to the next extent */
offset = extent_map_end(em); offset = extent_map_end(em);
...@@ -4452,7 +4426,7 @@ static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info, ...@@ -4452,7 +4426,7 @@ static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
} }
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent) __u64 start, __u64 len)
{ {
int ret = 0; int ret = 0;
u64 off = start; u64 off = start;
...@@ -4534,8 +4508,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -4534,8 +4508,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state); &cached_state);
em = get_extent_skip_holes(inode, start, last_for_get_extent, em = get_extent_skip_holes(inode, start, last_for_get_extent);
get_extent);
if (!em) if (!em)
goto out; goto out;
if (IS_ERR(em)) { if (IS_ERR(em)) {
...@@ -4623,8 +4596,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -4623,8 +4596,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
} }
/* now scan forward to see if this is really the last extent. */ /* now scan forward to see if this is really the last extent. */
em = get_extent_skip_holes(inode, off, last_for_get_extent, em = get_extent_skip_holes(inode, off, last_for_get_extent);
get_extent);
if (IS_ERR(em)) { if (IS_ERR(em)) {
ret = PTR_ERR(em); ret = PTR_ERR(em);
goto out; goto out;
...@@ -4648,7 +4620,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ...@@ -4648,7 +4620,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
out: out:
btrfs_free_path(path); btrfs_free_path(path);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state, GFP_NOFS); &cached_state);
return ret; return ret;
} }
...@@ -5264,8 +5236,7 @@ int extent_buffer_uptodate(struct extent_buffer *eb) ...@@ -5264,8 +5236,7 @@ int extent_buffer_uptodate(struct extent_buffer *eb)
} }
int read_extent_buffer_pages(struct extent_io_tree *tree, int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, int wait, struct extent_buffer *eb, int wait, int mirror_num)
get_extent_t *get_extent, int mirror_num)
{ {
unsigned long i; unsigned long i;
struct page *page; struct page *page;
...@@ -5325,7 +5296,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, ...@@ -5325,7 +5296,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
ClearPageError(page); ClearPageError(page);
err = __extent_read_full_page(tree, page, err = __extent_read_full_page(tree, page,
get_extent, &bio, btree_get_extent, &bio,
mirror_num, &bio_flags, mirror_num, &bio_flags,
REQ_META); REQ_META);
if (err) { if (err) {
......
...@@ -300,19 +300,29 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -300,19 +300,29 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, struct extent_changeset *changeset); unsigned bits, struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete, unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask); struct extent_state **cached);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask,
struct extent_changeset *changeset);
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{ {
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
GFP_NOFS);
} }
static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start, static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached, gfp_t mask) u64 end, struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
GFP_NOFS, NULL);
}
static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
u64 start, u64 end, struct extent_state **cached)
{ {
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
mask); GFP_ATOMIC, NULL);
} }
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start, static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
...@@ -323,8 +333,7 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start, ...@@ -323,8 +333,7 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
if (bits & EXTENT_LOCKED) if (bits & EXTENT_LOCKED)
wake = 1; wake = 1;
return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
GFP_NOFS);
} }
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
...@@ -340,10 +349,10 @@ static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, ...@@ -340,10 +349,10 @@ static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
} }
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state, gfp_t mask) u64 end, struct extent_state **cached_state)
{ {
return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, mask); cached_state, GFP_NOFS, NULL);
} }
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start, static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
...@@ -358,7 +367,7 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start, ...@@ -358,7 +367,7 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
{ {
return clear_extent_bit(tree, start, end, return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); EXTENT_DO_ACCOUNTING, 0, 0, NULL);
} }
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
...@@ -401,24 +410,19 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, ...@@ -401,24 +410,19 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
struct extent_state **cached_state); struct extent_state **cached_state);
int extent_invalidatepage(struct extent_io_tree *tree, int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset); struct page *page, unsigned long offset);
int extent_write_full_page(struct extent_io_tree *tree, struct page *page, int extent_write_full_page(struct page *page, struct writeback_control *wbc);
get_extent_t *get_extent, int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
struct writeback_control *wbc);
int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
u64 start, u64 end, get_extent_t *get_extent,
int mode); int mode);
int extent_writepages(struct extent_io_tree *tree, int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping, struct address_space *mapping,
get_extent_t *get_extent,
struct writeback_control *wbc); struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping, int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc); struct writeback_control *wbc);
int extent_readpages(struct extent_io_tree *tree, int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages, struct list_head *pages, unsigned nr_pages);
get_extent_t get_extent);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent); __u64 start, __u64 len);
void set_page_extent_mapped(struct page *page); void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
...@@ -437,7 +441,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb); ...@@ -437,7 +441,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_PAGE_LOCK 2 #define WAIT_PAGE_LOCK 2
int read_extent_buffer_pages(struct extent_io_tree *tree, int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, int wait, struct extent_buffer *eb, int wait,
get_extent_t *get_extent, int mirror_num); int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb); void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
static inline unsigned long num_extent_pages(u64 start, u64 len) static inline unsigned long num_extent_pages(u64 start, u64 len)
......
...@@ -454,3 +454,135 @@ void replace_extent_mapping(struct extent_map_tree *tree, ...@@ -454,3 +454,135 @@ void replace_extent_mapping(struct extent_map_tree *tree,
setup_extent_mapping(tree, new, modified); setup_extent_mapping(tree, new, modified);
} }
/* Return the extent map that follows @em in the tree, or NULL if @em is last. */
static struct extent_map *next_extent_map(struct extent_map *em)
{
	struct rb_node *node = rb_next(&em->rb_node);

	return node ? container_of(node, struct extent_map, rb_node) : NULL;
}
/* Return the extent map that precedes @em in the tree, or NULL if @em is first. */
static struct extent_map *prev_extent_map(struct extent_map *em)
{
	struct rb_node *node = rb_prev(&em->rb_node);

	return node ? container_of(node, struct extent_map, rb_node) : NULL;
}
/* helper for btrfs_get_extent. Given an existing extent in the tree,
 * the existing extent is the nearest extent to map_start,
 * and an extent that you want to insert, deal with overlap and insert
 * the best fitted new extent into the tree.
 *
 * @em_tree:   tree to insert into
 * @existing:  extent already in the tree nearest to @map_start
 * @em:        new extent to trim and insert (modified in place)
 * @map_start: logical offset the caller asked for; must lie inside @em
 *
 * Returns 0 on success or the error from add_extent_mapping() (e.g. -EEXIST
 * if the trimmed range still collides with something in the tree).
 */
static noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
					 struct extent_map *existing,
					 struct extent_map *em,
					 u64 map_start)
{
	struct extent_map *prev;
	struct extent_map *next;
	u64 start;
	u64 end;
	u64 start_diff;

	/* caller contract: the requested offset is covered by @em */
	BUG_ON(map_start < em->start || map_start >= extent_map_end(em));

	/*
	 * Bracket @map_start with the tree neighbors: if @existing starts
	 * after map_start it is the right-hand neighbor, otherwise the left.
	 */
	if (existing->start > map_start) {
		next = existing;
		prev = prev_extent_map(next);
	} else {
		prev = existing;
		next = next_extent_map(prev);
	}

	/* Clamp [start, end) to the gap between prev and next, within @em. */
	start = prev ? extent_map_end(prev) : em->start;
	start = max_t(u64, start, em->start);
	end = next ? next->start : extent_map_end(em);
	end = min_t(u64, end, extent_map_end(em));
	start_diff = start - em->start;
	em->start = start;
	em->len = end - start;
	/*
	 * For real (on-disk, uncompressed) extents the block mapping must be
	 * shifted by the same amount the logical start moved; compressed
	 * extents keep their full block range.
	 */
	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		em->block_start += start_diff;
		em->block_len = em->len;
	}
	return add_extent_mapping(em_tree, em, 0);
}
/**
 * btrfs_add_extent_mapping - add extent mapping into em_tree
 * @em_tree - the extent tree into which we want to insert the extent mapping
 * @em_in - extent we are inserting
 * @start - start of the logical range btrfs_get_extent() is requesting
 * @len - length of the logical range btrfs_get_extent() is requesting
 *
 * Note that @em_in's range may be different from [start, start+len),
 * but they must be overlapped.
 *
 * Insert @em_in into @em_tree. In case there is an overlapping range, handle
 * the -EEXIST by either:
 * a) Returning the existing extent in @em_in if @start is within the
 *    existing em.
 * b) Merge the existing extent with @em_in passed in.
 *
 * On success *@em_in points at the mapping now in the tree (which may be the
 * pre-existing one, with the original @em_in freed); on failure *@em_in is
 * set to NULL and the original mapping is freed.
 *
 * Return 0 on success, otherwise -EEXIST.
 *
 */
int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
			     struct extent_map **em_in, u64 start, u64 len)
{
	int ret;
	struct extent_map *em = *em_in;

	ret = add_extent_mapping(em_tree, em, 0);
	/* it is possible that someone inserted the extent into the tree
	 * while we had the lock dropped.  It is also possible that
	 * an overlapping map exists in the tree
	 */
	if (ret == -EEXIST) {
		struct extent_map *existing;

		ret = 0;

		existing = search_extent_mapping(em_tree, start, len);

		/*
		 * existing will always be non-NULL, since there must be
		 * extent causing the -EEXIST.
		 */
		if (start >= existing->start &&
		    start < extent_map_end(existing)) {
			/*
			 * Case a): the requested offset is already covered by
			 * the extent in the tree — hand that one back and drop
			 * the mapping we tried to insert.
			 */
			free_extent_map(em);
			*em_in = existing;
			ret = 0;
		} else {
			u64 orig_start = em->start;
			u64 orig_len = em->len;

			/*
			 * The existing extent map is the one nearest to
			 * the [start, start + len) range which overlaps
			 */
			ret = merge_extent_mapping(em_tree, existing,
						   em, start);
			if (ret) {
				/* merge failed: drop em, report via WARN_ONCE */
				free_extent_map(em);
				*em_in = NULL;
				WARN_ONCE(ret,
"unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
					  ret, existing->start, existing->len,
					  orig_start, orig_len);
			}
			/* drop the reference search_extent_mapping() took */
			free_extent_map(existing);
		}
	}

	ASSERT(ret == 0 || ret == -EEXIST);
	return ret;
}
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
/* bits for the flags field */ /* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
#define EXTENT_FLAG_COMPRESSED 1 #define EXTENT_FLAG_COMPRESSED 1
#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */ #define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
...@@ -92,4 +91,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen ...@@ -92,4 +91,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em); void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree, struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len); u64 start, u64 len);
int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
struct extent_map **em_in, u64 start, u64 len);
#endif #endif
...@@ -1505,7 +1505,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, ...@@ -1505,7 +1505,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
ordered->file_offset + ordered->len > start_pos && ordered->file_offset + ordered->len > start_pos &&
ordered->file_offset <= last_pos) { ordered->file_offset <= last_pos) {
unlock_extent_cached(&inode->io_tree, start_pos, unlock_extent_cached(&inode->io_tree, start_pos,
last_pos, cached_state, GFP_NOFS); last_pos, cached_state);
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
unlock_page(pages[i]); unlock_page(pages[i]);
put_page(pages[i]); put_page(pages[i]);
...@@ -1520,7 +1520,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, ...@@ -1520,7 +1520,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
clear_extent_bit(&inode->io_tree, start_pos, last_pos, clear_extent_bit(&inode->io_tree, start_pos, last_pos,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, cached_state, GFP_NOFS); 0, 0, cached_state);
*lockstart = start_pos; *lockstart = start_pos;
*lockend = last_pos; *lockend = last_pos;
ret = 1; ret = 1;
...@@ -1756,11 +1756,10 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ...@@ -1756,11 +1756,10 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (copied > 0) if (copied > 0)
ret = btrfs_dirty_pages(inode, pages, dirty_pages, ret = btrfs_dirty_pages(inode, pages, dirty_pages,
pos, copied, NULL); pos, copied, &cached_state);
if (extents_locked) if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree, unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state, lockstart, lockend, &cached_state);
GFP_NOFS);
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) { if (ret) {
btrfs_drop_pages(pages, num_pages); btrfs_drop_pages(pages, num_pages);
...@@ -2020,10 +2019,19 @@ int btrfs_release_file(struct inode *inode, struct file *filp) ...@@ -2020,10 +2019,19 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
{ {
int ret; int ret;
struct blk_plug plug;
/*
* This is only called in fsync, which would do synchronous writes, so
* a plug can merge adjacent IOs as much as possible. Esp. in case of
* multiple disks using raid profile, a large IO can be split to
* several segments of stripe length (currently 64K).
*/
blk_start_plug(&plug);
atomic_inc(&BTRFS_I(inode)->sync_writers); atomic_inc(&BTRFS_I(inode)->sync_writers);
ret = btrfs_fdatawrite_range(inode, start, end); ret = btrfs_fdatawrite_range(inode, start, end);
atomic_dec(&BTRFS_I(inode)->sync_writers); atomic_dec(&BTRFS_I(inode)->sync_writers);
blk_finish_plug(&plug);
return ret; return ret;
} }
...@@ -2451,6 +2459,46 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) ...@@ -2451,6 +2459,46 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
return ret; return ret;
} }
/*
 * Lock the io_tree range [@lockstart, @lockend] for a hole punch / zero range
 * operation, making sure no ordered extents and no cached pages remain in the
 * range. Retries (waiting out ordered extents) until the range is quiescent.
 *
 * On success returns 0 with the extent range locked and *@cached_state
 * holding the cached extent state; the caller must unlock. On error the
 * range is left unlocked and the error from btrfs_wait_ordered_range() is
 * returned.
 */
static int btrfs_punch_hole_lock_range(struct inode *inode,
				       const u64 lockstart,
				       const u64 lockend,
				       struct extent_state **cached_state)
{
	while (1) {
		struct btrfs_ordered_extent *ordered;
		int ret;

		/* drop any pagecache in the range before taking the lock */
		truncate_pagecache_range(inode, lockstart, lockend);

		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);

		/*
		 * We need to make sure we have no ordered extents in this range
		 * and nobody raced in and read a page in this range, if we did
		 * we need to try again.
		 */
		if ((!ordered ||
		    (ordered->file_offset + ordered->len <= lockstart ||
		     ordered->file_offset > lockend)) &&
		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		/* range is busy: unlock, wait for ordered IO, retry */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, cached_state);
		ret = btrfs_wait_ordered_range(inode, lockstart,
					       lockend - lockstart + 1);
		if (ret)
			return ret;
	}
	return 0;
}
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{ {
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
...@@ -2567,38 +2615,11 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) ...@@ -2567,38 +2615,11 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
goto out_only_mutex; goto out_only_mutex;
} }
while (1) { ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
struct btrfs_ordered_extent *ordered; &cached_state);
if (ret) {
truncate_pagecache_range(inode, lockstart, lockend); inode_unlock(inode);
goto out_only_mutex;
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
/*
* We need to make sure we have no ordered extents in this range
* and nobody raced in and read a page in this range, if we did
* we need to try again.
*/
if ((!ordered ||
(ordered->file_offset + ordered->len <= lockstart ||
ordered->file_offset > lockend)) &&
!btrfs_page_exists_in_range(inode, lockstart, lockend)) {
if (ordered)
btrfs_put_ordered_extent(ordered);
break;
}
if (ordered)
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
lockend, &cached_state, GFP_NOFS);
ret = btrfs_wait_ordered_range(inode, lockstart,
lockend - lockstart + 1);
if (ret) {
inode_unlock(inode);
return ret;
}
} }
path = btrfs_alloc_path(); path = btrfs_alloc_path();
...@@ -2743,7 +2764,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) ...@@ -2743,7 +2764,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
btrfs_free_block_rsv(fs_info, rsv); btrfs_free_block_rsv(fs_info, rsv);
out: out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS); &cached_state);
out_only_mutex: out_only_mutex:
if (!updated_inode && truncated_block && !ret && !err) { if (!updated_inode && truncated_block && !ret && !err) {
/* /*
...@@ -2807,6 +2828,234 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len) ...@@ -2807,6 +2828,234 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
return 0; return 0;
} }
static int btrfs_fallocate_update_isize(struct inode *inode,
const u64 end,
const int mode)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
int ret2;
if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode))
return 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
inode->i_ctime = current_time(inode);
i_size_write(inode, end);
btrfs_ordered_update_i_size(inode, end, NULL);
ret = btrfs_update_inode(trans, root, inode);
ret2 = btrfs_end_transaction(trans);
return ret ? ret : ret2;
}
enum {
RANGE_BOUNDARY_WRITTEN_EXTENT = 0,
RANGE_BOUNDARY_PREALLOC_EXTENT = 1,
RANGE_BOUNDARY_HOLE = 2,
};
static int btrfs_zero_range_check_range_boundary(struct inode *inode,
u64 offset)
{
const u64 sectorsize = btrfs_inode_sectorsize(inode);
struct extent_map *em;
int ret;
offset = round_down(offset, sectorsize);
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em))
return PTR_ERR(em);
if (em->block_start == EXTENT_MAP_HOLE)
ret = RANGE_BOUNDARY_HOLE;
else if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
ret = RANGE_BOUNDARY_PREALLOC_EXTENT;
else
ret = RANGE_BOUNDARY_WRITTEN_EXTENT;
free_extent_map(em);
return ret;
}
static int btrfs_zero_range(struct inode *inode,
loff_t offset,
loff_t len,
const int mode)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct extent_map *em;
struct extent_changeset *data_reserved = NULL;
int ret;
u64 alloc_hint = 0;
const u64 sectorsize = btrfs_inode_sectorsize(inode);
u64 alloc_start = round_down(offset, sectorsize);
u64 alloc_end = round_up(offset + len, sectorsize);
u64 bytes_to_reserve = 0;
bool space_reserved = false;
inode_dio_wait(inode);
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
alloc_start, alloc_end - alloc_start, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
}
/*
* Avoid hole punching and extent allocation for some cases. More cases
* could be considered, but these are unlikely common and we keep things
* as simple as possible for now. Also, intentionally, if the target
* range contains one or more prealloc extents together with regular
* extents and holes, we drop all the existing extents and allocate a
* new prealloc extent, so that we get a larger contiguous disk extent.
*/
if (em->start <= alloc_start &&
test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
const u64 em_end = em->start + em->len;
if (em_end >= offset + len) {
/*
* The whole range is already a prealloc extent,
* do nothing except updating the inode's i_size if
* needed.
*/
free_extent_map(em);
ret = btrfs_fallocate_update_isize(inode, offset + len,
mode);
goto out;
}
/*
* Part of the range is already a prealloc extent, so operate
* only on the remaining part of the range.
*/
alloc_start = em_end;
ASSERT(IS_ALIGNED(alloc_start, sectorsize));
len = offset + len - alloc_start;
offset = alloc_start;
alloc_hint = em->block_start + em->len;
}
free_extent_map(em);
if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
alloc_start, sectorsize, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
}
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
free_extent_map(em);
ret = btrfs_fallocate_update_isize(inode, offset + len,
mode);
goto out;
}
if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) {
free_extent_map(em);
ret = btrfs_truncate_block(inode, offset, len, 0);
if (!ret)
ret = btrfs_fallocate_update_isize(inode,
offset + len,
mode);
return ret;
}
free_extent_map(em);
alloc_start = round_down(offset, sectorsize);
alloc_end = alloc_start + sectorsize;
goto reserve_space;
}
alloc_start = round_up(offset, sectorsize);
alloc_end = round_down(offset + len, sectorsize);
/*
* For unaligned ranges, check the pages at the boundaries, they might
* map to an extent, in which case we need to partially zero them, or
* they might map to a hole, in which case we need our allocation range
* to cover them.
*/
if (!IS_ALIGNED(offset, sectorsize)) {
ret = btrfs_zero_range_check_range_boundary(inode, offset);
if (ret < 0)
goto out;
if (ret == RANGE_BOUNDARY_HOLE) {
alloc_start = round_down(offset, sectorsize);
ret = 0;
} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
ret = btrfs_truncate_block(inode, offset, 0, 0);
if (ret)
goto out;
} else {
ret = 0;
}
}
if (!IS_ALIGNED(offset + len, sectorsize)) {
ret = btrfs_zero_range_check_range_boundary(inode,
offset + len);
if (ret < 0)
goto out;
if (ret == RANGE_BOUNDARY_HOLE) {
alloc_end = round_up(offset + len, sectorsize);
ret = 0;
} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
ret = btrfs_truncate_block(inode, offset + len, 0, 1);
if (ret)
goto out;
} else {
ret = 0;
}
}
reserve_space:
if (alloc_start < alloc_end) {
struct extent_state *cached_state = NULL;
const u64 lockstart = alloc_start;
const u64 lockend = alloc_end - 1;
bytes_to_reserve = alloc_end - alloc_start;
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
bytes_to_reserve);
if (ret < 0)
goto out;
space_reserved = true;
ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
alloc_start, bytes_to_reserve);
if (ret)
goto out;
ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
&cached_state);
if (ret)
goto out;
ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
alloc_end - alloc_start,
i_blocksize(inode),
offset + len, &alloc_hint);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
lockend, &cached_state);
/* btrfs_prealloc_file_range releases reserved space on error */
if (ret) {
space_reserved = false;
goto out;
}
}
ret = btrfs_fallocate_update_isize(inode, offset + len, mode);
out:
if (ret && space_reserved)
btrfs_free_reserved_data_space(inode, data_reserved,
alloc_start, bytes_to_reserve);
extent_changeset_free(data_reserved);
return ret;
}
static long btrfs_fallocate(struct file *file, int mode, static long btrfs_fallocate(struct file *file, int mode,
loff_t offset, loff_t len) loff_t offset, loff_t len)
{ {
...@@ -2832,7 +3081,8 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -2832,7 +3081,8 @@ static long btrfs_fallocate(struct file *file, int mode,
cur_offset = alloc_start; cur_offset = alloc_start;
/* Make sure we aren't being give some crap mode */ /* Make sure we aren't being give some crap mode */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE) if (mode & FALLOC_FL_PUNCH_HOLE)
...@@ -2843,10 +3093,12 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -2843,10 +3093,12 @@ static long btrfs_fallocate(struct file *file, int mode,
* *
* For qgroup space, it will be checked later. * For qgroup space, it will be checked later.
*/ */
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), if (!(mode & FALLOC_FL_ZERO_RANGE)) {
alloc_end - alloc_start); ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
if (ret < 0) alloc_end - alloc_start);
return ret; if (ret < 0)
return ret;
}
inode_lock(inode); inode_lock(inode);
...@@ -2888,6 +3140,12 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -2888,6 +3140,12 @@ static long btrfs_fallocate(struct file *file, int mode,
if (ret) if (ret)
goto out; goto out;
if (mode & FALLOC_FL_ZERO_RANGE) {
ret = btrfs_zero_range(inode, offset, len, mode);
inode_unlock(inode);
return ret;
}
locked_end = alloc_end - 1; locked_end = alloc_end - 1;
while (1) { while (1) {
struct btrfs_ordered_extent *ordered; struct btrfs_ordered_extent *ordered;
...@@ -2897,15 +3155,15 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -2897,15 +3155,15 @@ static long btrfs_fallocate(struct file *file, int mode,
*/ */
lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
locked_end, &cached_state); locked_end, &cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode, ordered = btrfs_lookup_first_ordered_extent(inode, locked_end);
alloc_end - 1);
if (ordered && if (ordered &&
ordered->file_offset + ordered->len > alloc_start && ordered->file_offset + ordered->len > alloc_start &&
ordered->file_offset < alloc_end) { ordered->file_offset < alloc_end) {
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, unlock_extent_cached(&BTRFS_I(inode)->io_tree,
alloc_start, locked_end, alloc_start, locked_end,
&cached_state, GFP_KERNEL); &cached_state);
/* /*
* we can't wait on the range with the transaction * we can't wait on the range with the transaction
* running or with the extent lock held * running or with the extent lock held
...@@ -2923,7 +3181,7 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -2923,7 +3181,7 @@ static long btrfs_fallocate(struct file *file, int mode,
/* First, check if we exceed the qgroup limit */ /* First, check if we exceed the qgroup limit */
INIT_LIST_HEAD(&reserve_list); INIT_LIST_HEAD(&reserve_list);
while (1) { while (cur_offset < alloc_end) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
alloc_end - cur_offset, 0); alloc_end - cur_offset, 0);
if (IS_ERR(em)) { if (IS_ERR(em)) {
...@@ -2959,8 +3217,6 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -2959,8 +3217,6 @@ static long btrfs_fallocate(struct file *file, int mode,
} }
free_extent_map(em); free_extent_map(em);
cur_offset = last_byte; cur_offset = last_byte;
if (cur_offset >= alloc_end)
break;
} }
/* /*
...@@ -2983,37 +3239,18 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -2983,37 +3239,18 @@ static long btrfs_fallocate(struct file *file, int mode,
if (ret < 0) if (ret < 0)
goto out_unlock; goto out_unlock;
if (actual_end > inode->i_size && /*
!(mode & FALLOC_FL_KEEP_SIZE)) { * We didn't need to allocate any more space, but we still extended the
struct btrfs_trans_handle *trans; * size of the file so we need to update i_size and the inode item.
struct btrfs_root *root = BTRFS_I(inode)->root; */
ret = btrfs_fallocate_update_isize(inode, actual_end, mode);
/*
* We didn't need to allocate any more space, but we
* still extended the size of the file so we need to
* update i_size and the inode item.
*/
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
} else {
inode->i_ctime = current_time(inode);
i_size_write(inode, actual_end);
btrfs_ordered_update_i_size(inode, actual_end, NULL);
ret = btrfs_update_inode(trans, root, inode);
if (ret)
btrfs_end_transaction(trans);
else
ret = btrfs_end_transaction(trans);
}
}
out_unlock: out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_KERNEL); &cached_state);
out: out:
inode_unlock(inode); inode_unlock(inode);
/* Let go of our reservation. */ /* Let go of our reservation. */
if (ret != 0) if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
btrfs_free_reserved_data_space(inode, data_reserved, btrfs_free_reserved_data_space(inode, data_reserved,
alloc_start, alloc_end - cur_offset); alloc_start, alloc_end - cur_offset);
extent_changeset_free(data_reserved); extent_changeset_free(data_reserved);
...@@ -3082,7 +3319,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) ...@@ -3082,7 +3319,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
*offset = min_t(loff_t, start, inode->i_size); *offset = min_t(loff_t, start, inode->i_size);
} }
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS); &cached_state);
return ret; return ret;
} }
...@@ -3146,7 +3383,7 @@ void btrfs_auto_defrag_exit(void) ...@@ -3146,7 +3383,7 @@ void btrfs_auto_defrag_exit(void)
kmem_cache_destroy(btrfs_inode_defrag_cachep); kmem_cache_destroy(btrfs_inode_defrag_cachep);
} }
int btrfs_auto_defrag_init(void) int __init btrfs_auto_defrag_init(void)
{ {
btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
sizeof(struct inode_defrag), 0, sizeof(struct inode_defrag), 0,
......
...@@ -993,8 +993,7 @@ update_cache_item(struct btrfs_trans_handle *trans, ...@@ -993,8 +993,7 @@ update_cache_item(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, 0, 1); ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0) { if (ret < 0) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
GFP_NOFS);
goto fail; goto fail;
} }
leaf = path->nodes[0]; leaf = path->nodes[0];
...@@ -1008,7 +1007,7 @@ update_cache_item(struct btrfs_trans_handle *trans, ...@@ -1008,7 +1007,7 @@ update_cache_item(struct btrfs_trans_handle *trans,
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
inode->i_size - 1, inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
NULL, GFP_NOFS); NULL);
btrfs_release_path(path); btrfs_release_path(path);
goto fail; goto fail;
} }
...@@ -1105,8 +1104,7 @@ static int flush_dirty_cache(struct inode *inode) ...@@ -1105,8 +1104,7 @@ static int flush_dirty_cache(struct inode *inode)
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (ret) if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
GFP_NOFS);
return ret; return ret;
} }
...@@ -1127,8 +1125,7 @@ cleanup_write_cache_enospc(struct inode *inode, ...@@ -1127,8 +1125,7 @@ cleanup_write_cache_enospc(struct inode *inode,
{ {
io_ctl_drop_pages(io_ctl); io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, cached_state, i_size_read(inode) - 1, cached_state);
GFP_NOFS);
} }
static int __btrfs_wait_cache_io(struct btrfs_root *root, static int __btrfs_wait_cache_io(struct btrfs_root *root,
...@@ -1322,7 +1319,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -1322,7 +1319,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
io_ctl_drop_pages(io_ctl); io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state, GFP_NOFS); i_size_read(inode) - 1, &cached_state);
/* /*
* at this point the pages are under IO and we're happy, * at this point the pages are under IO and we're happy,
......
此差异已折叠。
...@@ -308,12 +308,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ...@@ -308,12 +308,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
ip->flags |= BTRFS_INODE_COMPRESS; ip->flags |= BTRFS_INODE_COMPRESS;
ip->flags &= ~BTRFS_INODE_NOCOMPRESS; ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
if (fs_info->compress_type == BTRFS_COMPRESS_LZO) comp = btrfs_compress_type2str(fs_info->compress_type);
comp = "lzo"; if (!comp || comp[0] == 0)
else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB) comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
comp = "zlib";
else
comp = "zstd";
ret = btrfs_set_prop(inode, "btrfs.compression", ret = btrfs_set_prop(inode, "btrfs.compression",
comp, strlen(comp), 0); comp, strlen(comp), 0);
if (ret) if (ret)
...@@ -980,7 +978,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) ...@@ -980,7 +978,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
/* get the big lock and read metadata off disk */ /* get the big lock and read metadata off disk */
lock_extent_bits(io_tree, start, end, &cached); lock_extent_bits(io_tree, start, end, &cached);
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); unlock_extent_cached(io_tree, start, end, &cached);
if (IS_ERR(em)) if (IS_ERR(em))
return NULL; return NULL;
...@@ -1131,7 +1129,7 @@ static int cluster_pages_for_defrag(struct inode *inode, ...@@ -1131,7 +1129,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
ordered = btrfs_lookup_ordered_extent(inode, ordered = btrfs_lookup_ordered_extent(inode,
page_start); page_start);
unlock_extent_cached(tree, page_start, page_end, unlock_extent_cached(tree, page_start, page_end,
&cached_state, GFP_NOFS); &cached_state);
if (!ordered) if (!ordered)
break; break;
...@@ -1191,7 +1189,7 @@ static int cluster_pages_for_defrag(struct inode *inode, ...@@ -1191,7 +1189,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
&cached_state, GFP_NOFS); &cached_state);
if (i_done != page_cnt) { if (i_done != page_cnt) {
spin_lock(&BTRFS_I(inode)->lock); spin_lock(&BTRFS_I(inode)->lock);
...@@ -1207,8 +1205,7 @@ static int cluster_pages_for_defrag(struct inode *inode, ...@@ -1207,8 +1205,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
&cached_state); &cached_state);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, unlock_extent_cached(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1, &cached_state, page_start, page_end - 1, &cached_state);
GFP_NOFS);
for (i = 0; i < i_done; i++) { for (i = 0; i < i_done; i++) {
clear_page_dirty_for_io(pages[i]); clear_page_dirty_for_io(pages[i]);
...@@ -1504,7 +1501,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, ...@@ -1504,7 +1501,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
goto out_free; goto out_free;
} }
if (!device->writeable) { if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
btrfs_info(fs_info, btrfs_info(fs_info,
"resizer unable to apply on readonly device %llu", "resizer unable to apply on readonly device %llu",
devid); devid);
...@@ -1529,7 +1526,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, ...@@ -1529,7 +1526,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
} }
} }
if (device->is_tgtdev_for_dev_replace) { if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
ret = -EPERM; ret = -EPERM;
goto out_free; goto out_free;
} }
...@@ -2676,14 +2673,12 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) ...@@ -2676,14 +2673,12 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
goto out; goto out;
} }
mutex_lock(&fs_info->volume_mutex);
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) { if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
ret = btrfs_rm_device(fs_info, NULL, vol_args->devid); ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
} else { } else {
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
ret = btrfs_rm_device(fs_info, vol_args->name, 0); ret = btrfs_rm_device(fs_info, vol_args->name, 0);
} }
mutex_unlock(&fs_info->volume_mutex);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
if (!ret) { if (!ret) {
...@@ -2727,9 +2722,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ...@@ -2727,9 +2722,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
} }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
mutex_lock(&fs_info->volume_mutex);
ret = btrfs_rm_device(fs_info, vol_args->name, 0); ret = btrfs_rm_device(fs_info, vol_args->name, 0);
mutex_unlock(&fs_info->volume_mutex);
if (!ret) if (!ret)
btrfs_info(fs_info, "disk deleted %s", vol_args->name); btrfs_info(fs_info, "disk deleted %s", vol_args->name);
...@@ -2754,16 +2747,16 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, ...@@ -2754,16 +2747,16 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
if (!fi_args) if (!fi_args)
return -ENOMEM; return -ENOMEM;
mutex_lock(&fs_devices->device_list_mutex); rcu_read_lock();
fi_args->num_devices = fs_devices->num_devices; fi_args->num_devices = fs_devices->num_devices;
memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid));
list_for_each_entry(device, &fs_devices->devices, dev_list) { list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
if (device->devid > fi_args->max_id) if (device->devid > fi_args->max_id)
fi_args->max_id = device->devid; fi_args->max_id = device->devid;
} }
mutex_unlock(&fs_devices->device_list_mutex); rcu_read_unlock();
memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid));
fi_args->nodesize = fs_info->nodesize; fi_args->nodesize = fs_info->nodesize;
fi_args->sectorsize = fs_info->sectorsize; fi_args->sectorsize = fs_info->sectorsize;
fi_args->clone_alignment = fs_info->sectorsize; fi_args->clone_alignment = fs_info->sectorsize;
...@@ -2780,7 +2773,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, ...@@ -2780,7 +2773,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
{ {
struct btrfs_ioctl_dev_info_args *di_args; struct btrfs_ioctl_dev_info_args *di_args;
struct btrfs_device *dev; struct btrfs_device *dev;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
int ret = 0; int ret = 0;
char *s_uuid = NULL; char *s_uuid = NULL;
...@@ -2791,7 +2783,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, ...@@ -2791,7 +2783,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
if (!btrfs_is_empty_uuid(di_args->uuid)) if (!btrfs_is_empty_uuid(di_args->uuid))
s_uuid = di_args->uuid; s_uuid = di_args->uuid;
mutex_lock(&fs_devices->device_list_mutex); rcu_read_lock();
dev = btrfs_find_device(fs_info, di_args->devid, s_uuid, NULL); dev = btrfs_find_device(fs_info, di_args->devid, s_uuid, NULL);
if (!dev) { if (!dev) {
...@@ -2806,17 +2798,15 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, ...@@ -2806,17 +2798,15 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
if (dev->name) { if (dev->name) {
struct rcu_string *name; struct rcu_string *name;
rcu_read_lock();
name = rcu_dereference(dev->name); name = rcu_dereference(dev->name);
strncpy(di_args->path, name->str, sizeof(di_args->path)); strncpy(di_args->path, name->str, sizeof(di_args->path) - 1);
rcu_read_unlock();
di_args->path[sizeof(di_args->path) - 1] = 0; di_args->path[sizeof(di_args->path) - 1] = 0;
} else { } else {
di_args->path[0] = '\0'; di_args->path[0] = '\0';
} }
out: out:
mutex_unlock(&fs_devices->device_list_mutex); rcu_read_unlock();
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
ret = -EFAULT; ret = -EFAULT;
......
...@@ -164,7 +164,6 @@ static int iterate_object_props(struct btrfs_root *root, ...@@ -164,7 +164,6 @@ static int iterate_object_props(struct btrfs_root *root,
size_t), size_t),
void *ctx) void *ctx)
{ {
struct btrfs_fs_info *fs_info = root->fs_info;
int ret; int ret;
char *name_buf = NULL; char *name_buf = NULL;
char *value_buf = NULL; char *value_buf = NULL;
...@@ -215,12 +214,6 @@ static int iterate_object_props(struct btrfs_root *root, ...@@ -215,12 +214,6 @@ static int iterate_object_props(struct btrfs_root *root,
name_ptr = (unsigned long)(di + 1); name_ptr = (unsigned long)(di + 1);
data_ptr = name_ptr + name_len; data_ptr = name_ptr + name_len;
if (verify_dir_item(fs_info, leaf,
path->slots[0], di)) {
ret = -EIO;
goto out;
}
if (name_len <= XATTR_BTRFS_PREFIX_LEN || if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX, memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
name_ptr, name_ptr,
...@@ -430,11 +423,11 @@ static const char *prop_compression_extract(struct inode *inode) ...@@ -430,11 +423,11 @@ static const char *prop_compression_extract(struct inode *inode)
{ {
switch (BTRFS_I(inode)->prop_compress) { switch (BTRFS_I(inode)->prop_compress) {
case BTRFS_COMPRESS_ZLIB: case BTRFS_COMPRESS_ZLIB:
return "zlib";
case BTRFS_COMPRESS_LZO: case BTRFS_COMPRESS_LZO:
return "lzo";
case BTRFS_COMPRESS_ZSTD: case BTRFS_COMPRESS_ZSTD:
return "zstd"; return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress);
default:
break;
} }
return NULL; return NULL;
......
...@@ -2883,8 +2883,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode, ...@@ -2883,8 +2883,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
ULIST_ITER_INIT(&uiter); ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(&reserved->range_changed, &uiter))) while ((unode = ulist_next(&reserved->range_changed, &uiter)))
clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
GFP_NOFS);
extent_changeset_release(reserved); extent_changeset_release(reserved);
return ret; return ret;
} }
......
...@@ -231,7 +231,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) ...@@ -231,7 +231,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
cur = h + i; cur = h + i;
INIT_LIST_HEAD(&cur->hash_list); INIT_LIST_HEAD(&cur->hash_list);
spin_lock_init(&cur->lock); spin_lock_init(&cur->lock);
init_waitqueue_head(&cur->wait);
} }
x = cmpxchg(&info->stripe_hash_table, NULL, table); x = cmpxchg(&info->stripe_hash_table, NULL, table);
...@@ -595,14 +594,31 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, ...@@ -595,14 +594,31 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
* bio list here, anyone else that wants to * bio list here, anyone else that wants to
* change this stripe needs to do their own rmw. * change this stripe needs to do their own rmw.
*/ */
if (last->operation == BTRFS_RBIO_PARITY_SCRUB || if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
cur->operation == BTRFS_RBIO_PARITY_SCRUB)
return 0; return 0;
if (last->operation == BTRFS_RBIO_REBUILD_MISSING || if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
cur->operation == BTRFS_RBIO_REBUILD_MISSING)
return 0; return 0;
if (last->operation == BTRFS_RBIO_READ_REBUILD) {
int fa = last->faila;
int fb = last->failb;
int cur_fa = cur->faila;
int cur_fb = cur->failb;
if (last->faila >= last->failb) {
fa = last->failb;
fb = last->faila;
}
if (cur->faila >= cur->failb) {
cur_fa = cur->failb;
cur_fb = cur->faila;
}
if (fa != cur_fa || fb != cur_fb)
return 0;
}
return 1; return 1;
} }
...@@ -670,7 +686,6 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) ...@@ -670,7 +686,6 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
struct btrfs_raid_bio *cur; struct btrfs_raid_bio *cur;
struct btrfs_raid_bio *pending; struct btrfs_raid_bio *pending;
unsigned long flags; unsigned long flags;
DEFINE_WAIT(wait);
struct btrfs_raid_bio *freeit = NULL; struct btrfs_raid_bio *freeit = NULL;
struct btrfs_raid_bio *cache_drop = NULL; struct btrfs_raid_bio *cache_drop = NULL;
int ret = 0; int ret = 0;
...@@ -816,15 +831,6 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) ...@@ -816,15 +831,6 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
} }
goto done_nolock; goto done_nolock;
/*
* The barrier for this waitqueue_active is not needed,
* we're protected by h->lock and can't miss a wakeup.
*/
} else if (waitqueue_active(&h->wait)) {
spin_unlock(&rbio->bio_list_lock);
spin_unlock_irqrestore(&h->lock, flags);
wake_up(&h->wait);
goto done_nolock;
} }
} }
done: done:
...@@ -858,10 +864,17 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) ...@@ -858,10 +864,17 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
kfree(rbio); kfree(rbio);
} }
static void free_raid_bio(struct btrfs_raid_bio *rbio) static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
{ {
unlock_stripe(rbio); struct bio *next;
__free_raid_bio(rbio);
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
cur->bi_status = err;
bio_endio(cur);
cur = next;
}
} }
/* /*
...@@ -871,20 +884,26 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio) ...@@ -871,20 +884,26 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err) static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{ {
struct bio *cur = bio_list_get(&rbio->bio_list); struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *next; struct bio *extra;
if (rbio->generic_bio_cnt) if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
free_raid_bio(rbio); /*
* At this moment, rbio->bio_list is empty, however since rbio does not
* always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
* hash list, rbio may be merged with others so that rbio->bio_list
* becomes non-empty.
* Once unlock_stripe() is done, rbio->bio_list will not be updated any
* more and we can call bio_endio() on all queued bios.
*/
unlock_stripe(rbio);
extra = bio_list_get(&rbio->bio_list);
__free_raid_bio(rbio);
while (cur) { rbio_endio_bio_list(cur, err);
next = cur->bi_next; if (extra)
cur->bi_next = NULL; rbio_endio_bio_list(extra, err);
cur->bi_status = err;
bio_endio(cur);
cur = next;
}
} }
/* /*
...@@ -1435,14 +1454,13 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio, ...@@ -1435,14 +1454,13 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
*/ */
static void set_bio_pages_uptodate(struct bio *bio) static void set_bio_pages_uptodate(struct bio *bio)
{ {
struct bio_vec bvec; struct bio_vec *bvec;
struct bvec_iter iter; int i;
if (bio_flagged(bio, BIO_CLONED)) ASSERT(!bio_flagged(bio, BIO_CLONED));
bio->bi_iter = btrfs_io_bio(bio)->iter;
bio_for_each_segment(bvec, bio, iter) bio_for_each_segment_all(bvec, bio, i)
SetPageUptodate(bvec.bv_page); SetPageUptodate(bvec->bv_page);
} }
/* /*
...@@ -1969,7 +1987,22 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) ...@@ -1969,7 +1987,22 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
cleanup_io: cleanup_io:
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
if (err == BLK_STS_OK) /*
* - In case of two failures, where rbio->failb != -1:
*
* Do not cache this rbio since the above read reconstruction
* (raid6_datap_recov() or raid6_2data_recov()) may have
* changed some content of stripes which are not identical to
* on-disk content any more, otherwise, a later write/recover
* may steal stripe_pages from this rbio and end up with
* corruptions or rebuild failures.
*
* - In case of single failure, where rbio->failb == -1:
*
* Cache this rbio iff the above read reconstruction is
* excuted without problems.
*/
if (err == BLK_STS_OK && rbio->failb < 0)
cache_rbio_pages(rbio); cache_rbio_pages(rbio);
else else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
...@@ -2170,11 +2203,21 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, ...@@ -2170,11 +2203,21 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
} }
/* /*
* reconstruct from the q stripe if they are * Loop retry:
* asking for mirror 3 * for 'mirror == 2', reconstruct from all other stripes.
* for 'mirror_num > 2', select a stripe to fail on every retry.
*/ */
if (mirror_num == 3) if (mirror_num > 2) {
rbio->failb = rbio->real_stripes - 2; /*
* 'mirror == 3' is to fail the p stripe and
* reconstruct from the q stripe. 'mirror > 3' is to
* fail a data stripe and reconstruct from p+q stripe.
*/
rbio->failb = rbio->real_stripes - (mirror_num - 1);
ASSERT(rbio->failb > 0);
if (rbio->failb <= rbio->faila)
rbio->failb--;
}
ret = lock_stripe_add(rbio); ret = lock_stripe_add(rbio);
......
...@@ -606,8 +606,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, ...@@ -606,8 +606,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
} }
/* Walk up to the next node that needs to be processed */ /* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, static int walk_up_tree(struct btrfs_path *path, int *level)
int *level)
{ {
int l; int l;
...@@ -984,7 +983,6 @@ void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, ...@@ -984,7 +983,6 @@ void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{ {
struct btrfs_path *path; struct btrfs_path *path;
struct btrfs_root *root;
struct extent_buffer *eb; struct extent_buffer *eb;
u64 bytenr = 0, num_bytes = 0; u64 bytenr = 0, num_bytes = 0;
int ret, level; int ret, level;
...@@ -1014,7 +1012,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) ...@@ -1014,7 +1012,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
&bytenr, &num_bytes); &bytenr, &num_bytes);
if (ret) if (ret)
break; break;
ret = walk_up_tree(root, path, &level); ret = walk_up_tree(path, &level);
if (ret < 0) if (ret < 0)
break; break;
if (ret > 0) { if (ret > 0) {
......
...@@ -387,13 +387,6 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, ...@@ -387,13 +387,6 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
ptr = (unsigned long)(ref + 1); ptr = (unsigned long)(ref + 1);
ret = btrfs_is_name_len_valid(leaf, path->slots[0], ptr,
name_len);
if (!ret) {
err = -EIO;
goto out;
}
WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
*sequence = btrfs_root_ref_sequence(leaf, ref); *sequence = btrfs_root_ref_sequence(leaf, ref);
......
...@@ -301,6 +301,11 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); ...@@ -301,6 +301,11 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx); static void scrub_put_ctx(struct scrub_ctx *sctx);
static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
return page->recover &&
(page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}
static void scrub_pending_bio_inc(struct scrub_ctx *sctx) static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{ {
...@@ -1323,15 +1328,34 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) ...@@ -1323,15 +1328,34 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
* could happen otherwise that a correct page would be * could happen otherwise that a correct page would be
* overwritten by a bad one). * overwritten by a bad one).
*/ */
for (mirror_index = 0; for (mirror_index = 0; ;mirror_index++) {
mirror_index < BTRFS_MAX_MIRRORS &&
sblocks_for_recheck[mirror_index].page_count > 0;
mirror_index++) {
struct scrub_block *sblock_other; struct scrub_block *sblock_other;
if (mirror_index == failed_mirror_index) if (mirror_index == failed_mirror_index)
continue; continue;
sblock_other = sblocks_for_recheck + mirror_index;
/* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
if (mirror_index >= BTRFS_MAX_MIRRORS)
break;
if (!sblocks_for_recheck[mirror_index].page_count)
break;
sblock_other = sblocks_for_recheck + mirror_index;
} else {
struct scrub_recover *r = sblock_bad->pagev[0]->recover;
int max_allowed = r->bbio->num_stripes -
r->bbio->num_tgtdevs;
if (mirror_index >= max_allowed)
break;
if (!sblocks_for_recheck[1].page_count)
break;
ASSERT(failed_mirror_index == 0);
sblock_other = sblocks_for_recheck + 1;
sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
}
/* build and submit the bios, check checksums */ /* build and submit the bios, check checksums */
scrub_recheck_block(fs_info, sblock_other, 0); scrub_recheck_block(fs_info, sblock_other, 0);
...@@ -1666,49 +1690,32 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, ...@@ -1666,49 +1690,32 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
return 0; return 0;
} }
struct scrub_bio_ret {
struct completion event;
blk_status_t status;
};
static void scrub_bio_wait_endio(struct bio *bio) static void scrub_bio_wait_endio(struct bio *bio)
{ {
struct scrub_bio_ret *ret = bio->bi_private; complete(bio->bi_private);
ret->status = bio->bi_status;
complete(&ret->event);
}
static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
return page->recover &&
(page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
} }
static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
struct bio *bio, struct bio *bio,
struct scrub_page *page) struct scrub_page *page)
{ {
struct scrub_bio_ret done; DECLARE_COMPLETION_ONSTACK(done);
int ret; int ret;
int mirror_num;
init_completion(&done.event);
done.status = 0;
bio->bi_iter.bi_sector = page->logical >> 9; bio->bi_iter.bi_sector = page->logical >> 9;
bio->bi_private = &done; bio->bi_private = &done;
bio->bi_end_io = scrub_bio_wait_endio; bio->bi_end_io = scrub_bio_wait_endio;
mirror_num = page->sblock->pagev[0]->mirror_num;
ret = raid56_parity_recover(fs_info, bio, page->recover->bbio, ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
page->recover->map_length, page->recover->map_length,
page->mirror_num, 0); mirror_num, 0);
if (ret) if (ret)
return ret; return ret;
wait_for_completion_io(&done.event); wait_for_completion_io(&done);
if (done.status) return blk_status_to_errno(bio->bi_status);
return -EIO;
return 0;
} }
/* /*
...@@ -2535,7 +2542,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, ...@@ -2535,7 +2542,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
} }
WARN_ON(sblock->page_count == 0); WARN_ON(sblock->page_count == 0);
if (dev->missing) { if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
/* /*
* This case should only be hit for RAID 5/6 device replace. See * This case should only be hit for RAID 5/6 device replace. See
* the comment in scrub_missing_raid56_pages() for details. * the comment in scrub_missing_raid56_pages() for details.
...@@ -2870,7 +2877,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, ...@@ -2870,7 +2877,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
u8 csum[BTRFS_CSUM_SIZE]; u8 csum[BTRFS_CSUM_SIZE];
u32 blocksize; u32 blocksize;
if (dev->missing) { if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
scrub_parity_mark_sectors_error(sparity, logical, len); scrub_parity_mark_sectors_error(sparity, logical, len);
return 0; return 0;
} }
...@@ -4112,12 +4119,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -4112,12 +4119,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_lock(&fs_info->fs_devices->device_list_mutex); mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info, devid, NULL, NULL); dev = btrfs_find_device(fs_info, devid, NULL, NULL);
if (!dev || (dev->missing && !is_dev_replace)) { if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
!is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -ENODEV; return -ENODEV;
} }
if (!is_dev_replace && !readonly && !dev->writeable) { if (!is_dev_replace && !readonly &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex);
rcu_read_lock(); rcu_read_lock();
name = rcu_dereference(dev->name); name = rcu_dereference(dev->name);
...@@ -4128,14 +4137,15 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -4128,14 +4137,15 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
} }
mutex_lock(&fs_info->scrub_lock); mutex_lock(&fs_info->scrub_lock);
if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EIO; return -EIO;
} }
btrfs_dev_replace_lock(&fs_info->dev_replace, 0); btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (dev->scrub_device || if (dev->scrub_ctx ||
(!is_dev_replace && (!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
...@@ -4160,7 +4170,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -4160,7 +4170,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return PTR_ERR(sctx); return PTR_ERR(sctx);
} }
sctx->readonly = readonly; sctx->readonly = readonly;
dev->scrub_device = sctx; dev->scrub_ctx = sctx;
mutex_unlock(&fs_info->fs_devices->device_list_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex);
/* /*
...@@ -4195,7 +4205,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -4195,7 +4205,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
memcpy(progress, &sctx->stat, sizeof(*progress)); memcpy(progress, &sctx->stat, sizeof(*progress));
mutex_lock(&fs_info->scrub_lock); mutex_lock(&fs_info->scrub_lock);
dev->scrub_device = NULL; dev->scrub_ctx = NULL;
scrub_workers_put(fs_info); scrub_workers_put(fs_info);
mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->scrub_lock);
...@@ -4252,16 +4262,16 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info, ...@@ -4252,16 +4262,16 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
struct scrub_ctx *sctx; struct scrub_ctx *sctx;
mutex_lock(&fs_info->scrub_lock); mutex_lock(&fs_info->scrub_lock);
sctx = dev->scrub_device; sctx = dev->scrub_ctx;
if (!sctx) { if (!sctx) {
mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->scrub_lock);
return -ENOTCONN; return -ENOTCONN;
} }
atomic_inc(&sctx->cancel_req); atomic_inc(&sctx->cancel_req);
while (dev->scrub_device) { while (dev->scrub_ctx) {
mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->scrub_lock);
wait_event(fs_info->scrub_pause_wait, wait_event(fs_info->scrub_pause_wait,
dev->scrub_device == NULL); dev->scrub_ctx == NULL);
mutex_lock(&fs_info->scrub_lock); mutex_lock(&fs_info->scrub_lock);
} }
mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->scrub_lock);
...@@ -4278,7 +4288,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, ...@@ -4278,7 +4288,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
mutex_lock(&fs_info->fs_devices->device_list_mutex); mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info, devid, NULL, NULL); dev = btrfs_find_device(fs_info, devid, NULL, NULL);
if (dev) if (dev)
sctx = dev->scrub_device; sctx = dev->scrub_ctx;
if (sctx) if (sctx)
memcpy(progress, &sctx->stat, sizeof(*progress)); memcpy(progress, &sctx->stat, sizeof(*progress));
mutex_unlock(&fs_info->fs_devices->device_list_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex);
...@@ -4478,8 +4488,7 @@ static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len, ...@@ -4478,8 +4488,7 @@ static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
free_extent_map(em); free_extent_map(em);
out_unlock: out_unlock:
unlock_extent_cached(io_tree, lockstart, lockend, &cached_state, unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
GFP_NOFS);
return ret; return ret;
} }
......
...@@ -1059,12 +1059,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, ...@@ -1059,12 +1059,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
} }
} }
ret = btrfs_is_name_len_valid(eb, path->slots[0],
(unsigned long)(di + 1), name_len + data_len);
if (!ret) {
ret = -EIO;
goto out;
}
if (name_len + data_len > buf_len) { if (name_len + data_len > buf_len) {
buf_len = name_len + data_len; buf_len = name_len + data_len;
if (is_vmalloc_addr(buf)) { if (is_vmalloc_addr(buf)) {
......
...@@ -61,12 +61,21 @@ ...@@ -61,12 +61,21 @@
#include "tests/btrfs-tests.h" #include "tests/btrfs-tests.h"
#include "qgroup.h" #include "qgroup.h"
#include "backref.h"
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h> #include <trace/events/btrfs.h>
static const struct super_operations btrfs_super_ops; static const struct super_operations btrfs_super_ops;
/*
* Types for mounting the default subvolume and a subvolume explicitly
* requested by subvol=/path. That way the callchain is straightforward and we
* don't have to play tricks with the mount options and recursive calls to
* btrfs_mount.
*
* The new btrfs_root_fs_type also servers as a tag for the bdev_holder.
*/
static struct file_system_type btrfs_fs_type; static struct file_system_type btrfs_fs_type;
static struct file_system_type btrfs_root_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data); static int btrfs_remount(struct super_block *sb, int *flags, char *data);
...@@ -98,30 +107,6 @@ const char *btrfs_decode_error(int errno) ...@@ -98,30 +107,6 @@ const char *btrfs_decode_error(int errno)
return errstr; return errstr;
} }
/* btrfs handle error by forcing the filesystem readonly */
static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
{
struct super_block *sb = fs_info->sb;
if (sb_rdonly(sb))
return;
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
sb->s_flags |= SB_RDONLY;
btrfs_info(fs_info, "forced readonly");
/*
* Note that a running device replace operation is not
* canceled here although there is no way to update
* the progress. It would add the risk of a deadlock,
* therefore the canceling is omitted. The only penalty
* is that some I/O remains active until the procedure
* completes. The next time when the filesystem is
* mounted writeable again, the device replace
* operation continues.
*/
}
}
/* /*
* __btrfs_handle_fs_error decodes expected errors from the caller and * __btrfs_handle_fs_error decodes expected errors from the caller and
* invokes the approciate error response. * invokes the approciate error response.
...@@ -168,8 +153,23 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function ...@@ -168,8 +153,23 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
/* Don't go through full error handling during mount */ /* Don't go through full error handling during mount */
if (sb->s_flags & SB_BORN) if (!(sb->s_flags & SB_BORN))
btrfs_handle_error(fs_info); return;
if (sb_rdonly(sb))
return;
/* btrfs handle error by forcing the filesystem readonly */
sb->s_flags |= SB_RDONLY;
btrfs_info(fs_info, "forced readonly");
/*
* Note that a running device replace operation is not canceled here
* although there is no way to update the progress. It would add the
* risk of a deadlock, therefore the canceling is omitted. The only
* penalty is that some I/O remains active until the procedure
* completes. The next time when the filesystem is mounted writeable
* again, the device replace operation continues.
*/
} }
#ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK
...@@ -405,7 +405,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, ...@@ -405,7 +405,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
unsigned long new_flags) unsigned long new_flags)
{ {
substring_t args[MAX_OPT_ARGS]; substring_t args[MAX_OPT_ARGS];
char *p, *num, *orig = NULL; char *p, *num;
u64 cache_gen; u64 cache_gen;
int intarg; int intarg;
int ret = 0; int ret = 0;
...@@ -428,16 +428,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, ...@@ -428,16 +428,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
if (!options) if (!options)
goto check; goto check;
/*
* strsep changes the string, duplicate it because parse_options
* gets called twice
*/
options = kstrdup(options, GFP_KERNEL);
if (!options)
return -ENOMEM;
orig = options;
while ((p = strsep(&options, ",")) != NULL) { while ((p = strsep(&options, ",")) != NULL) {
int token; int token;
if (!*p) if (!*p)
...@@ -454,7 +444,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, ...@@ -454,7 +444,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_subvolrootid: case Opt_subvolrootid:
case Opt_device: case Opt_device:
/* /*
* These are parsed by btrfs_parse_early_options * These are parsed by btrfs_parse_subvol_options
* and btrfs_parse_early_options
* and can be happily ignored here. * and can be happily ignored here.
*/ */
break; break;
...@@ -877,7 +868,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, ...@@ -877,7 +868,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_info(info, "disk space caching is enabled"); btrfs_info(info, "disk space caching is enabled");
if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE)) if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
btrfs_info(info, "using free space tree"); btrfs_info(info, "using free space tree");
kfree(orig);
return ret; return ret;
} }
...@@ -888,11 +878,60 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, ...@@ -888,11 +878,60 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
* only when we need to allocate a new super block. * only when we need to allocate a new super block.
*/ */
static int btrfs_parse_early_options(const char *options, fmode_t flags, static int btrfs_parse_early_options(const char *options, fmode_t flags,
void *holder, char **subvol_name, u64 *subvol_objectid, void *holder, struct btrfs_fs_devices **fs_devices)
struct btrfs_fs_devices **fs_devices)
{ {
substring_t args[MAX_OPT_ARGS]; substring_t args[MAX_OPT_ARGS];
char *device_name, *opts, *orig, *p; char *device_name, *opts, *orig, *p;
int error = 0;
if (!options)
return 0;
/*
* strsep changes the string, duplicate it because btrfs_parse_options
* gets called later
*/
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
return -ENOMEM;
orig = opts;
while ((p = strsep(&opts, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
if (token == Opt_device) {
device_name = match_strdup(&args[0]);
if (!device_name) {
error = -ENOMEM;
goto out;
}
error = btrfs_scan_one_device(device_name,
flags, holder, fs_devices);
kfree(device_name);
if (error)
goto out;
}
}
out:
kfree(orig);
return error;
}
/*
* Parse mount options that are related to subvolume id
*
* The value is later passed to mount_subvol()
*/
static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
char **subvol_name, u64 *subvol_objectid)
{
substring_t args[MAX_OPT_ARGS];
char *opts, *orig, *p;
char *num = NULL; char *num = NULL;
int error = 0; int error = 0;
...@@ -900,8 +939,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, ...@@ -900,8 +939,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
return 0; return 0;
/* /*
* strsep changes the string, duplicate it because parse_options * strsep changes the string, duplicate it because
* gets called twice * btrfs_parse_early_options gets called later
*/ */
opts = kstrdup(options, GFP_KERNEL); opts = kstrdup(options, GFP_KERNEL);
if (!opts) if (!opts)
...@@ -940,18 +979,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, ...@@ -940,18 +979,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
case Opt_subvolrootid: case Opt_subvolrootid:
pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n"); pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
break; break;
case Opt_device:
device_name = match_strdup(&args[0]);
if (!device_name) {
error = -ENOMEM;
goto out;
}
error = btrfs_scan_one_device(device_name,
flags, holder, fs_devices);
kfree(device_name);
if (error)
goto out;
break;
default: default:
break; break;
} }
...@@ -1243,7 +1270,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) ...@@ -1243,7 +1270,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
{ {
struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
char *compress_type; const char *compress_type;
if (btrfs_test_opt(info, DEGRADED)) if (btrfs_test_opt(info, DEGRADED))
seq_puts(seq, ",degraded"); seq_puts(seq, ",degraded");
...@@ -1259,12 +1286,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) ...@@ -1259,12 +1286,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
num_online_cpus() + 2, 8)) num_online_cpus() + 2, 8))
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
if (btrfs_test_opt(info, COMPRESS)) { if (btrfs_test_opt(info, COMPRESS)) {
if (info->compress_type == BTRFS_COMPRESS_ZLIB) compress_type = btrfs_compress_type2str(info->compress_type);
compress_type = "zlib";
else if (info->compress_type == BTRFS_COMPRESS_LZO)
compress_type = "lzo";
else
compress_type = "zstd";
if (btrfs_test_opt(info, FORCE_COMPRESS)) if (btrfs_test_opt(info, FORCE_COMPRESS))
seq_printf(seq, ",compress-force=%s", compress_type); seq_printf(seq, ",compress-force=%s", compress_type);
else else
...@@ -1365,86 +1387,12 @@ static inline int is_subvolume_inode(struct inode *inode) ...@@ -1365,86 +1387,12 @@ static inline int is_subvolume_inode(struct inode *inode)
return 0; return 0;
} }
/*
* This will add subvolid=0 to the argument string while removing any subvol=
* and subvolid= arguments to make sure we get the top-level root for path
* walking to the subvol we want.
*/
static char *setup_root_args(char *args)
{
char *buf, *dst, *sep;
if (!args)
return kstrdup("subvolid=0", GFP_KERNEL);
/* The worst case is that we add ",subvolid=0" to the end. */
buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1,
GFP_KERNEL);
if (!buf)
return NULL;
while (1) {
sep = strchrnul(args, ',');
if (!strstarts(args, "subvol=") &&
!strstarts(args, "subvolid=")) {
memcpy(dst, args, sep - args);
dst += sep - args;
*dst++ = ',';
}
if (*sep)
args = sep + 1;
else
break;
}
strcpy(dst, "subvolid=0");
return buf;
}
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
int flags, const char *device_name, const char *device_name, struct vfsmount *mnt)
char *data)
{ {
struct dentry *root; struct dentry *root;
struct vfsmount *mnt = NULL;
char *newargs;
int ret; int ret;
newargs = setup_root_args(data);
if (!newargs) {
root = ERR_PTR(-ENOMEM);
goto out;
}
mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
if (flags & SB_RDONLY) {
mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY,
device_name, newargs);
} else {
mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY,
device_name, newargs);
if (IS_ERR(mnt)) {
root = ERR_CAST(mnt);
mnt = NULL;
goto out;
}
down_write(&mnt->mnt_sb->s_umount);
ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
up_write(&mnt->mnt_sb->s_umount);
if (ret < 0) {
root = ERR_PTR(ret);
goto out;
}
}
}
if (IS_ERR(mnt)) {
root = ERR_CAST(mnt);
mnt = NULL;
goto out;
}
if (!subvol_name) { if (!subvol_name) {
if (!subvol_objectid) { if (!subvol_objectid) {
ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
...@@ -1500,7 +1448,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, ...@@ -1500,7 +1448,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
out: out:
mntput(mnt); mntput(mnt);
kfree(newargs);
kfree(subvol_name); kfree(subvol_name);
return root; return root;
} }
...@@ -1558,11 +1505,11 @@ static int setup_security_options(struct btrfs_fs_info *fs_info, ...@@ -1558,11 +1505,11 @@ static int setup_security_options(struct btrfs_fs_info *fs_info,
/* /*
* Find a superblock for the given device / mount point. * Find a superblock for the given device / mount point.
* *
* Note: This is based on get_sb_bdev from fs/super.c with a few additions * Note: This is based on mount_bdev from fs/super.c with a few additions
* for multiple device setup. Make sure to keep it in sync. * for multiple device setup. Make sure to keep it in sync.
*/ */
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
const char *device_name, void *data) int flags, const char *device_name, void *data)
{ {
struct block_device *bdev = NULL; struct block_device *bdev = NULL;
struct super_block *s; struct super_block *s;
...@@ -1570,27 +1517,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, ...@@ -1570,27 +1517,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
struct btrfs_fs_info *fs_info = NULL; struct btrfs_fs_info *fs_info = NULL;
struct security_mnt_opts new_sec_opts; struct security_mnt_opts new_sec_opts;
fmode_t mode = FMODE_READ; fmode_t mode = FMODE_READ;
char *subvol_name = NULL;
u64 subvol_objectid = 0;
int error = 0; int error = 0;
if (!(flags & SB_RDONLY)) if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE; mode |= FMODE_WRITE;
error = btrfs_parse_early_options(data, mode, fs_type, error = btrfs_parse_early_options(data, mode, fs_type,
&subvol_name, &subvol_objectid,
&fs_devices); &fs_devices);
if (error) { if (error) {
kfree(subvol_name);
return ERR_PTR(error); return ERR_PTR(error);
} }
if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
/* mount_subvol() will free subvol_name. */
return mount_subvol(subvol_name, subvol_objectid, flags,
device_name, data);
}
security_init_mnt_opts(&new_sec_opts); security_init_mnt_opts(&new_sec_opts);
if (data) { if (data) {
error = parse_security_options(data, &new_sec_opts); error = parse_security_options(data, &new_sec_opts);
...@@ -1674,6 +1611,84 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, ...@@ -1674,6 +1611,84 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
return ERR_PTR(error); return ERR_PTR(error);
} }
/*
* Mount function which is called by VFS layer.
*
* In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
* which needs vfsmount* of device's root (/). This means device's root has to
* be mounted internally in any case.
*
* Operation flow:
* 1. Parse subvol id related options for later use in mount_subvol().
*
* 2. Mount device's root (/) by calling vfs_kern_mount().
*
* NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
* first place. In order to avoid calling btrfs_mount() again, we use
* different file_system_type which is not registered to VFS by
* register_filesystem() (btrfs_root_fs_type). As a result,
* btrfs_mount_root() is called. The return value will be used by
* mount_subtree() in mount_subvol().
*
* 3. Call mount_subvol() to get the dentry of subvolume. Since there is
* "btrfs subvolume set-default", mount_subvol() is called always.
*/
static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
const char *device_name, void *data)
{
struct vfsmount *mnt_root;
struct dentry *root;
fmode_t mode = FMODE_READ;
char *subvol_name = NULL;
u64 subvol_objectid = 0;
int error = 0;
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
error = btrfs_parse_subvol_options(data, mode,
&subvol_name, &subvol_objectid);
if (error) {
kfree(subvol_name);
return ERR_PTR(error);
}
/* mount device's root (/) */
mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
if (flags & SB_RDONLY) {
mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
flags & ~SB_RDONLY, device_name, data);
} else {
mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
flags | SB_RDONLY, device_name, data);
if (IS_ERR(mnt_root)) {
root = ERR_CAST(mnt_root);
goto out;
}
down_write(&mnt_root->mnt_sb->s_umount);
error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
up_write(&mnt_root->mnt_sb->s_umount);
if (error < 0) {
root = ERR_PTR(error);
mntput(mnt_root);
goto out;
}
}
}
if (IS_ERR(mnt_root)) {
root = ERR_CAST(mnt_root);
goto out;
}
/* mount_subvol() will free subvol_name and mnt_root */
root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root);
out:
return root;
}
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
int new_pool_size, int old_pool_size) int new_pool_size, int old_pool_size)
{ {
...@@ -1820,7 +1835,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) ...@@ -1820,7 +1835,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore; goto restore;
} }
if (!btrfs_check_rw_degradable(fs_info)) { if (!btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info, btrfs_warn(fs_info,
"too many missing devices, writeable remount is not allowed"); "too many missing devices, writeable remount is not allowed");
ret = -EACCES; ret = -EACCES;
...@@ -1972,8 +1987,10 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, ...@@ -1972,8 +1987,10 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
if (!device->in_fs_metadata || !device->bdev || if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
device->is_tgtdev_for_dev_replace) &device->dev_state) ||
!device->bdev ||
test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
continue; continue;
if (i >= nr_devices) if (i >= nr_devices)
...@@ -2174,6 +2191,15 @@ static struct file_system_type btrfs_fs_type = { ...@@ -2174,6 +2191,15 @@ static struct file_system_type btrfs_fs_type = {
.kill_sb = btrfs_kill_super, .kill_sb = btrfs_kill_super,
.fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
}; };
static struct file_system_type btrfs_root_fs_type = {
.owner = THIS_MODULE,
.name = "btrfs",
.mount = btrfs_mount_root,
.kill_sb = btrfs_kill_super,
.fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
};
MODULE_ALIAS_FS("btrfs"); MODULE_ALIAS_FS("btrfs");
static int btrfs_control_open(struct inode *inode, struct file *file) static int btrfs_control_open(struct inode *inode, struct file *file)
...@@ -2207,11 +2233,11 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, ...@@ -2207,11 +2233,11 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
switch (cmd) { switch (cmd) {
case BTRFS_IOC_SCAN_DEV: case BTRFS_IOC_SCAN_DEV:
ret = btrfs_scan_one_device(vol->name, FMODE_READ, ret = btrfs_scan_one_device(vol->name, FMODE_READ,
&btrfs_fs_type, &fs_devices); &btrfs_root_fs_type, &fs_devices);
break; break;
case BTRFS_IOC_DEVICES_READY: case BTRFS_IOC_DEVICES_READY:
ret = btrfs_scan_one_device(vol->name, FMODE_READ, ret = btrfs_scan_one_device(vol->name, FMODE_READ,
&btrfs_fs_type, &fs_devices); &btrfs_root_fs_type, &fs_devices);
if (ret) if (ret)
break; break;
ret = !(fs_devices->num_devices == fs_devices->total_devices); ret = !(fs_devices->num_devices == fs_devices->total_devices);
...@@ -2269,7 +2295,7 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) ...@@ -2269,7 +2295,7 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
while (cur_devices) { while (cur_devices) {
head = &cur_devices->devices; head = &cur_devices->devices;
list_for_each_entry(dev, head, dev_list) { list_for_each_entry(dev, head, dev_list) {
if (dev->missing) if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
continue; continue;
if (!dev->name) if (!dev->name)
continue; continue;
...@@ -2324,7 +2350,7 @@ static struct miscdevice btrfs_misc = { ...@@ -2324,7 +2350,7 @@ static struct miscdevice btrfs_misc = {
MODULE_ALIAS_MISCDEV(BTRFS_MINOR); MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control"); MODULE_ALIAS("devname:btrfs-control");
static int btrfs_interface_init(void) static int __init btrfs_interface_init(void)
{ {
return misc_register(&btrfs_misc); return misc_register(&btrfs_misc);
} }
...@@ -2334,7 +2360,7 @@ static void btrfs_interface_exit(void) ...@@ -2334,7 +2360,7 @@ static void btrfs_interface_exit(void)
misc_deregister(&btrfs_misc); misc_deregister(&btrfs_misc);
} }
static void btrfs_print_mod_info(void) static void __init btrfs_print_mod_info(void)
{ {
pr_info("Btrfs loaded, crc32c=%s" pr_info("Btrfs loaded, crc32c=%s"
#ifdef CONFIG_BTRFS_DEBUG #ifdef CONFIG_BTRFS_DEBUG
......
...@@ -897,7 +897,7 @@ static int btrfs_init_debugfs(void) ...@@ -897,7 +897,7 @@ static int btrfs_init_debugfs(void)
return 0; return 0;
} }
int btrfs_init_sysfs(void) int __init btrfs_init_sysfs(void)
{ {
int ret; int ret;
......
...@@ -277,6 +277,9 @@ int btrfs_run_sanity_tests(void) ...@@ -277,6 +277,9 @@ int btrfs_run_sanity_tests(void)
goto out; goto out;
} }
} }
ret = btrfs_test_extent_map();
if (ret)
goto out;
out: out:
btrfs_destroy_test_fs(); btrfs_destroy_test_fs();
return ret; return ret;
......
...@@ -33,6 +33,7 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize); ...@@ -33,6 +33,7 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize);
int btrfs_test_inodes(u32 sectorsize, u32 nodesize); int btrfs_test_inodes(u32 sectorsize, u32 nodesize);
int btrfs_test_qgroups(u32 sectorsize, u32 nodesize); int btrfs_test_qgroups(u32 sectorsize, u32 nodesize);
int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize); int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize);
int btrfs_test_extent_map(void);
struct inode *btrfs_new_test_inode(void); struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize); struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize);
void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info); void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info);
......
/*
* Copyright (C) 2017 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
static void free_extent_map_tree(struct extent_map_tree *em_tree)
{
struct extent_map *em;
struct rb_node *node;
while (!RB_EMPTY_ROOT(&em_tree->map)) {
node = rb_first(&em_tree->map);
em = rb_entry(node, struct extent_map, rb_node);
remove_extent_mapping(em_tree, em);
#ifdef CONFIG_BTRFS_DEBUG
if (refcount_read(&em->refs) != 1) {
test_msg(
"em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d\n",
em->start, em->len, em->block_start,
em->block_len, refcount_read(&em->refs));
refcount_set(&em->refs, 1);
}
#endif
free_extent_map(em);
}
}
/*
* Test scenario:
*
* Suppose that no extent map has been loaded into memory yet, there is a file
* extent [0, 16K), followed by another file extent [16K, 20K), two dio reads
* are entering btrfs_get_extent() concurrently, t1 is reading [8K, 16K), t2 is
* reading [0, 8K)
*
* t1 t2
* btrfs_get_extent() btrfs_get_extent()
* -> lookup_extent_mapping() ->lookup_extent_mapping()
* -> add_extent_mapping(0, 16K)
* -> return em
* ->add_extent_mapping(0, 16K)
* -> #handle -EEXIST
*/
/*
 * Exercise the -EEXIST handling of btrfs_add_extent_mapping() for the dio
 * read race described above: with [0, 16K) and [16K, 20K) already cached,
 * inserting [0, 8K) must hand back the existing [0, 16K) em instead of
 * failing.  Problems are reported via test_msg(); the tree is drained on
 * exit so later cases start clean.
 */
static void test_case_1(struct extent_map_tree *em_tree)
{
struct extent_map *em;
u64 start = 0;
u64 len = SZ_8K;
int ret;
em = alloc_extent_map();
if (!em)
/* Skip the test on error. */
return;
/* Add [0, 16K) */
em->start = 0;
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
ret = add_extent_mapping(em_tree, em, 0);
ASSERT(ret == 0);
free_extent_map(em);
/* Add [16K, 20K) following [0, 16K) */
em = alloc_extent_map();
if (!em)
goto out;
em->start = SZ_16K;
em->len = SZ_4K;
em->block_start = SZ_32K; /* avoid merging */
em->block_len = SZ_4K;
ret = add_extent_mapping(em_tree, em, 0);
ASSERT(ret == 0);
free_extent_map(em);
/* Now simulate t2 racing in with the overlapping [0, 8K) insert. */
em = alloc_extent_map();
if (!em)
goto out;
/* Add [0, 8K), should return [0, 16K) instead. */
em->start = start;
em->len = len;
em->block_start = start;
em->block_len = len;
ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len);
if (ret)
test_msg("case1 [%llu %llu]: ret %d\n", start, start + len, ret);
/* The em handed back must be the pre-existing [0, 16K) mapping. */
if (em &&
(em->start != 0 || extent_map_end(em) != SZ_16K ||
em->block_start != 0 || em->block_len != SZ_16K))
test_msg(
"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
free_extent_map(em);
out:
/* free memory */
free_extent_map_tree(em_tree);
}
/*
* Test scenario:
*
* Reading the inline ending up with EEXIST, ie. read an inline
* extent and discard page cache and read it again.
*/
/*
 * Exercise re-reading an inline extent (scenario above): with the inline em
 * [0, 1K) and a regular em [4K, 8K) cached, re-inserting [0, 1K) must hand
 * back the existing inline em rather than fail with -EEXIST.  Problems are
 * reported via test_msg(); the tree is drained on exit.
 */
static void test_case_2(struct extent_map_tree *em_tree)
{
struct extent_map *em;
int ret;
em = alloc_extent_map();
if (!em)
/* Skip the test on error. */
return;
/* Add [0, 1K) */
em->start = 0;
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
ret = add_extent_mapping(em_tree, em, 0);
ASSERT(ret == 0);
free_extent_map(em);
/* Add [4K, 8K) following [0, 1K) */
em = alloc_extent_map();
if (!em)
goto out;
em->start = SZ_4K;
em->len = SZ_4K;
em->block_start = SZ_4K;
em->block_len = SZ_4K;
ret = add_extent_mapping(em_tree, em, 0);
ASSERT(ret == 0);
free_extent_map(em);
/* Re-read the inline extent: try to add [0, 1K) a second time. */
em = alloc_extent_map();
if (!em)
goto out;
/* Add [0, 1K) */
em->start = 0;
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len);
if (ret)
test_msg("case2 [0 1K]: ret %d\n", ret);
/* Must get back the original inline em [0, 1K). */
if (em &&
(em->start != 0 || extent_map_end(em) != SZ_1K ||
em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1))
test_msg(
"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
ret, em->start, em->len, em->block_start,
em->block_len);
free_extent_map(em);
out:
/* free memory */
free_extent_map_tree(em_tree);
}
/*
 * One iteration of test scenario 3 (see the comment above test_case_3):
 * with [4K, 8K) already cached, try to add the larger [0, 16K) em while the
 * lookup range is [start, start + 4K).  The em handed back must cover the
 * requested range and stay contiguous (em->start == em->block_start).
 */
static void __test_case_3(struct extent_map_tree *em_tree, u64 start)
{
struct extent_map *em;
u64 len = SZ_4K;
int ret;
em = alloc_extent_map();
if (!em)
/* Skip this test on error. */
return;
/* Add [4K, 8K) */
em->start = SZ_4K;
em->len = SZ_4K;
em->block_start = SZ_4K;
em->block_len = SZ_4K;
ret = add_extent_mapping(em_tree, em, 0);
ASSERT(ret == 0);
free_extent_map(em);
em = alloc_extent_map();
if (!em)
goto out;
/* Add [0, 16K) */
em->start = 0;
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
ret = btrfs_add_extent_mapping(em_tree, &em, start, len);
if (ret)
test_msg("case3 [0x%llx 0x%llx): ret %d\n",
start, start + len, ret);
/*
 * Since bytes within em are contiguous, em->block_start is identical to
 * em->start.
 */
if (em &&
(start < em->start || start + len > extent_map_end(em) ||
em->start != em->block_start || em->len != em->block_len))
test_msg(
"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
start, start + len, ret, em->start, em->len,
em->block_start, em->block_len);
free_extent_map(em);
out:
/* free memory */
free_extent_map_tree(em_tree);
}
/*
* Test scenario:
*
* Suppose that no extent map has been loaded into memory yet.
* There is a file extent [0, 16K), two jobs are running concurrently
* against it, t1 is buffered writing to [4K, 8K) and t2 is doing dio
* read from [0, 4K) or [8K, 12K) or [12K, 16K).
*
* t1 goes ahead of t2 and adds em [4K, 8K) into tree.
*
* t1 t2
* cow_file_range() btrfs_get_extent()
* -> lookup_extent_mapping()
* -> add_extent_mapping()
* -> add_extent_mapping()
*/
static void test_case_3(struct extent_map_tree *em_tree)
{
__test_case_3(em_tree, 0);
__test_case_3(em_tree, SZ_8K);
__test_case_3(em_tree, (12 * 1024ULL));
}
/*
 * One iteration of test scenario 4 (see the comment above test_case_4):
 * with [0, 8K) and [8K, 32K) already cached, try to re-add the stale full
 * [0, 32K) em while the lookup range is [start, start + 4K).  Whatever em
 * comes back must at least cover the requested range.
 */
static void __test_case_4(struct extent_map_tree *em_tree, u64 start)
{
struct extent_map *em;
u64 len = SZ_4K;
int ret;
em = alloc_extent_map();
if (!em)
/* Skip this test on error. */
return;
/* Add [0K, 8K) */
em->start = 0;
em->len = SZ_8K;
em->block_start = 0;
em->block_len = SZ_8K;
ret = add_extent_mapping(em_tree, em, 0);
ASSERT(ret == 0);
free_extent_map(em);
em = alloc_extent_map();
if (!em)
goto out;
/* Add [8K, 32K): start 8K, len 24K */
em->start = SZ_8K;
em->len = 24 * 1024ULL;
em->block_start = SZ_16K; /* avoid merging */
em->block_len = 24 * 1024ULL;
ret = add_extent_mapping(em_tree, em, 0);
ASSERT(ret == 0);
free_extent_map(em);
em = alloc_extent_map();
if (!em)
goto out;
/* Add [0K, 32K) */
em->start = 0;
em->len = SZ_32K;
em->block_start = 0;
em->block_len = SZ_32K;
ret = btrfs_add_extent_mapping(em_tree, &em, start, len);
if (ret)
/* NOTE(review): prints len, not start + len — differs from case3. */
test_msg("case4 [0x%llx 0x%llx): ret %d\n",
start, len, ret);
if (em &&
(start < em->start || start + len > extent_map_end(em)))
test_msg(
"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
start, len, ret, em->start, em->len, em->block_start,
em->block_len);
free_extent_map(em);
out:
/* free memory */
free_extent_map_tree(em_tree);
}
/*
* Test scenario:
*
* Suppose that no extent map has been loaded into memory yet.
* There is a file extent [0, 32K), two jobs are running concurrently
* against it, t1 is doing dio write to [8K, 32K) and t2 is doing dio
* read from [0, 4K) or [4K, 8K).
*
* t1 goes ahead of t2 and splits em [0, 32K) to em [0K, 8K) and [8K 32K).
*
* t1 t2
* btrfs_get_blocks_direct() btrfs_get_blocks_direct()
* -> btrfs_get_extent() -> btrfs_get_extent()
* -> lookup_extent_mapping()
* -> add_extent_mapping() -> lookup_extent_mapping()
* # load [0, 32K)
* -> btrfs_new_extent_direct()
* -> btrfs_drop_extent_cache()
* # split [0, 32K)
* -> add_extent_mapping()
* # add [8K, 32K)
* -> add_extent_mapping()
* # handle -EEXIST when adding
* # [0, 32K)
*/
static void test_case_4(struct extent_map_tree *em_tree)
{
__test_case_4(em_tree, 0);
__test_case_4(em_tree, SZ_4K);
}
/*
 * Entry point for the extent map selftests.
 *
 * Allocates a scratch extent_map_tree, runs test cases 1-4 against it and
 * frees it again.  Individual cases report problems through test_msg();
 * allocation failure merely skips the tests, so this always returns 0.
 *
 * Fix: use a proper (void) prototype instead of the old-style empty
 * parameter list, matching the declaration in btrfs-tests.h.
 */
int btrfs_test_extent_map(void)
{
	struct extent_map_tree *em_tree;

	test_msg("Running extent_map tests\n");

	em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
	if (!em_tree)
		/* Skip the test on allocation error. */
		return 0;

	extent_map_tree_init(em_tree);

	test_case_1(em_tree);
	test_case_2(em_tree);
	test_case_3(em_tree);
	test_case_4(em_tree);

	kfree(em_tree);
	return 0;
}
...@@ -288,10 +288,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) ...@@ -288,10 +288,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_msg("Expected a hole, got %llu\n", em->block_start); test_msg("Expected a hole, got %llu\n", em->block_start);
goto out; goto out;
} }
if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
test_msg("Vacancy flag wasn't set properly\n");
goto out;
}
free_extent_map(em); free_extent_map(em);
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
...@@ -1001,8 +997,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ...@@ -1001,8 +997,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE >> 1, BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE, 0, 0, EXTENT_UPTODATE, 0, 0, NULL);
NULL, GFP_KERNEL);
if (ret) { if (ret) {
test_msg("clear_extent_bit returned %d\n", ret); test_msg("clear_extent_bit returned %d\n", ret);
goto out; goto out;
...@@ -1070,8 +1065,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ...@@ -1070,8 +1065,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize, BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0, EXTENT_UPTODATE, 0, 0, NULL);
NULL, GFP_KERNEL);
if (ret) { if (ret) {
test_msg("clear_extent_bit returned %d\n", ret); test_msg("clear_extent_bit returned %d\n", ret);
goto out; goto out;
...@@ -1104,8 +1098,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ...@@ -1104,8 +1098,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/* Empty */ /* Empty */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0, EXTENT_UPTODATE, 0, 0, NULL);
NULL, GFP_KERNEL);
if (ret) { if (ret) {
test_msg("clear_extent_bit returned %d\n", ret); test_msg("clear_extent_bit returned %d\n", ret);
goto out; goto out;
...@@ -1121,8 +1114,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) ...@@ -1121,8 +1114,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
if (ret) if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_UPTODATE, 0, 0, EXTENT_UPTODATE, 0, 0, NULL);
NULL, GFP_KERNEL);
iput(inode); iput(inode);
btrfs_free_dummy_root(root); btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info); btrfs_free_dummy_fs_info(fs_info);
...@@ -1134,7 +1126,6 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize) ...@@ -1134,7 +1126,6 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
int ret; int ret;
set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only); set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
set_bit(EXTENT_FLAG_VACANCY, &vacancy_only);
set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
test_msg("Running btrfs_get_extent tests\n"); test_msg("Running btrfs_get_extent tests\n");
......
...@@ -495,8 +495,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, ...@@ -495,8 +495,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
if (current->journal_info) { if (current->journal_info) {
WARN_ON(type & TRANS_EXTWRITERS); WARN_ON(type & TRANS_EXTWRITERS);
h = current->journal_info; h = current->journal_info;
h->use_count++; refcount_inc(&h->use_count);
WARN_ON(h->use_count > 2); WARN_ON(refcount_read(&h->use_count) > 2);
h->orig_rsv = h->block_rsv; h->orig_rsv = h->block_rsv;
h->block_rsv = NULL; h->block_rsv = NULL;
goto got_it; goto got_it;
...@@ -567,7 +567,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, ...@@ -567,7 +567,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
h->transid = cur_trans->transid; h->transid = cur_trans->transid;
h->transaction = cur_trans; h->transaction = cur_trans;
h->root = root; h->root = root;
h->use_count = 1; refcount_set(&h->use_count, 1);
h->fs_info = root->fs_info; h->fs_info = root->fs_info;
h->type = type; h->type = type;
...@@ -837,8 +837,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, ...@@ -837,8 +837,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
int err = 0; int err = 0;
int must_run_delayed_refs = 0; int must_run_delayed_refs = 0;
if (trans->use_count > 1) { if (refcount_read(&trans->use_count) > 1) {
trans->use_count--; refcount_dec(&trans->use_count);
trans->block_rsv = trans->orig_rsv; trans->block_rsv = trans->orig_rsv;
return 0; return 0;
} }
...@@ -1016,8 +1016,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info, ...@@ -1016,8 +1016,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
* it's safe to do it (through clear_btree_io_tree()). * it's safe to do it (through clear_btree_io_tree()).
*/ */
err = clear_extent_bit(dirty_pages, start, end, err = clear_extent_bit(dirty_pages, start, end,
EXTENT_NEED_WAIT, EXTENT_NEED_WAIT, 0, 0, &cached_state);
0, 0, &cached_state, GFP_NOFS);
if (err == -ENOMEM) if (err == -ENOMEM)
err = 0; err = 0;
if (!err) if (!err)
...@@ -1869,7 +1868,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, ...@@ -1869,7 +1868,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_transaction *cur_trans = trans->transaction;
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
WARN_ON(trans->use_count > 1); WARN_ON(refcount_read(&trans->use_count) > 1);
btrfs_abort_transaction(trans, err); btrfs_abort_transaction(trans, err);
...@@ -2266,16 +2265,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ...@@ -2266,16 +2265,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
} }
ret = write_all_supers(fs_info, 0); ret = write_all_supers(fs_info, 0);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
goto scrub_continue;
}
/* /*
* the super is written, we can safely allow the tree-loggers * the super is written, we can safely allow the tree-loggers
* to go about their business * to go about their business
*/ */
mutex_unlock(&fs_info->tree_log_mutex); mutex_unlock(&fs_info->tree_log_mutex);
if (ret)
goto scrub_continue;
btrfs_finish_extent_commit(trans, fs_info); btrfs_finish_extent_commit(trans, fs_info);
......
...@@ -58,6 +58,7 @@ struct btrfs_transaction { ...@@ -58,6 +58,7 @@ struct btrfs_transaction {
/* Be protected by fs_info->trans_lock when we want to change it. */ /* Be protected by fs_info->trans_lock when we want to change it. */
enum btrfs_trans_state state; enum btrfs_trans_state state;
int aborted;
struct list_head list; struct list_head list;
struct extent_io_tree dirty_pages; struct extent_io_tree dirty_pages;
unsigned long start_time; unsigned long start_time;
...@@ -70,7 +71,6 @@ struct btrfs_transaction { ...@@ -70,7 +71,6 @@ struct btrfs_transaction {
struct list_head dirty_bgs; struct list_head dirty_bgs;
struct list_head io_bgs; struct list_head io_bgs;
struct list_head dropped_roots; struct list_head dropped_roots;
u64 num_dirty_bgs;
/* /*
* we need to make sure block group deletion doesn't race with * we need to make sure block group deletion doesn't race with
...@@ -79,11 +79,11 @@ struct btrfs_transaction { ...@@ -79,11 +79,11 @@ struct btrfs_transaction {
*/ */
struct mutex cache_write_mutex; struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock; spinlock_t dirty_bgs_lock;
unsigned int num_dirty_bgs;
/* Protected by spin lock fs_info->unused_bgs_lock. */ /* Protected by spin lock fs_info->unused_bgs_lock. */
struct list_head deleted_bgs; struct list_head deleted_bgs;
spinlock_t dropped_roots_lock; spinlock_t dropped_roots_lock;
struct btrfs_delayed_ref_root delayed_refs; struct btrfs_delayed_ref_root delayed_refs;
int aborted;
struct btrfs_fs_info *fs_info; struct btrfs_fs_info *fs_info;
}; };
...@@ -111,20 +111,19 @@ struct btrfs_trans_handle { ...@@ -111,20 +111,19 @@ struct btrfs_trans_handle {
u64 transid; u64 transid;
u64 bytes_reserved; u64 bytes_reserved;
u64 chunk_bytes_reserved; u64 chunk_bytes_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
unsigned long delayed_ref_updates; unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction; struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *orig_rsv; struct btrfs_block_rsv *orig_rsv;
refcount_t use_count;
unsigned int type;
short aborted; short aborted;
short adding_csums; bool adding_csums;
bool allocating_chunk; bool allocating_chunk;
bool can_flush_pending_bgs; bool can_flush_pending_bgs;
bool reloc_reserved; bool reloc_reserved;
bool sync; bool sync;
bool dirty; bool dirty;
unsigned int type;
struct btrfs_root *root; struct btrfs_root *root;
struct btrfs_fs_info *fs_info; struct btrfs_fs_info *fs_info;
struct list_head new_bgs; struct list_head new_bgs;
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "tree-checker.h" #include "tree-checker.h"
#include "disk-io.h" #include "disk-io.h"
#include "compression.h" #include "compression.h"
#include "hash.h"
/* /*
* Error message should follow the following format: * Error message should follow the following format:
...@@ -222,6 +223,142 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, ...@@ -222,6 +223,142 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
return 0; return 0;
} }
/*
 * Customized error reporter for dir_item: the only important extra piece of
 * information is key->objectid, which represents the inode number.
 */
__printf(4, 5)
static void dir_item_err(const struct btrfs_root *root,
const struct extent_buffer *eb, int slot,
const char *fmt, ...)
{
struct btrfs_key key;
struct va_format vaf;
va_list args;
/* Pull the item key so the inode number can be included in the message. */
btrfs_item_key_to_cpu(eb, &key, slot);
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
btrfs_crit(root->fs_info,
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
va_end(args);
}
/*
 * Validate a DIR_ITEM / DIR_INDEX / XATTR_ITEM leaf item.
 *
 * A single leaf item may pack several btrfs_dir_item entries back to back;
 * walk all of them and verify header bounds, dir type, name/data lengths and
 * (for hashed key types) that the name hash matches key->offset.
 *
 * Returns 0 if the item is sane, or -EUCLEAN on the first corruption found
 * (already reported via dir_item_err()).
 */
static int check_dir_item(struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
struct btrfs_dir_item *di;
u32 item_size = btrfs_item_size_nr(leaf, slot);
u32 cur = 0;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
while (cur < item_size) {
u32 name_len;
u32 data_len;
u32 max_name_len;
u32 total_size;
u32 name_hash;
u8 dir_type;
/* header itself should not cross item boundary */
if (cur + sizeof(*di) > item_size) {
dir_item_err(root, leaf, slot,
"dir item header crosses item boundary, have %zu boundary %u",
cur + sizeof(*di), item_size);
return -EUCLEAN;
}
/* dir type check */
dir_type = btrfs_dir_type(leaf, di);
if (dir_type >= BTRFS_FT_MAX) {
dir_item_err(root, leaf, slot,
"invalid dir item type, have %u expect [0, %u)",
dir_type, BTRFS_FT_MAX);
return -EUCLEAN;
}
/* XATTR keys must carry xattr-typed entries and vice versa. */
if (key->type == BTRFS_XATTR_ITEM_KEY &&
dir_type != BTRFS_FT_XATTR) {
dir_item_err(root, leaf, slot,
"invalid dir item type for XATTR key, have %u expect %u",
dir_type, BTRFS_FT_XATTR);
return -EUCLEAN;
}
if (dir_type == BTRFS_FT_XATTR &&
key->type != BTRFS_XATTR_ITEM_KEY) {
dir_item_err(root, leaf, slot,
"xattr dir type found for non-XATTR key");
return -EUCLEAN;
}
/* Xattr names have a different length limit than dir entry names. */
if (dir_type == BTRFS_FT_XATTR)
max_name_len = XATTR_NAME_MAX;
else
max_name_len = BTRFS_NAME_LEN;
/* Name/data length check */
name_len = btrfs_dir_name_len(leaf, di);
data_len = btrfs_dir_data_len(leaf, di);
if (name_len > max_name_len) {
dir_item_err(root, leaf, slot,
"dir item name len too long, have %u max %u",
name_len, max_name_len);
return -EUCLEAN;
}
if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
dir_item_err(root, leaf, slot,
"dir item name and data len too long, have %u max %u",
name_len + data_len,
BTRFS_MAX_XATTR_SIZE(root->fs_info));
return -EUCLEAN;
}
/* Only xattr entries may carry a data payload. */
if (data_len && dir_type != BTRFS_FT_XATTR) {
dir_item_err(root, leaf, slot,
"dir item with invalid data len, have %u expect 0",
data_len);
return -EUCLEAN;
}
total_size = sizeof(*di) + name_len + data_len;
/* header and name/data should not cross item boundary */
if (cur + total_size > item_size) {
dir_item_err(root, leaf, slot,
"dir item data crosses item boundary, have %u boundary %u",
cur + total_size, item_size);
return -EUCLEAN;
}
/*
 * Special check for XATTR/DIR_ITEM, as key->offset is name
 * hash, should match its name
 */
if (key->type == BTRFS_DIR_ITEM_KEY ||
key->type == BTRFS_XATTR_ITEM_KEY) {
/*
 * NOTE(review): max() of two constants; some compilers may
 * still treat this array bound as a VLA — confirm with -Wvla.
 */
char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
read_extent_buffer(leaf, namebuf,
(unsigned long)(di + 1), name_len);
name_hash = btrfs_name_hash(namebuf, name_len);
if (key->offset != name_hash) {
dir_item_err(root, leaf, slot,
"name hash mismatch with key, have 0x%016x expect 0x%016llx",
name_hash, key->offset);
return -EUCLEAN;
}
}
/* Advance to the next packed entry, if any. */
cur += total_size;
di = (struct btrfs_dir_item *)((void *)di + total_size);
}
return 0;
}
/* /*
* Common point to switch the item-specific validation. * Common point to switch the item-specific validation.
*/ */
...@@ -238,6 +375,11 @@ static int check_leaf_item(struct btrfs_root *root, ...@@ -238,6 +375,11 @@ static int check_leaf_item(struct btrfs_root *root,
case BTRFS_EXTENT_CSUM_KEY: case BTRFS_EXTENT_CSUM_KEY:
ret = check_csum_item(root, leaf, key, slot); ret = check_csum_item(root, leaf, key, slot);
break; break;
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
case BTRFS_XATTR_ITEM_KEY:
ret = check_dir_item(root, leaf, key, slot);
break;
} }
return ret; return ret;
} }
......
...@@ -1174,19 +1174,15 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, ...@@ -1174,19 +1174,15 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
return 0; return 0;
} }
static int extref_get_fields(struct extent_buffer *eb, int slot, static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
unsigned long ref_ptr, u32 *namelen, char **name, u32 *namelen, char **name, u64 *index,
u64 *index, u64 *parent_objectid) u64 *parent_objectid)
{ {
struct btrfs_inode_extref *extref; struct btrfs_inode_extref *extref;
extref = (struct btrfs_inode_extref *)ref_ptr; extref = (struct btrfs_inode_extref *)ref_ptr;
*namelen = btrfs_inode_extref_name_len(eb, extref); *namelen = btrfs_inode_extref_name_len(eb, extref);
if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)&extref->name,
*namelen))
return -EIO;
*name = kmalloc(*namelen, GFP_NOFS); *name = kmalloc(*namelen, GFP_NOFS);
if (*name == NULL) if (*name == NULL)
return -ENOMEM; return -ENOMEM;
...@@ -1201,19 +1197,14 @@ static int extref_get_fields(struct extent_buffer *eb, int slot, ...@@ -1201,19 +1197,14 @@ static int extref_get_fields(struct extent_buffer *eb, int slot,
return 0; return 0;
} }
static int ref_get_fields(struct extent_buffer *eb, int slot, static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
unsigned long ref_ptr, u32 *namelen, char **name, u32 *namelen, char **name, u64 *index)
u64 *index)
{ {
struct btrfs_inode_ref *ref; struct btrfs_inode_ref *ref;
ref = (struct btrfs_inode_ref *)ref_ptr; ref = (struct btrfs_inode_ref *)ref_ptr;
*namelen = btrfs_inode_ref_name_len(eb, ref); *namelen = btrfs_inode_ref_name_len(eb, ref);
if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)(ref + 1),
*namelen))
return -EIO;
*name = kmalloc(*namelen, GFP_NOFS); *name = kmalloc(*namelen, GFP_NOFS);
if (*name == NULL) if (*name == NULL)
return -ENOMEM; return -ENOMEM;
...@@ -1288,8 +1279,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ...@@ -1288,8 +1279,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
while (ref_ptr < ref_end) { while (ref_ptr < ref_end) {
if (log_ref_ver) { if (log_ref_ver) {
ret = extref_get_fields(eb, slot, ref_ptr, &namelen, ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
&name, &ref_index, &parent_objectid); &ref_index, &parent_objectid);
/* /*
* parent object can change from one array * parent object can change from one array
* item to another. * item to another.
...@@ -1301,8 +1292,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ...@@ -1301,8 +1292,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
goto out; goto out;
} }
} else { } else {
ret = ref_get_fields(eb, slot, ref_ptr, &namelen, ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
&name, &ref_index); &ref_index);
} }
if (ret) if (ret)
goto out; goto out;
...@@ -1836,7 +1827,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, ...@@ -1836,7 +1827,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot, struct extent_buffer *eb, int slot,
struct btrfs_key *key) struct btrfs_key *key)
{ {
struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0; int ret = 0;
u32 item_size = btrfs_item_size_nr(eb, slot); u32 item_size = btrfs_item_size_nr(eb, slot);
struct btrfs_dir_item *di; struct btrfs_dir_item *di;
...@@ -1849,8 +1839,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, ...@@ -1849,8 +1839,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
ptr_end = ptr + item_size; ptr_end = ptr + item_size;
while (ptr < ptr_end) { while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr; di = (struct btrfs_dir_item *)ptr;
if (verify_dir_item(fs_info, eb, slot, di))
return -EIO;
name_len = btrfs_dir_name_len(eb, di); name_len = btrfs_dir_name_len(eb, di);
ret = replay_one_name(trans, root, path, eb, di, key); ret = replay_one_name(trans, root, path, eb, di, key);
if (ret < 0) if (ret < 0)
...@@ -2025,11 +2013,6 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, ...@@ -2025,11 +2013,6 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
ptr_end = ptr + item_size; ptr_end = ptr + item_size;
while (ptr < ptr_end) { while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr; di = (struct btrfs_dir_item *)ptr;
if (verify_dir_item(fs_info, eb, slot, di)) {
ret = -EIO;
goto out;
}
name_len = btrfs_dir_name_len(eb, di); name_len = btrfs_dir_name_len(eb, di);
name = kmalloc(name_len, GFP_NOFS); name = kmalloc(name_len, GFP_NOFS);
if (!name) { if (!name) {
...@@ -2110,7 +2093,6 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans, ...@@ -2110,7 +2093,6 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
struct btrfs_path *path, struct btrfs_path *path,
const u64 ino) const u64 ino)
{ {
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key search_key; struct btrfs_key search_key;
struct btrfs_path *log_path; struct btrfs_path *log_path;
int i; int i;
...@@ -2152,11 +2134,6 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans, ...@@ -2152,11 +2134,6 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
u32 this_len = sizeof(*di) + name_len + data_len; u32 this_len = sizeof(*di) + name_len + data_len;
char *name; char *name;
ret = verify_dir_item(fs_info, path->nodes[0], i, di);
if (ret) {
ret = -EIO;
goto out;
}
name = kmalloc(name_len, GFP_NOFS); name = kmalloc(name_len, GFP_NOFS);
if (!name) { if (!name) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -4574,12 +4551,6 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, ...@@ -4574,12 +4551,6 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
this_len = sizeof(*extref) + this_name_len; this_len = sizeof(*extref) + this_name_len;
} }
ret = btrfs_is_name_len_valid(eb, slot, name_ptr,
this_name_len);
if (!ret) {
ret = -EIO;
goto out;
}
if (this_name_len > name_len) { if (this_name_len > name_len) {
char *new_name; char *new_name;
...@@ -5434,11 +5405,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, ...@@ -5434,11 +5405,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct dentry *parent, struct dentry *parent,
const loff_t start, const loff_t start,
const loff_t end, const loff_t end,
int exists_only, int inode_only,
struct btrfs_log_ctx *ctx) struct btrfs_log_ctx *ctx)
{ {
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb; struct super_block *sb;
struct dentry *old_parent = NULL; struct dentry *old_parent = NULL;
int ret = 0; int ret = 0;
...@@ -5604,7 +5574,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, ...@@ -5604,7 +5574,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
int ret; int ret;
ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)), ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)),
parent, start, end, 0, ctx); parent, start, end, LOG_INODE_ALL, ctx);
dput(parent); dput(parent);
return ret; return ret;
...@@ -5867,6 +5837,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, ...@@ -5867,6 +5837,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
return 0; return 0;
return btrfs_log_inode_parent(trans, root, inode, parent, 0, return btrfs_log_inode_parent(trans, root, inode, parent, 0,
LLONG_MAX, 1, NULL); LLONG_MAX, LOG_INODE_EXISTS, NULL);
} }
此差异已折叠。
...@@ -47,6 +47,12 @@ struct btrfs_pending_bios { ...@@ -47,6 +47,12 @@ struct btrfs_pending_bios {
#define btrfs_device_data_ordered_init(device) do { } while (0) #define btrfs_device_data_ordered_init(device) do { } while (0)
#endif #endif
#define BTRFS_DEV_STATE_WRITEABLE (0)
#define BTRFS_DEV_STATE_IN_FS_METADATA (1)
#define BTRFS_DEV_STATE_MISSING (2)
#define BTRFS_DEV_STATE_REPLACE_TGT (3)
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
struct btrfs_device { struct btrfs_device {
struct list_head dev_list; struct list_head dev_list;
struct list_head dev_alloc_list; struct list_head dev_alloc_list;
...@@ -69,11 +75,7 @@ struct btrfs_device { ...@@ -69,11 +75,7 @@ struct btrfs_device {
/* the mode sent to blkdev_get */ /* the mode sent to blkdev_get */
fmode_t mode; fmode_t mode;
int writeable; unsigned long dev_state;
int in_fs_metadata;
int missing;
int can_discard;
int is_tgtdev_for_dev_replace;
blk_status_t last_flush_error; blk_status_t last_flush_error;
int flush_bio_sent; int flush_bio_sent;
...@@ -129,14 +131,12 @@ struct btrfs_device { ...@@ -129,14 +131,12 @@ struct btrfs_device {
struct completion flush_wait; struct completion flush_wait;
/* per-device scrub information */ /* per-device scrub information */
struct scrub_ctx *scrub_device; struct scrub_ctx *scrub_ctx;
struct btrfs_work work; struct btrfs_work work;
struct rcu_head rcu; struct rcu_head rcu;
struct work_struct rcu_work;
/* readahead state */ /* readahead state */
spinlock_t reada_lock;
atomic_t reada_in_flight; atomic_t reada_in_flight;
u64 reada_next; u64 reada_next;
struct reada_zone *reada_curr_zone; struct reada_zone *reada_curr_zone;
...@@ -489,15 +489,16 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, ...@@ -489,15 +489,16 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 chunk_offset); struct btrfs_fs_info *fs_info, u64 chunk_offset);
static inline int btrfs_dev_stats_dirty(struct btrfs_device *dev)
{
return atomic_read(&dev->dev_stats_ccnt);
}
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
int index) int index)
{ {
atomic_inc(dev->dev_stat_values + index); atomic_inc(dev->dev_stat_values + index);
/*
* This memory barrier orders stores updating statistics before stores
* updating dev_stats_ccnt.
*
* It pairs with smp_rmb() in btrfs_run_dev_stats().
*/
smp_mb__before_atomic(); smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt); atomic_inc(&dev->dev_stats_ccnt);
} }
...@@ -514,7 +515,13 @@ static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, ...@@ -514,7 +515,13 @@ static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
int ret; int ret;
ret = atomic_xchg(dev->dev_stat_values + index, 0); ret = atomic_xchg(dev->dev_stat_values + index, 0);
smp_mb__before_atomic(); /*
* atomic_xchg implies a full memory barriers as per atomic_t.txt:
* - RMW operations that have a return value are fully ordered;
*
* This implicit memory barriers is paired with the smp_rmb in
* btrfs_run_dev_stats
*/
atomic_inc(&dev->dev_stats_ccnt); atomic_inc(&dev->dev_stats_ccnt);
return ret; return ret;
} }
...@@ -523,6 +530,12 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev, ...@@ -523,6 +530,12 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
int index, unsigned long val) int index, unsigned long val)
{ {
atomic_set(dev->dev_stat_values + index, val); atomic_set(dev->dev_stat_values + index, val);
/*
* This memory barrier orders stores updating statistics before stores
* updating dev_stats_ccnt.
*
* It pairs with smp_rmb() in btrfs_run_dev_stats().
*/
smp_mb__before_atomic(); smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt); atomic_inc(&dev->dev_stats_ccnt);
} }
...@@ -540,7 +553,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info, ...@@ -540,7 +553,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info,
struct list_head *btrfs_get_fs_uuids(void); struct list_head *btrfs_get_fs_uuids(void);
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info); struct btrfs_device *failing_dev);
#endif #endif
...@@ -268,7 +268,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ...@@ -268,7 +268,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
{ {
struct btrfs_key key; struct btrfs_key key;
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path; struct btrfs_path *path;
int ret = 0; int ret = 0;
...@@ -337,11 +336,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ...@@ -337,11 +336,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
u32 this_len = sizeof(*di) + name_len + data_len; u32 this_len = sizeof(*di) + name_len + data_len;
unsigned long name_ptr = (unsigned long)(di + 1); unsigned long name_ptr = (unsigned long)(di + 1);
if (verify_dir_item(fs_info, leaf, slot, di)) {
ret = -EIO;
goto err;
}
total_size += name_len + 1; total_size += name_len + 1;
/* /*
* We are just looking for how big our buffer needs to * We are just looking for how big our buffer needs to
......
此差异已折叠。
...@@ -193,7 +193,6 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, ...@@ -193,7 +193,6 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
__print_flags(flag, "|", \ __print_flags(flag, "|", \
{ (1 << EXTENT_FLAG_PINNED), "PINNED" },\ { (1 << EXTENT_FLAG_PINNED), "PINNED" },\
{ (1 << EXTENT_FLAG_COMPRESSED), "COMPRESSED" },\ { (1 << EXTENT_FLAG_COMPRESSED), "COMPRESSED" },\
{ (1 << EXTENT_FLAG_VACANCY), "VACANCY" },\
{ (1 << EXTENT_FLAG_PREALLOC), "PREALLOC" },\ { (1 << EXTENT_FLAG_PREALLOC), "PREALLOC" },\
{ (1 << EXTENT_FLAG_LOGGING), "LOGGING" },\ { (1 << EXTENT_FLAG_LOGGING), "LOGGING" },\
{ (1 << EXTENT_FLAG_FILLING), "FILLING" },\ { (1 << EXTENT_FLAG_FILLING), "FILLING" },\
......
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册