提交 5cea7647 编写于 作者: L Linus Torvalds

Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "There are some new user features and the usual load of invisible
  enhancements or cleanups.

  New features:

   - extend mount options to specify zlib compression level, -o
     compress=zlib:9

   - v2 of ioctl "extent to inode mapping", addressing a usecase where
     we want to retrieve more but inaccurate results and do the
     postprocessing in userspace, aiding defragmentation or
     deduplication tools

   - populate compression heuristics logic, do data sampling and try to
     guess compressibility by: looking for repeated patterns, counting
     unique byte values and distribution, calculating Shannon entropy;
     this will need more benchmarking and possibly fine tuning, but the
     base should be good enough

   - enable indexing for btrfs as lower filesystem in overlayfs

   - speedup page cache readahead during send on large files

  Internal enhancements:

   - more sanity checks of b-tree items when reading them from disk

   - more EINVAL/EUCLEAN fixups, missing BLK_STS_* conversion, other
     errno or error handling fixes

   - remove some homegrown IO-related logic, that's been obsoleted by
     core block layer changes (batching, plug/unplug, own counters)

   - add ref-verify, optional debugging feature to verify extent
     reference accounting

   - simplify code handling outstanding extents, make it more clear
     where and how the accounting is done

   - make delalloc reservations per-inode, simplify the code and make
     the logic more straightforward

   - extensive cleanup of delayed refs code

  Notable fixes:

   - fix send ioctl on 32bit with 64bit kernel"

* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (102 commits)
  btrfs: Fix bug for misused dev_t when lookup in dev state hash table.
  Btrfs: heuristic: add Shannon entropy calculation
  Btrfs: heuristic: add byte core set calculation
  Btrfs: heuristic: add byte set calculation
  Btrfs: heuristic: add detection of repeated data patterns
  Btrfs: heuristic: implement sampling logic
  Btrfs: heuristic: add bucket and sample counters and other defines
  Btrfs: compression: separate heuristic/compression workspaces
  btrfs: move btrfs_truncate_block out of trans handle
  btrfs: don't call btrfs_start_delalloc_roots in flushoncommit
  btrfs: track refs in a rb_tree instead of a list
  btrfs: add a comp_refs() helper
  btrfs: switch args for comp_*_refs
  btrfs: make the delalloc block rsv per inode
  btrfs: add tracepoints for outstanding extents mods
  Btrfs: rework outstanding_extents
  btrfs: increase output size for LOGICAL_INO_V2 ioctl
  btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2
  btrfs: add a flag to iterate_inodes_from_logical to find all extent refs for uncompressed extents
  btrfs: send: remove unused code
  ...
......@@ -91,3 +91,14 @@ config BTRFS_ASSERT
any of the assertions trip. This is meant for btrfs developers only.
If unsure, say N.
config BTRFS_FS_REF_VERIFY
bool "Btrfs with the ref verify tool compiled in"
depends on BTRFS_FS
default n
help
Enable run-time extent reference verification instrumentation. This
is meant to be used by btrfs developers for tracking down extent
reference problems or verifying they didn't break something.
If unsure, say N.
......@@ -10,10 +10,11 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o hash.o free-space-tree.o
uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
......
......@@ -67,7 +67,7 @@ struct btrfs_workqueue {
static void normal_work_helper(struct btrfs_work *work);
#define BTRFS_WORK_HELPER(name) \
void btrfs_##name(struct work_struct *arg) \
noinline_for_stack void btrfs_##name(struct work_struct *arg) \
{ \
struct btrfs_work *work = container_of(arg, struct btrfs_work, \
normal_work); \
......
......@@ -40,12 +40,14 @@ static int check_extent_in_eb(const struct btrfs_key *key,
const struct extent_buffer *eb,
const struct btrfs_file_extent_item *fi,
u64 extent_item_pos,
struct extent_inode_elem **eie)
struct extent_inode_elem **eie,
bool ignore_offset)
{
u64 offset = 0;
struct extent_inode_elem *e;
if (!btrfs_file_extent_compression(eb, fi) &&
if (!ignore_offset &&
!btrfs_file_extent_compression(eb, fi) &&
!btrfs_file_extent_encryption(eb, fi) &&
!btrfs_file_extent_other_encoding(eb, fi)) {
u64 data_offset;
......@@ -84,7 +86,8 @@ static void free_inode_elem_list(struct extent_inode_elem *eie)
static int find_extent_in_eb(const struct extent_buffer *eb,
u64 wanted_disk_byte, u64 extent_item_pos,
struct extent_inode_elem **eie)
struct extent_inode_elem **eie,
bool ignore_offset)
{
u64 disk_byte;
struct btrfs_key key;
......@@ -113,7 +116,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
if (disk_byte != wanted_disk_byte)
continue;
ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
if (ret < 0)
return ret;
}
......@@ -419,7 +422,7 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
struct ulist *parents, struct prelim_ref *ref,
int level, u64 time_seq, const u64 *extent_item_pos,
u64 total_refs)
u64 total_refs, bool ignore_offset)
{
int ret = 0;
int slot;
......@@ -472,7 +475,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
if (extent_item_pos) {
ret = check_extent_in_eb(&key, eb, fi,
*extent_item_pos,
&eie);
&eie, ignore_offset);
if (ret < 0)
break;
}
......@@ -510,7 +513,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 time_seq,
struct prelim_ref *ref, struct ulist *parents,
const u64 *extent_item_pos, u64 total_refs)
const u64 *extent_item_pos, u64 total_refs,
bool ignore_offset)
{
struct btrfs_root *root;
struct btrfs_key root_key;
......@@ -581,7 +585,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
}
ret = add_all_parents(root, path, parents, ref, level, time_seq,
extent_item_pos, total_refs);
extent_item_pos, total_refs, ignore_offset);
out:
path->lowest_level = 0;
btrfs_release_path(path);
......@@ -616,7 +620,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 time_seq,
struct preftrees *preftrees,
const u64 *extent_item_pos, u64 total_refs,
struct share_check *sc)
struct share_check *sc, bool ignore_offset)
{
int err;
int ret = 0;
......@@ -661,7 +665,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
}
err = resolve_indirect_ref(fs_info, path, time_seq, ref,
parents, extent_item_pos,
total_refs);
total_refs, ignore_offset);
/*
* we can only tolerate ENOENT,otherwise,we should catch error
* and return directly.
......@@ -769,6 +773,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct btrfs_key key;
struct btrfs_key tmp_op_key;
struct btrfs_key *op_key = NULL;
struct rb_node *n;
int count;
int ret = 0;
......@@ -778,7 +783,9 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
}
spin_lock(&head->lock);
list_for_each_entry(node, &head->ref_list, list) {
for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
node = rb_entry(n, struct btrfs_delayed_ref_node,
ref_node);
if (node->seq > seq)
continue;
......@@ -1107,13 +1114,17 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
*
* Otherwise this returns 0 for success and <0 for an error.
*
* If ignore_offset is set to false, only extent refs whose offsets match
* extent_item_pos are returned. If true, every extent ref is returned
* and extent_item_pos is ignored.
*
* FIXME some caching might speed things up
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist *refs,
struct ulist *roots, const u64 *extent_item_pos,
struct share_check *sc)
struct share_check *sc, bool ignore_offset)
{
struct btrfs_key key;
struct btrfs_path *path;
......@@ -1178,7 +1189,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
refcount_inc(&head->node.refs);
refcount_inc(&head->refs);
spin_unlock(&delayed_refs->lock);
btrfs_release_path(path);
......@@ -1189,7 +1200,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
*/
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
btrfs_put_delayed_ref_head(head);
goto again;
}
spin_unlock(&delayed_refs->lock);
......@@ -1235,7 +1246,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
extent_item_pos, total_refs, sc);
extent_item_pos, total_refs, sc, ignore_offset);
if (ret)
goto out;
......@@ -1282,7 +1293,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie);
*extent_item_pos, &eie, ignore_offset);
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
if (ret < 0)
......@@ -1350,7 +1361,7 @@ static void free_leaf_list(struct ulist *blocks)
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **leafs,
const u64 *extent_item_pos)
const u64 *extent_item_pos, bool ignore_offset)
{
int ret;
......@@ -1359,7 +1370,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
return -ENOMEM;
ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
*leafs, NULL, extent_item_pos, NULL);
*leafs, NULL, extent_item_pos, NULL, ignore_offset);
if (ret < 0 && ret != -ENOENT) {
free_leaf_list(*leafs);
return ret;
......@@ -1383,7 +1394,8 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*/
static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots)
u64 time_seq, struct ulist **roots,
bool ignore_offset)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
......@@ -1402,7 +1414,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
tmp, *roots, NULL, NULL);
tmp, *roots, NULL, NULL, ignore_offset);
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
ulist_free(*roots);
......@@ -1421,14 +1433,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots)
u64 time_seq, struct ulist **roots,
bool ignore_offset)
{
int ret;
if (!trans)
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
time_seq, roots);
time_seq, roots, ignore_offset);
if (!trans)
up_read(&fs_info->commit_root_sem);
return ret;
......@@ -1483,7 +1496,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
roots, NULL, &shared);
roots, NULL, &shared, false);
if (ret == BACKREF_FOUND_SHARED) {
/* this is the only condition under which we return 1 */
ret = 1;
......@@ -1877,7 +1890,8 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid, u64 extent_item_pos,
int search_commit_root,
iterate_extent_inodes_t *iterate, void *ctx)
iterate_extent_inodes_t *iterate, void *ctx,
bool ignore_offset)
{
int ret;
struct btrfs_trans_handle *trans = NULL;
......@@ -1903,14 +1917,15 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
tree_mod_seq_elem.seq, &refs,
&extent_item_pos);
&extent_item_pos, ignore_offset);
if (ret)
goto out;
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
tree_mod_seq_elem.seq, &roots);
tree_mod_seq_elem.seq, &roots,
ignore_offset);
if (ret)
break;
ULIST_ITER_INIT(&root_uiter);
......@@ -1943,7 +1958,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
iterate_extent_inodes_t *iterate, void *ctx)
iterate_extent_inodes_t *iterate, void *ctx,
bool ignore_offset)
{
int ret;
u64 extent_item_pos;
......@@ -1961,7 +1977,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
extent_item_pos = logical - found_key.objectid;
ret = iterate_extent_inodes(fs_info, found_key.objectid,
extent_item_pos, search_commit_root,
iterate, ctx);
iterate, ctx, ignore_offset);
return ret;
}
......
......@@ -43,17 +43,19 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid,
u64 extent_offset, int search_commit_root,
iterate_extent_inodes_t *iterate, void *ctx);
iterate_extent_inodes_t *iterate, void *ctx,
bool ignore_offset);
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
iterate_extent_inodes_t *iterate, void *ctx);
iterate_extent_inodes_t *iterate, void *ctx,
bool ignore_offset);
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots);
u64 time_seq, struct ulist **roots, bool ignore_offset);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
......
......@@ -36,14 +36,13 @@
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
#define BTRFS_INODE_DUMMY 2
#define BTRFS_INODE_IN_DEFRAG 3
#define BTRFS_INODE_DELALLOC_META_RESERVED 4
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
#define BTRFS_INODE_HAS_PROPS 11
#define BTRFS_INODE_HAS_ORPHAN_ITEM 4
#define BTRFS_INODE_HAS_ASYNC_EXTENT 5
#define BTRFS_INODE_NEEDS_FULL_SYNC 6
#define BTRFS_INODE_COPY_EVERYTHING 7
#define BTRFS_INODE_IN_DELALLOC_LIST 8
#define BTRFS_INODE_READDIO_NEED_LOCK 9
#define BTRFS_INODE_HAS_PROPS 10
/* in memory btrfs inode */
struct btrfs_inode {
......@@ -176,7 +175,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
unsigned outstanding_extents;
unsigned reserved_extents;
struct btrfs_block_rsv block_rsv;
/*
* Cached values of inode properties
......@@ -267,6 +267,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
return false;
}
static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
int mod)
{
lockdep_assert_held(&inode->lock);
inode->outstanding_extents += mod;
if (btrfs_is_free_space_inode(inode))
return;
trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
mod);
}
static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
{
int ret = 0;
......
......@@ -613,7 +613,7 @@ static void btrfsic_dev_state_hashtable_add(
struct btrfsic_dev_state_hashtable *h)
{
const unsigned int hashval =
(((unsigned int)((uintptr_t)ds->bdev)) &
(((unsigned int)((uintptr_t)ds->bdev->bd_dev)) &
(BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
list_add(&ds->collision_resolving_node, h->table + hashval);
......@@ -2803,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bio() is also called before
* btrfsic_mount(), this might return NULL */
dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
dev_state = btrfsic_dev_state_lookup(bio_dev(bio) + bio->bi_partno);
if (NULL != dev_state &&
(bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
unsigned int i = 0;
......@@ -2913,7 +2913,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
state = kvzalloc(sizeof(*state), GFP_KERNEL);
if (!state) {
pr_info("btrfs check-integrity: allocation failed!\n");
return -1;
return -ENOMEM;
}
if (!btrfsic_is_initialized) {
......@@ -2945,7 +2945,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
if (NULL == ds) {
pr_info("btrfs check-integrity: kmalloc() failed!\n");
mutex_unlock(&btrfsic_mutex);
return -1;
return -ENOMEM;
}
ds->bdev = device->bdev;
ds->state = state;
......
......@@ -33,6 +33,8 @@
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/sort.h>
#include <linux/log2.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
......@@ -255,7 +257,8 @@ static void end_compressed_bio_write(struct bio *bio)
cb->start,
cb->start + cb->len - 1,
NULL,
bio->bi_status ? 0 : 1);
bio->bi_status ?
BLK_STS_OK : BLK_STS_NOTSUPP);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
......@@ -706,7 +709,86 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
return ret;
}
static struct {
/*
* Heuristic uses systematic sampling to collect data from the input data
* range, the logic can be tuned by the following constants:
*
* @SAMPLING_READ_SIZE - how many bytes will be copied from for each sample
* @SAMPLING_INTERVAL - range from which the sampled data can be collected
*/
#define SAMPLING_READ_SIZE (16)
#define SAMPLING_INTERVAL (256)
/*
* For statistical analysis of the input data we consider bytes that form a
* Galois Field of 256 objects. Each object has an attribute count, ie. how
* many times the object appeared in the sample.
*/
#define BUCKET_SIZE (256)
/*
* The size of the sample is based on a statistical sampling rule of thumb.
* The common way is to perform sampling tests as long as the number of
* elements in each cell is at least 5.
*
* Instead of 5, we choose 32 to obtain more accurate results.
* If the data contain the maximum number of symbols, which is 256, we obtain a
* sample size bound by 8192.
*
* For a sample of at most 8KB of data per data range: 16 consecutive bytes
* from up to 512 locations.
*/
#define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED * \
SAMPLING_READ_SIZE / SAMPLING_INTERVAL)
struct bucket_item {
u32 count;
};
struct heuristic_ws {
/* Partial copy of input data */
u8 *sample;
u32 sample_size;
/* Buckets store counters for each byte value */
struct bucket_item *bucket;
struct list_head list;
};
static void free_heuristic_ws(struct list_head *ws)
{
struct heuristic_ws *workspace;
workspace = list_entry(ws, struct heuristic_ws, list);
kvfree(workspace->sample);
kfree(workspace->bucket);
kfree(workspace);
}
static struct list_head *alloc_heuristic_ws(void)
{
struct heuristic_ws *ws;
ws = kzalloc(sizeof(*ws), GFP_KERNEL);
if (!ws)
return ERR_PTR(-ENOMEM);
ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
if (!ws->sample)
goto fail;
ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL);
if (!ws->bucket)
goto fail;
INIT_LIST_HEAD(&ws->list);
return &ws->list;
fail:
free_heuristic_ws(&ws->list);
return ERR_PTR(-ENOMEM);
}
struct workspaces_list {
struct list_head idle_ws;
spinlock_t ws_lock;
/* Number of free workspaces */
......@@ -715,7 +797,11 @@ static struct {
atomic_t total_ws;
/* Waiters for a free workspace */
wait_queue_head_t ws_wait;
} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
};
static struct workspaces_list btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
static struct workspaces_list btrfs_heuristic_ws;
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
&btrfs_zlib_compress,
......@@ -725,11 +811,25 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
void __init btrfs_init_compress(void)
{
struct list_head *workspace;
int i;
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
struct list_head *workspace;
INIT_LIST_HEAD(&btrfs_heuristic_ws.idle_ws);
spin_lock_init(&btrfs_heuristic_ws.ws_lock);
atomic_set(&btrfs_heuristic_ws.total_ws, 0);
init_waitqueue_head(&btrfs_heuristic_ws.ws_wait);
workspace = alloc_heuristic_ws();
if (IS_ERR(workspace)) {
pr_warn(
"BTRFS: cannot preallocate heuristic workspace, will try later\n");
} else {
atomic_set(&btrfs_heuristic_ws.total_ws, 1);
btrfs_heuristic_ws.free_ws = 1;
list_add(workspace, &btrfs_heuristic_ws.idle_ws);
}
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
spin_lock_init(&btrfs_comp_ws[i].ws_lock);
atomic_set(&btrfs_comp_ws[i].total_ws, 0);
......@@ -756,18 +856,32 @@ void __init btrfs_init_compress(void)
* Preallocation makes a forward progress guarantees and we do not return
* errors.
*/
static struct list_head *find_workspace(int type)
static struct list_head *__find_workspace(int type, bool heuristic)
{
struct list_head *workspace;
int cpus = num_online_cpus();
int idx = type - 1;
unsigned nofs_flag;
struct list_head *idle_ws;
spinlock_t *ws_lock;
atomic_t *total_ws;
wait_queue_head_t *ws_wait;
int *free_ws;
if (heuristic) {
idle_ws = &btrfs_heuristic_ws.idle_ws;
ws_lock = &btrfs_heuristic_ws.ws_lock;
total_ws = &btrfs_heuristic_ws.total_ws;
ws_wait = &btrfs_heuristic_ws.ws_wait;
free_ws = &btrfs_heuristic_ws.free_ws;
} else {
idle_ws = &btrfs_comp_ws[idx].idle_ws;
ws_lock = &btrfs_comp_ws[idx].ws_lock;
total_ws = &btrfs_comp_ws[idx].total_ws;
ws_wait = &btrfs_comp_ws[idx].ws_wait;
free_ws = &btrfs_comp_ws[idx].free_ws;
}
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
int *free_ws = &btrfs_comp_ws[idx].free_ws;
again:
spin_lock(ws_lock);
if (!list_empty(idle_ws)) {
......@@ -797,7 +911,10 @@ static struct list_head *find_workspace(int type)
* context of btrfs_compress_bio/btrfs_compress_pages
*/
nofs_flag = memalloc_nofs_save();
workspace = btrfs_compress_op[idx]->alloc_workspace();
if (heuristic)
workspace = alloc_heuristic_ws();
else
workspace = btrfs_compress_op[idx]->alloc_workspace();
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(workspace)) {
......@@ -828,18 +945,38 @@ static struct list_head *find_workspace(int type)
return workspace;
}
static struct list_head *find_workspace(int type)
{
return __find_workspace(type, false);
}
/*
* put a workspace struct back on the list or free it if we have enough
* idle ones sitting around
*/
static void free_workspace(int type, struct list_head *workspace)
static void __free_workspace(int type, struct list_head *workspace,
bool heuristic)
{
int idx = type - 1;
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
int *free_ws = &btrfs_comp_ws[idx].free_ws;
struct list_head *idle_ws;
spinlock_t *ws_lock;
atomic_t *total_ws;
wait_queue_head_t *ws_wait;
int *free_ws;
if (heuristic) {
idle_ws = &btrfs_heuristic_ws.idle_ws;
ws_lock = &btrfs_heuristic_ws.ws_lock;
total_ws = &btrfs_heuristic_ws.total_ws;
ws_wait = &btrfs_heuristic_ws.ws_wait;
free_ws = &btrfs_heuristic_ws.free_ws;
} else {
idle_ws = &btrfs_comp_ws[idx].idle_ws;
ws_lock = &btrfs_comp_ws[idx].ws_lock;
total_ws = &btrfs_comp_ws[idx].total_ws;
ws_wait = &btrfs_comp_ws[idx].ws_wait;
free_ws = &btrfs_comp_ws[idx].free_ws;
}
spin_lock(ws_lock);
if (*free_ws <= num_online_cpus()) {
......@@ -850,7 +987,10 @@ static void free_workspace(int type, struct list_head *workspace)
}
spin_unlock(ws_lock);
btrfs_compress_op[idx]->free_workspace(workspace);
if (heuristic)
free_heuristic_ws(workspace);
else
btrfs_compress_op[idx]->free_workspace(workspace);
atomic_dec(total_ws);
wake:
/*
......@@ -861,6 +1001,11 @@ static void free_workspace(int type, struct list_head *workspace)
wake_up(ws_wait);
}
static void free_workspace(int type, struct list_head *ws)
{
return __free_workspace(type, ws, false);
}
/*
* cleanup function for module exit
*/
......@@ -869,6 +1014,13 @@ static void free_workspaces(void)
struct list_head *workspace;
int i;
while (!list_empty(&btrfs_heuristic_ws.idle_ws)) {
workspace = btrfs_heuristic_ws.idle_ws.next;
list_del(workspace);
free_heuristic_ws(workspace);
atomic_dec(&btrfs_heuristic_ws.total_ws);
}
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
workspace = btrfs_comp_ws[i].idle_ws.next;
......@@ -883,6 +1035,11 @@ static void free_workspaces(void)
* Given an address space and start and length, compress the bytes into @pages
* that are allocated on demand.
*
* @type_level is encoded algorithm and level, where level 0 means whatever
* default the algorithm chooses and is opaque here;
* - compression algo are 0-3
* - the level are bits 4-7
*
* @out_pages is an in/out parameter, holds maximum number of pages to allocate
* and returns number of actually allocated pages
*
......@@ -897,7 +1054,7 @@ static void free_workspaces(void)
* @max_out tells us the max number of bytes that we're allowed to
* stuff into pages
*/
int btrfs_compress_pages(int type, struct address_space *mapping,
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
u64 start, struct page **pages,
unsigned long *out_pages,
unsigned long *total_in,
......@@ -905,9 +1062,11 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
{
struct list_head *workspace;
int ret;
int type = type_level & 0xF;
workspace = find_workspace(type);
btrfs_compress_op[type - 1]->set_level(workspace, type_level);
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
start, pages,
out_pages,
......@@ -1065,6 +1224,211 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
return 1;
}
/*
* Shannon Entropy calculation
*
* Pure byte distribution analysis fails to determine compressiability of data.
* Try calculating entropy to estimate the average minimum number of bits
* needed to encode the sampled data.
*
* For convenience, return the percentage of needed bits, instead of amount of
* bits directly.
*
* @ENTROPY_LVL_ACEPTABLE - below that threshold, sample has low byte entropy
* and can be compressible with high probability
*
* @ENTROPY_LVL_HIGH - data are not compressible with high probability
*
* Use of ilog2() decreases precision, we lower the LVL to 5 to compensate.
*/
#define ENTROPY_LVL_ACEPTABLE (65)
#define ENTROPY_LVL_HIGH (80)
/*
* For increasead precision in shannon_entropy calculation,
* let's do pow(n, M) to save more digits after comma:
*
* - maximum int bit length is 64
* - ilog2(MAX_SAMPLE_SIZE) -> 13
* - 13 * 4 = 52 < 64 -> M = 4
*
* So use pow(n, 4).
*/
static inline u32 ilog2_w(u64 n)
{
return ilog2(n * n * n * n);
}
static u32 shannon_entropy(struct heuristic_ws *ws)
{
const u32 entropy_max = 8 * ilog2_w(2);
u32 entropy_sum = 0;
u32 p, p_base, sz_base;
u32 i;
sz_base = ilog2_w(ws->sample_size);
for (i = 0; i < BUCKET_SIZE && ws->bucket[i].count > 0; i++) {
p = ws->bucket[i].count;
p_base = ilog2_w(p);
entropy_sum += p * (sz_base - p_base);
}
entropy_sum /= ws->sample_size;
return entropy_sum * 100 / entropy_max;
}
/* Compare buckets by size, ascending */
static int bucket_comp_rev(const void *lv, const void *rv)
{
const struct bucket_item *l = (const struct bucket_item *)lv;
const struct bucket_item *r = (const struct bucket_item *)rv;
return r->count - l->count;
}
/*
* Size of the core byte set - how many bytes cover 90% of the sample
*
* There are several types of structured binary data that use nearly all byte
* values. The distribution can be uniform and counts in all buckets will be
* nearly the same (eg. encrypted data). Unlikely to be compressible.
*
* Other possibility is normal (Gaussian) distribution, where the data could
* be potentially compressible, but we have to take a few more steps to decide
* how much.
*
* @BYTE_CORE_SET_LOW - main part of byte values repeated frequently,
* compression algo can easy fix that
* @BYTE_CORE_SET_HIGH - data have uniform distribution and with high
* probability is not compressible
*/
#define BYTE_CORE_SET_LOW (64)
#define BYTE_CORE_SET_HIGH (200)
static int byte_core_set_size(struct heuristic_ws *ws)
{
u32 i;
u32 coreset_sum = 0;
const u32 core_set_threshold = ws->sample_size * 90 / 100;
struct bucket_item *bucket = ws->bucket;
/* Sort in reverse order */
sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
for (i = 0; i < BYTE_CORE_SET_LOW; i++)
coreset_sum += bucket[i].count;
if (coreset_sum > core_set_threshold)
return i;
for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) {
coreset_sum += bucket[i].count;
if (coreset_sum > core_set_threshold)
break;
}
return i;
}
/*
* Count byte values in buckets.
* This heuristic can detect textual data (configs, xml, json, html, etc).
* Because in most text-like data byte set is restricted to limited number of
* possible characters, and that restriction in most cases makes data easy to
* compress.
*
* @BYTE_SET_THRESHOLD - consider all data within this byte set size:
* less - compressible
* more - need additional analysis
*/
#define BYTE_SET_THRESHOLD (64)
static u32 byte_set_size(const struct heuristic_ws *ws)
{
u32 i;
u32 byte_set_size = 0;
for (i = 0; i < BYTE_SET_THRESHOLD; i++) {
if (ws->bucket[i].count > 0)
byte_set_size++;
}
/*
* Continue collecting count of byte values in buckets. If the byte
* set size is bigger then the threshold, it's pointless to continue,
* the detection technique would fail for this type of data.
*/
for (; i < BUCKET_SIZE; i++) {
if (ws->bucket[i].count > 0) {
byte_set_size++;
if (byte_set_size > BYTE_SET_THRESHOLD)
return byte_set_size;
}
}
return byte_set_size;
}
static bool sample_repeated_patterns(struct heuristic_ws *ws)
{
const u32 half_of_sample = ws->sample_size / 2;
const u8 *data = ws->sample;
return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0;
}
static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
struct heuristic_ws *ws)
{
struct page *page;
u64 index, index_end;
u32 i, curr_sample_pos;
u8 *in_data;
/*
* Compression handles the input data by chunks of 128KiB
* (defined by BTRFS_MAX_UNCOMPRESSED)
*
* We do the same for the heuristic and loop over the whole range.
*
* MAX_SAMPLE_SIZE - calculated under assumption that heuristic will
* process no more than BTRFS_MAX_UNCOMPRESSED at a time.
*/
if (end - start > BTRFS_MAX_UNCOMPRESSED)
end = start + BTRFS_MAX_UNCOMPRESSED;
index = start >> PAGE_SHIFT;
index_end = end >> PAGE_SHIFT;
/* Don't miss unaligned end */
if (!IS_ALIGNED(end, PAGE_SIZE))
index_end++;
curr_sample_pos = 0;
while (index < index_end) {
page = find_get_page(inode->i_mapping, index);
in_data = kmap(page);
/* Handle case where the start is not aligned to PAGE_SIZE */
i = start % PAGE_SIZE;
while (i < PAGE_SIZE - SAMPLING_READ_SIZE) {
/* Don't sample any garbage from the last page */
if (start > end - SAMPLING_READ_SIZE)
break;
memcpy(&ws->sample[curr_sample_pos], &in_data[i],
SAMPLING_READ_SIZE);
i += SAMPLING_INTERVAL;
start += SAMPLING_INTERVAL;
curr_sample_pos += SAMPLING_READ_SIZE;
}
kunmap(page);
put_page(page);
index++;
}
ws->sample_size = curr_sample_pos;
}
/*
* Compression heuristic.
*
......@@ -1082,18 +1446,87 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
*/
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
{
u64 index = start >> PAGE_SHIFT;
u64 end_index = end >> PAGE_SHIFT;
struct page *page;
int ret = 1;
struct list_head *ws_list = __find_workspace(0, true);
struct heuristic_ws *ws;
u32 i;
u8 byte;
int ret = 0;
while (index <= end_index) {
page = find_get_page(inode->i_mapping, index);
kmap(page);
kunmap(page);
put_page(page);
index++;
ws = list_entry(ws_list, struct heuristic_ws, list);
heuristic_collect_sample(inode, start, end, ws);
if (sample_repeated_patterns(ws)) {
ret = 1;
goto out;
}
memset(ws->bucket, 0, sizeof(*ws->bucket)*BUCKET_SIZE);
for (i = 0; i < ws->sample_size; i++) {
byte = ws->sample[i];
ws->bucket[byte].count++;
}
i = byte_set_size(ws);
if (i < BYTE_SET_THRESHOLD) {
ret = 2;
goto out;
}
i = byte_core_set_size(ws);
if (i <= BYTE_CORE_SET_LOW) {
ret = 3;
goto out;
}
if (i >= BYTE_CORE_SET_HIGH) {
ret = 0;
goto out;
}
i = shannon_entropy(ws);
if (i <= ENTROPY_LVL_ACEPTABLE) {
ret = 4;
goto out;
}
/*
* For the levels below ENTROPY_LVL_HIGH, additional analysis would be
* needed to give green light to compression.
*
* For now just assume that compression at that level is not worth the
* resources because:
*
* 1. it is possible to defrag the data later
*
* 2. the data would turn out to be hardly compressible, eg. 150 byte
* values, every bucket has counter at level ~54. The heuristic would
* be confused. This can happen when data have some internal repeated
* patterns like "abbacbbc...". This can be detected by analyzing
* pairs of bytes, which is too costly.
*/
if (i < ENTROPY_LVL_HIGH) {
ret = 5;
goto out;
} else {
ret = 0;
goto out;
}
out:
__free_workspace(0, ws_list, true);
return ret;
}
unsigned int btrfs_compress_str2level(const char *str)
{
if (strncmp(str, "zlib", 4) != 0)
return 0;
/* Accepted form: zlib:1 up to zlib:9 and nothing left after the number */
if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
return str[5] - '0';
return 0;
}
......@@ -76,7 +76,7 @@ struct compressed_bio {
void btrfs_init_compress(void);
void btrfs_exit_compress(void);
int btrfs_compress_pages(int type, struct address_space *mapping,
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
u64 start, struct page **pages,
unsigned long *out_pages,
unsigned long *total_in,
......@@ -95,6 +95,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
unsigned btrfs_compress_str2level(const char *str);
enum btrfs_compression_type {
BTRFS_COMPRESS_NONE = 0,
BTRFS_COMPRESS_ZLIB = 1,
......@@ -124,6 +126,8 @@ struct btrfs_compress_op {
struct page *dest_page,
unsigned long start_byte,
size_t srclen, size_t destlen);
void (*set_level)(struct list_head *ws, unsigned int type);
};
extern const struct btrfs_compress_op btrfs_zlib_compress;
......
......@@ -192,7 +192,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
* tree until you end up with a lock on the root. A locked buffer
* is returned, with a reference held.
*/
static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
......@@ -5496,8 +5496,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
} else if (left_end_reached) {
if (right_level == 0) {
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&right_key,
BTRFS_COMPARE_TREE_DELETED,
ctx);
......@@ -5508,8 +5507,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
continue;
} else if (right_end_reached) {
if (left_level == 0) {
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&left_key,
BTRFS_COMPARE_TREE_NEW,
ctx);
......@@ -5523,8 +5521,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
if (left_level == 0 && right_level == 0) {
cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
if (cmp < 0) {
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&left_key,
BTRFS_COMPARE_TREE_NEW,
ctx);
......@@ -5532,8 +5529,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
advance_left = ADVANCE;
} else if (cmp > 0) {
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&right_key,
BTRFS_COMPARE_TREE_DELETED,
ctx);
......@@ -5550,8 +5546,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
result = BTRFS_COMPARE_TREE_CHANGED;
else
result = BTRFS_COMPARE_TREE_SAME;
ret = changed_cb(left_root, right_root,
left_path, right_path,
ret = changed_cb(left_path, right_path,
&left_key, result, ctx);
if (ret < 0)
goto out;
......
......@@ -523,7 +523,7 @@ struct btrfs_caching_control {
};
/* Once caching_thread() finds this much free space, it will wake up waiters. */
#define CACHING_CTL_WAKE_UP (1024 * 1024 * 2)
#define CACHING_CTL_WAKE_UP SZ_2M
struct btrfs_io_ctl {
void *cur, *orig;
......@@ -763,8 +763,6 @@ struct btrfs_fs_info {
* delayed dir index item
*/
struct btrfs_block_rsv global_block_rsv;
/* block reservation for delay allocation */
struct btrfs_block_rsv delalloc_block_rsv;
/* block reservation for metadata operations */
struct btrfs_block_rsv trans_block_rsv;
/* block reservation for chunk tree */
......@@ -790,6 +788,7 @@ struct btrfs_fs_info {
*/
unsigned long pending_changes;
unsigned long compress_type:4;
unsigned int compress_level;
int commit_interval;
/*
* It is a suggestive number, the read side is safe even it gets a
......@@ -878,9 +877,6 @@ struct btrfs_fs_info {
rwlock_t tree_mod_log_lock;
struct rb_root tree_mod_log;
atomic_t nr_async_submits;
atomic_t async_submit_draining;
atomic_t nr_async_bios;
atomic_t async_delalloc_pages;
atomic_t open_ioctl_trans;
......@@ -1100,6 +1096,11 @@ struct btrfs_fs_info {
u32 nodesize;
u32 sectorsize;
u32 stripesize;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
#endif
};
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
......@@ -1338,6 +1339,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
#define BTRFS_MOUNT_REF_VERIFY (1 << 28)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (2048)
......@@ -2639,7 +2641,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct extent_buffer *buf,
u64 parent, int last_ref);
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 owner,
struct btrfs_root *root, u64 owner,
u64 offset, u64 ram_bytes,
struct btrfs_key *ins);
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
......@@ -2658,7 +2660,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 flags,
int level, int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset);
......@@ -2670,7 +2672,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
......@@ -2744,6 +2746,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
u64 *qgroup_reserved, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode,
......@@ -2751,6 +2755,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
unsigned short type);
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv,
unsigned short type);
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
......@@ -2809,6 +2816,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
const struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int lowest_level,
u64 min_trans);
......@@ -2821,9 +2829,7 @@ enum btrfs_compare_tree_result {
BTRFS_COMPARE_TREE_CHANGED,
BTRFS_COMPARE_TREE_SAME,
};
typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
struct btrfs_root *right_root,
struct btrfs_path *left_path,
typedef int (*btrfs_changed_cb_t)(struct btrfs_path *left_path,
struct btrfs_path *right_path,
struct btrfs_key *key,
enum btrfs_compare_tree_result result,
......
......@@ -581,36 +581,12 @@ static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_block_rsv *dst_rsv;
u64 num_bytes;
int ret;
bool release = false;
src_rsv = trans->block_rsv;
dst_rsv = &fs_info->delayed_block_rsv;
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
/*
* If our block_rsv is the delalloc block reserve then check and see if
* we have our extra reservation for updating the inode. If not fall
* through and try to reserve space quickly.
*
* We used to try and steal from the delalloc block rsv or the global
* reserve, but we'd steal a full reservation, which isn't kind. We are
* here through delalloc which means we've likely just cowed down close
* to the leaf that contains the inode, so we would steal less just
* doing the fallback inode update, so if we do end up having to steal
* from the global block rsv we hopefully only steal one or two blocks
* worth which is less likely to hurt us.
*/
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
spin_lock(&inode->lock);
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&inode->runtime_flags))
release = true;
else
src_rsv = NULL;
spin_unlock(&inode->lock);
}
/*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
......@@ -618,7 +594,7 @@ static int btrfs_delayed_inode_reserve_metadata(
* space.
*
* Now if src_rsv == delalloc_block_rsv we'll let it just steal since
* we're accounted for.
* we always reserve enough to update the inode item.
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
......@@ -643,32 +619,12 @@ static int btrfs_delayed_inode_reserve_metadata(
}
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
/*
* Migrate only takes a reservation, it doesn't touch the size of the
* block_rsv. This is to simplify people who don't normally have things
* migrated from their block rsv. If they go to release their
* reservation, that will decrease the size as well, so if migrate
* reduced size we'd end up with a negative size. But for the
* delalloc_meta_reserved stuff we will only know to drop 1 reservation,
* but we could in fact do this reserve/migrate dance several times
* between the time we did the original reservation and we'd clean it
* up. So to take care of this, release the space for the meta
* reservation here. I think it may be time for a documentation page on
* how block rsvs. work.
*/
if (!ret) {
trace_btrfs_space_reservation(fs_info, "delayed_inode",
btrfs_ino(inode), num_bytes, 1);
node->bytes_reserved = num_bytes;
}
if (release) {
trace_btrfs_space_reservation(fs_info, "delalloc",
btrfs_ino(inode), num_bytes, 0);
btrfs_block_rsv_release(fs_info, src_rsv, num_bytes);
}
return ret;
}
......
......@@ -40,10 +40,10 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
* compare two delayed tree backrefs with same bytenr and type
*/
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
struct btrfs_delayed_tree_ref *ref1, int type)
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
struct btrfs_delayed_tree_ref *ref2)
{
if (type == BTRFS_TREE_BLOCK_REF_KEY) {
if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
if (ref1->root < ref2->root)
return -1;
if (ref1->root > ref2->root)
......@@ -60,8 +60,8 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
/*
* compare two delayed data backrefs with same bytenr and type
*/
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
struct btrfs_delayed_data_ref *ref1)
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
struct btrfs_delayed_data_ref *ref2)
{
if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
if (ref1->root < ref2->root)
......@@ -85,6 +85,34 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
return 0;
}
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
struct btrfs_delayed_ref_node *ref2,
bool check_seq)
{
int ret = 0;
if (ref1->type < ref2->type)
return -1;
if (ref1->type > ref2->type)
return 1;
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
btrfs_delayed_node_to_tree_ref(ref2));
else
ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
btrfs_delayed_node_to_data_ref(ref2));
if (ret)
return ret;
if (check_seq) {
if (ref1->seq < ref2->seq)
return -1;
if (ref1->seq > ref2->seq)
return 1;
}
return 0;
}
/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
struct rb_node *node)
......@@ -96,15 +124,43 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
u64 bytenr;
ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
bytenr = ins->node.bytenr;
bytenr = ins->bytenr;
while (*p) {
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
href_node);
if (bytenr < entry->node.bytenr)
if (bytenr < entry->bytenr)
p = &(*p)->rb_left;
else if (bytenr > entry->bytenr)
p = &(*p)->rb_right;
else
return entry;
}
rb_link_node(node, parent_node, p);
rb_insert_color(node, root);
return NULL;
}
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
struct btrfs_delayed_ref_node *ins)
{
struct rb_node **p = &root->rb_node;
struct rb_node *node = &ins->ref_node;
struct rb_node *parent_node = NULL;
struct btrfs_delayed_ref_node *entry;
while (*p) {
int comp;
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
ref_node);
comp = comp_refs(ins, entry, true);
if (comp < 0)
p = &(*p)->rb_left;
else if (bytenr > entry->node.bytenr)
else if (comp > 0)
p = &(*p)->rb_right;
else
return entry;
......@@ -133,15 +189,15 @@ find_ref_head(struct rb_root *root, u64 bytenr,
while (n) {
entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
if (bytenr < entry->node.bytenr)
if (bytenr < entry->bytenr)
n = n->rb_left;
else if (bytenr > entry->node.bytenr)
else if (bytenr > entry->bytenr)
n = n->rb_right;
else
return entry;
}
if (entry && return_bigger) {
if (bytenr > entry->node.bytenr) {
if (bytenr > entry->bytenr) {
n = rb_next(&entry->href_node);
if (!n)
n = rb_first(root);
......@@ -164,17 +220,17 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
if (mutex_trylock(&head->mutex))
return 0;
refcount_inc(&head->node.refs);
refcount_inc(&head->refs);
spin_unlock(&delayed_refs->lock);
mutex_lock(&head->mutex);
spin_lock(&delayed_refs->lock);
if (!head->node.in_tree) {
if (RB_EMPTY_NODE(&head->href_node)) {
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
btrfs_put_delayed_ref_head(head);
return -EAGAIN;
}
btrfs_put_delayed_ref(&head->node);
btrfs_put_delayed_ref_head(head);
return 0;
}
......@@ -183,15 +239,11 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head,
struct btrfs_delayed_ref_node *ref)
{
if (btrfs_delayed_ref_is_head(ref)) {
head = btrfs_delayed_node_to_head(ref);
rb_erase(&head->href_node, &delayed_refs->href_root);
} else {
assert_spin_locked(&head->lock);
list_del(&ref->list);
if (!list_empty(&ref->add_list))
list_del(&ref->add_list);
}
assert_spin_locked(&head->lock);
rb_erase(&ref->ref_node, &head->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
list_del(&ref->add_list);
ref->in_tree = 0;
btrfs_put_delayed_ref(ref);
atomic_dec(&delayed_refs->num_entries);
......@@ -206,36 +258,18 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
u64 seq)
{
struct btrfs_delayed_ref_node *next;
struct rb_node *node = rb_next(&ref->ref_node);
bool done = false;
next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
list);
while (!done && &next->list != &head->ref_list) {
while (!done && node) {
int mod;
struct btrfs_delayed_ref_node *next2;
next2 = list_next_entry(next, list);
if (next == ref)
goto next;
next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
node = rb_next(node);
if (seq && next->seq >= seq)
goto next;
if (next->type != ref->type)
goto next;
if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
btrfs_delayed_node_to_tree_ref(next),
ref->type))
goto next;
if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
btrfs_delayed_node_to_data_ref(next)))
goto next;
break;
if (comp_refs(ref, next, false))
break;
if (ref->action == next->action) {
mod = next->ref_mod;
......@@ -259,8 +293,6 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
}
next:
next = next2;
}
return done;
......@@ -272,11 +304,12 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head)
{
struct btrfs_delayed_ref_node *ref;
struct rb_node *node;
u64 seq = 0;
assert_spin_locked(&head->lock);
if (list_empty(&head->ref_list))
if (RB_EMPTY_ROOT(&head->ref_tree))
return;
/* We don't have too many refs to merge for data. */
......@@ -293,22 +326,13 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
}
spin_unlock(&fs_info->tree_mod_seq_lock);
ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
list);
while (&ref->list != &head->ref_list) {
again:
for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
if (seq && ref->seq >= seq)
goto next;
if (merge_ref(trans, delayed_refs, head, ref, seq)) {
if (list_empty(&head->ref_list))
break;
ref = list_first_entry(&head->ref_list,
struct btrfs_delayed_ref_node,
list);
continue;
}
next:
ref = list_next_entry(ref, list);
if (merge_ref(trans, delayed_refs, head, ref, seq))
goto again;
}
}
......@@ -380,8 +404,8 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans)
head->processing = 1;
WARN_ON(delayed_refs->num_heads_ready == 0);
delayed_refs->num_heads_ready--;
delayed_refs->run_delayed_start = head->node.bytenr +
head->node.num_bytes;
delayed_refs->run_delayed_start = head->bytenr +
head->num_bytes;
return head;
}
......@@ -391,37 +415,19 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans)
* Return 0 for insert.
* Return >0 for merge.
*/
static int
add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *root,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *ref)
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *root,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *ref)
{
struct btrfs_delayed_ref_node *exist;
int mod;
int ret = 0;
spin_lock(&href->lock);
/* Check whether we can merge the tail node with ref */
if (list_empty(&href->ref_list))
goto add_tail;
exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
list);
/* No need to compare bytenr nor is_head */
if (exist->type != ref->type || exist->seq != ref->seq)
goto add_tail;
if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
btrfs_delayed_node_to_tree_ref(ref),
ref->type))
goto add_tail;
if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
btrfs_delayed_node_to_data_ref(ref)))
goto add_tail;
exist = tree_insert(&href->ref_tree, ref);
if (!exist)
goto inserted;
/* Now we are sure we can merge */
ret = 1;
......@@ -452,9 +458,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
drop_delayed_ref(trans, root, href, exist);
spin_unlock(&href->lock);
return ret;
add_tail:
list_add_tail(&ref->list, &href->ref_list);
inserted:
if (ref->action == BTRFS_ADD_DELAYED_REF)
list_add_tail(&ref->add_list, &href->ref_add_list);
atomic_inc(&root->num_entries);
......@@ -469,20 +473,16 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
*/
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_node *existing,
struct btrfs_delayed_ref_node *update,
struct btrfs_delayed_ref_head *existing,
struct btrfs_delayed_ref_head *update,
int *old_ref_mod_ret)
{
struct btrfs_delayed_ref_head *existing_ref;
struct btrfs_delayed_ref_head *ref;
int old_ref_mod;
existing_ref = btrfs_delayed_node_to_head(existing);
ref = btrfs_delayed_node_to_head(update);
BUG_ON(existing_ref->is_data != ref->is_data);
BUG_ON(existing->is_data != update->is_data);
spin_lock(&existing_ref->lock);
if (ref->must_insert_reserved) {
spin_lock(&existing->lock);
if (update->must_insert_reserved) {
/* if the extent was freed and then
* reallocated before the delayed ref
* entries were processed, we can end up
......@@ -490,7 +490,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
* the must_insert_reserved flag set.
* Set it again here
*/
existing_ref->must_insert_reserved = ref->must_insert_reserved;
existing->must_insert_reserved = update->must_insert_reserved;
/*
* update the num_bytes so we make sure the accounting
......@@ -500,22 +500,22 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
}
if (ref->extent_op) {
if (!existing_ref->extent_op) {
existing_ref->extent_op = ref->extent_op;
if (update->extent_op) {
if (!existing->extent_op) {
existing->extent_op = update->extent_op;
} else {
if (ref->extent_op->update_key) {
memcpy(&existing_ref->extent_op->key,
&ref->extent_op->key,
sizeof(ref->extent_op->key));
existing_ref->extent_op->update_key = true;
if (update->extent_op->update_key) {
memcpy(&existing->extent_op->key,
&update->extent_op->key,
sizeof(update->extent_op->key));
existing->extent_op->update_key = true;
}
if (ref->extent_op->update_flags) {
existing_ref->extent_op->flags_to_set |=
ref->extent_op->flags_to_set;
existing_ref->extent_op->update_flags = true;
if (update->extent_op->update_flags) {
existing->extent_op->flags_to_set |=
update->extent_op->flags_to_set;
existing->extent_op->update_flags = true;
}
btrfs_free_delayed_extent_op(ref->extent_op);
btrfs_free_delayed_extent_op(update->extent_op);
}
}
/*
......@@ -523,23 +523,23 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
* only need the lock for this case cause we could be processing it
* currently, for refs we just added we know we're a-ok.
*/
old_ref_mod = existing_ref->total_ref_mod;
old_ref_mod = existing->total_ref_mod;
if (old_ref_mod_ret)
*old_ref_mod_ret = old_ref_mod;
existing->ref_mod += update->ref_mod;
existing_ref->total_ref_mod += update->ref_mod;
existing->total_ref_mod += update->ref_mod;
/*
* If we are going to from a positive ref mod to a negative or vice
* versa we need to make sure to adjust pending_csums accordingly.
*/
if (existing_ref->is_data) {
if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
if (existing->is_data) {
if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
delayed_refs->pending_csums -= existing->num_bytes;
if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
delayed_refs->pending_csums += existing->num_bytes;
}
spin_unlock(&existing_ref->lock);
spin_unlock(&existing->lock);
}
/*
......@@ -550,14 +550,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_qgroup_extent_record *qrecord,
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
int action, int is_data, int *qrecord_inserted_ret,
int *old_ref_mod, int *new_ref_mod)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_head *head_ref = NULL;
struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1;
int must_insert_reserved = 0;
......@@ -593,26 +592,21 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
/* first set the basic ref node struct up */
refcount_set(&ref->refs, 1);
ref->bytenr = bytenr;
ref->num_bytes = num_bytes;
ref->ref_mod = count_mod;
ref->type = 0;
ref->action = 0;
ref->is_head = 1;
ref->in_tree = 1;
ref->seq = 0;
head_ref = btrfs_delayed_node_to_head(ref);
refcount_set(&head_ref->refs, 1);
head_ref->bytenr = bytenr;
head_ref->num_bytes = num_bytes;
head_ref->ref_mod = count_mod;
head_ref->must_insert_reserved = must_insert_reserved;
head_ref->is_data = is_data;
INIT_LIST_HEAD(&head_ref->ref_list);
head_ref->ref_tree = RB_ROOT;
INIT_LIST_HEAD(&head_ref->ref_add_list);
RB_CLEAR_NODE(&head_ref->href_node);
head_ref->processing = 0;
head_ref->total_ref_mod = count_mod;
head_ref->qgroup_reserved = 0;
head_ref->qgroup_ref_root = 0;
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
/* Record qgroup extent info if provided */
if (qrecord) {
......@@ -632,17 +626,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
qrecord_inserted = 1;
}
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
trace_add_delayed_ref_head(fs_info, ref, head_ref, action);
trace_add_delayed_ref_head(fs_info, head_ref, action);
existing = htree_insert(&delayed_refs->href_root,
&head_ref->href_node);
if (existing) {
WARN_ON(ref_root && reserved && existing->qgroup_ref_root
&& existing->qgroup_reserved);
update_existing_head_ref(delayed_refs, &existing->node, ref,
update_existing_head_ref(delayed_refs, existing, head_ref,
old_ref_mod);
/*
* we've updated the existing ref, free the newly
......@@ -699,7 +690,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
ref->seq = seq;
INIT_LIST_HEAD(&ref->list);
RB_CLEAR_NODE(&ref->ref_node);
INIT_LIST_HEAD(&ref->add_list);
full_ref = btrfs_delayed_node_to_tree_ref(ref);
......@@ -713,7 +704,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
/*
* XXX: memory should be freed at the same level allocated.
......@@ -756,7 +747,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
ref->seq = seq;
INIT_LIST_HEAD(&ref->list);
RB_CLEAR_NODE(&ref->ref_node);
INIT_LIST_HEAD(&ref->add_list);
full_ref = btrfs_delayed_node_to_data_ref(ref);
......@@ -772,8 +763,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
if (ret > 0)
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
}
......@@ -821,7 +811,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
bytenr, num_bytes, 0, 0, action, 0,
&qrecord_inserted, old_ref_mod,
new_ref_mod);
......@@ -888,7 +878,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
bytenr, num_bytes, ref_root, reserved,
action, 1, &qrecord_inserted,
old_ref_mod, new_ref_mod);
......@@ -920,7 +910,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data, NULL, NULL, NULL);
......
......@@ -26,18 +26,8 @@
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
/*
* XXX: Qu: I really hate the design that ref_head and tree/data ref shares the
* same ref_node structure.
* Ref_head is in a higher logic level than tree/data ref, and duplicated
* bytenr/num_bytes in ref_node is really a waste or memory, they should be
* referred from ref_head.
* This gets more disgusting after we use list to store tree/data ref in
* ref_head. Must clean this mess up later.
*/
struct btrfs_delayed_ref_node {
/*data/tree ref use list, stored in ref_head->ref_list. */
struct list_head list;
struct rb_node ref_node;
/*
* If action is BTRFS_ADD_DELAYED_REF, also link this node to
* ref_head->ref_add_list, then we do not need to iterate the
......@@ -91,8 +81,9 @@ struct btrfs_delayed_extent_op {
* reference count modifications we've queued up.
*/
struct btrfs_delayed_ref_head {
struct btrfs_delayed_ref_node node;
u64 bytenr;
u64 num_bytes;
refcount_t refs;
/*
* the mutex is held while running the refs, and it is also
* held when checking the sum of reference modifications.
......@@ -100,7 +91,7 @@ struct btrfs_delayed_ref_head {
struct mutex mutex;
spinlock_t lock;
struct list_head ref_list;
struct rb_root ref_tree;
/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
struct list_head ref_add_list;
......@@ -115,6 +106,14 @@ struct btrfs_delayed_ref_head {
*/
int total_ref_mod;
/*
* This is the current outstanding mod references for this bytenr. This
* is used with lookup_extent_info to get an accurate reference count
* for a bytenr, so it is adjusted as delayed refs are run so that any
* on disk reference count + ref_mod is accurate.
*/
int ref_mod;
/*
* For qgroup reserved space freeing.
*
......@@ -234,15 +233,18 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
case BTRFS_SHARED_DATA_REF_KEY:
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
break;
case 0:
kmem_cache_free(btrfs_delayed_ref_head_cachep, ref);
break;
default:
BUG();
}
}
}
static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
{
if (refcount_dec_and_test(&head->refs))
kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
}
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
......@@ -282,36 +284,18 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.
*/
static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
{
return node->is_head;
}
/*
* helper functions to cast a node into its container
*/
static inline struct btrfs_delayed_tree_ref *
btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
{
WARN_ON(btrfs_delayed_ref_is_head(node));
return container_of(node, struct btrfs_delayed_tree_ref, node);
}
static inline struct btrfs_delayed_data_ref *
btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
{
WARN_ON(btrfs_delayed_ref_is_head(node));
return container_of(node, struct btrfs_delayed_data_ref, node);
}
static inline struct btrfs_delayed_ref_head *
btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
{
WARN_ON(!btrfs_delayed_ref_is_head(node));
return container_of(node, struct btrfs_delayed_ref_head, node);
}
#endif
......@@ -50,6 +50,8 @@
#include "sysfs.h"
#include "qgroup.h"
#include "compression.h"
#include "tree-checker.h"
#include "ref-verify.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
......@@ -543,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
return ret;
}
#define CORRUPT(reason, eb, root, slot) \
btrfs_crit(root->fs_info, \
"corrupt %s, %s: block=%llu, root=%llu, slot=%d", \
btrfs_header_level(eb) == 0 ? "leaf" : "node", \
reason, btrfs_header_bytenr(eb), root->objectid, slot)
static noinline int check_leaf(struct btrfs_root *root,
struct extent_buffer *leaf)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
struct btrfs_key leaf_key;
u32 nritems = btrfs_header_nritems(leaf);
int slot;
/*
* Extent buffers from a relocation tree have a owner field that
* corresponds to the subvolume tree they are based on. So just from an
* extent buffer alone we can not find out what is the id of the
* corresponding subvolume tree, so we can not figure out if the extent
* buffer corresponds to the root of the relocation tree or not. So skip
* this check for relocation trees.
*/
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
struct btrfs_root *check_root;
key.objectid = btrfs_header_owner(leaf);
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
check_root = btrfs_get_fs_root(fs_info, &key, false);
/*
* The only reason we also check NULL here is that during
* open_ctree() some roots has not yet been set up.
*/
if (!IS_ERR_OR_NULL(check_root)) {
struct extent_buffer *eb;
eb = btrfs_root_node(check_root);
/* if leaf is the root, then it's fine */
if (leaf != eb) {
CORRUPT("non-root leaf's nritems is 0",
leaf, check_root, 0);
free_extent_buffer(eb);
return -EIO;
}
free_extent_buffer(eb);
}
return 0;
}
if (nritems == 0)
return 0;
/* Check the 0 item */
if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
BTRFS_LEAF_DATA_SIZE(fs_info)) {
CORRUPT("invalid item offset size pair", leaf, root, 0);
return -EIO;
}
/*
* Check to make sure each items keys are in the correct order and their
* offsets make sense. We only have to loop through nritems-1 because
* we check the current slot against the next slot, which verifies the
* next slot's offset+size makes sense and that the current's slot
* offset is correct.
*/
for (slot = 0; slot < nritems - 1; slot++) {
btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
btrfs_item_key_to_cpu(leaf, &key, slot + 1);
/* Make sure the keys are in the right order */
if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
CORRUPT("bad key order", leaf, root, slot);
return -EIO;
}
/*
* Make sure the offset and ends are right, remember that the
* item data starts at the end of the leaf and grows towards the
* front.
*/
if (btrfs_item_offset_nr(leaf, slot) !=
btrfs_item_end_nr(leaf, slot + 1)) {
CORRUPT("slot offset bad", leaf, root, slot);
return -EIO;
}
/*
* Check to make sure that we don't point outside of the leaf,
* just in case all the items are consistent to each other, but
* all point outside of the leaf.
*/
if (btrfs_item_end_nr(leaf, slot) >
BTRFS_LEAF_DATA_SIZE(fs_info)) {
CORRUPT("slot end outside of leaf", leaf, root, slot);
return -EIO;
}
}
return 0;
}
static int check_node(struct btrfs_root *root, struct extent_buffer *node)
{
unsigned long nr = btrfs_header_nritems(node);
struct btrfs_key key, next_key;
int slot;
u64 bytenr;
int ret = 0;
if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
btrfs_crit(root->fs_info,
"corrupt node: block %llu root %llu nritems %lu",
node->start, root->objectid, nr);
return -EIO;
}
for (slot = 0; slot < nr - 1; slot++) {
bytenr = btrfs_node_blockptr(node, slot);
btrfs_node_key_to_cpu(node, &key, slot);
btrfs_node_key_to_cpu(node, &next_key, slot + 1);
if (!bytenr) {
CORRUPT("invalid item slot", node, root, slot);
ret = -EIO;
goto out;
}
if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
CORRUPT("bad key order", node, root, slot);
ret = -EIO;
goto out;
}
}
out:
return ret;
}
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
u64 phy_offset, struct page *page,
u64 start, u64 end, int mirror)
......@@ -748,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
if (found_level == 0 && check_leaf(root, eb)) {
if (found_level == 0 && btrfs_check_leaf(root, eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
if (found_level > 0 && check_node(root, eb))
if (found_level > 0 && btrfs_check_node(root, eb))
ret = -EIO;
if (!ret)
......@@ -879,22 +741,9 @@ static void run_one_async_start(struct btrfs_work *work)
static void run_one_async_done(struct btrfs_work *work)
{
struct btrfs_fs_info *fs_info;
struct async_submit_bio *async;
int limit;
async = container_of(work, struct async_submit_bio, work);
fs_info = async->fs_info;
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
/*
* atomic_dec_return implies a barrier for waitqueue_active
*/
if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
/* If an error occurred we just want to clean up the bio and move on */
if (async->status) {
......@@ -942,19 +791,10 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
async->status = 0;
atomic_inc(&fs_info->nr_async_submits);
if (op_is_sync(bio->bi_opf))
btrfs_set_work_high_priority(&async->work);
btrfs_queue_work(fs_info->workers, &async->work);
while (atomic_read(&fs_info->async_submit_draining) &&
atomic_read(&fs_info->nr_async_submits)) {
wait_event(fs_info->async_submit_wait,
(atomic_read(&fs_info->nr_async_submits) == 0));
}
return 0;
}
......@@ -1005,9 +845,9 @@ static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio,
return ret;
}
static int check_async_write(unsigned long bio_flags)
static int check_async_write(struct btrfs_inode *bi)
{
if (bio_flags & EXTENT_BIO_TREE_LOG)
if (atomic_read(&bi->sync_writers))
return 0;
#ifdef CONFIG_X86
if (static_cpu_has(X86_FEATURE_XMM4_2))
......@@ -1022,7 +862,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
{
struct inode *inode = private_data;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int async = check_async_write(bio_flags);
int async = check_async_write(BTRFS_I(inode));
blk_status_t ret;
if (bio_op(bio) != REQ_OP_WRITE) {
......@@ -2607,14 +2447,6 @@ int open_ctree(struct super_block *sb,
goto fail_delalloc_bytes;
}
fs_info->btree_inode = new_inode(sb);
if (!fs_info->btree_inode) {
err = -ENOMEM;
goto fail_bio_counter;
}
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
......@@ -2647,17 +2479,12 @@ int open_ctree(struct super_block *sb,
btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv,
BTRFS_BLOCK_RSV_GLOBAL);
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
BTRFS_BLOCK_RSV_DELALLOC);
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
BTRFS_BLOCK_RSV_DELOPS);
atomic_set(&fs_info->nr_async_submits, 0);
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->qgroup_op_seq, 0);
atomic_set(&fs_info->reada_works_cnt, 0);
......@@ -2673,12 +2500,21 @@ int open_ctree(struct super_block *sb,
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
spin_lock_init(&fs_info->reada_lock);
btrfs_init_ref_verify(fs_info);
fs_info->thread_pool_size = min_t(unsigned long,
num_online_cpus() + 2, 8);
INIT_LIST_HEAD(&fs_info->ordered_roots);
spin_lock_init(&fs_info->ordered_root_lock);
fs_info->btree_inode = new_inode(sb);
if (!fs_info->btree_inode) {
err = -ENOMEM;
goto fail_bio_counter;
}
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
GFP_KERNEL);
if (!fs_info->delayed_root) {
......@@ -2895,12 +2731,13 @@ int open_ctree(struct super_block *sb,
sb->s_bdi->congested_fn = btrfs_congested_fn;
sb->s_bdi->congested_data = fs_info;
sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
sb->s_bdi->ra_pages = VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE;
sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
memcpy(&sb->s_uuid, fs_info->fsid, BTRFS_FSID_SIZE);
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(fs_info);
......@@ -3083,6 +2920,9 @@ int open_ctree(struct super_block *sb,
if (ret)
goto fail_trans_kthread;
if (btrfs_build_ref_tree(fs_info))
btrfs_err(fs_info, "couldn't build ref tree");
/* do not make disk changes in broken FS or nologreplay is given */
if (btrfs_super_log_root(disk_super) != 0 &&
!btrfs_test_opt(fs_info, NOLOGREPLAY)) {
......@@ -3948,6 +3788,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
cleanup_srcu_struct(&fs_info->subvol_srcu);
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
__btrfs_free_block_rsv(root->orphan_block_rsv);
root->orphan_block_rsv = NULL;
......@@ -4007,7 +3848,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
buf->len,
fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) {
btrfs_print_leaf(buf);
ASSERT(0);
}
......@@ -4272,26 +4113,28 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_node *tmp;
struct rb_node *n;
bool pin_bytes = false;
head = rb_entry(node, struct btrfs_delayed_ref_head,
href_node);
if (!mutex_trylock(&head->mutex)) {
refcount_inc(&head->node.refs);
refcount_inc(&head->refs);
spin_unlock(&delayed_refs->lock);
mutex_lock(&head->mutex);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref(&head->node);
btrfs_put_delayed_ref_head(head);
spin_lock(&delayed_refs->lock);
continue;
}
spin_lock(&head->lock);
list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
list) {
while ((n = rb_first(&head->ref_tree)) != NULL) {
ref = rb_entry(n, struct btrfs_delayed_ref_node,
ref_node);
ref->in_tree = 0;
list_del(&ref->list);
rb_erase(&ref->ref_node, &head->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
list_del(&ref->add_list);
atomic_dec(&delayed_refs->num_entries);
......@@ -4304,16 +4147,16 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
if (head->processing == 0)
delayed_refs->num_heads_ready--;
atomic_dec(&delayed_refs->num_entries);
head->node.in_tree = 0;
rb_erase(&head->href_node, &delayed_refs->href_root);
RB_CLEAR_NODE(&head->href_node);
spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock);
mutex_unlock(&head->mutex);
if (pin_bytes)
btrfs_pin_extent(fs_info, head->node.bytenr,
head->node.num_bytes, 1);
btrfs_put_delayed_ref(&head->node);
btrfs_pin_extent(fs_info, head->bytenr,
head->num_bytes, 1);
btrfs_put_delayed_ref_head(head);
cond_resched();
spin_lock(&delayed_refs->lock);
}
......
此差异已折叠。
......@@ -110,7 +110,6 @@ struct extent_page_data {
struct bio *bio;
struct extent_io_tree *tree;
get_extent_t *get_extent;
unsigned long bio_flags;
/* tells writepage not to lock the state bits for this range
* it still does the unlocking
......@@ -2762,8 +2761,8 @@ static int merge_bio(struct extent_io_tree *tree, struct page *page,
*/
static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
struct writeback_control *wbc,
struct page *page, sector_t sector,
size_t size, unsigned long offset,
struct page *page, u64 offset,
size_t size, unsigned long pg_offset,
struct block_device *bdev,
struct bio **bio_ret,
bio_end_io_t end_io_func,
......@@ -2777,6 +2776,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
int contig = 0;
int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
size_t page_size = min_t(size_t, size, PAGE_SIZE);
sector_t sector = offset >> 9;
if (bio_ret && *bio_ret) {
bio = *bio_ret;
......@@ -2787,8 +2787,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
if (prev_bio_flags != bio_flags || !contig ||
force_bio_submit ||
merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, offset) < page_size) {
merge_bio(tree, page, pg_offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, pg_offset) < page_size) {
ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
if (ret < 0) {
*bio_ret = NULL;
......@@ -2802,8 +2802,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
}
}
bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
bio_add_page(bio, page, page_size, offset);
bio = btrfs_bio_alloc(bdev, offset);
bio_add_page(bio, page, page_size, pg_offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
bio->bi_write_hint = page->mapping->host->i_write_hint;
......@@ -2893,7 +2893,6 @@ static int __do_readpage(struct extent_io_tree *tree,
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 cur_end;
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
int ret = 0;
......@@ -2929,6 +2928,7 @@ static int __do_readpage(struct extent_io_tree *tree,
}
while (cur <= end) {
bool force_bio_submit = false;
u64 offset;
if (cur >= last_byte) {
char *userpage;
......@@ -2968,9 +2968,9 @@ static int __do_readpage(struct extent_io_tree *tree,
iosize = ALIGN(iosize, blocksize);
if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
disk_io_size = em->block_len;
sector = em->block_start >> 9;
offset = em->block_start;
} else {
sector = (em->block_start + extent_offset) >> 9;
offset = em->block_start + extent_offset;
disk_io_size = iosize;
}
bdev = em->bdev;
......@@ -3063,8 +3063,8 @@ static int __do_readpage(struct extent_io_tree *tree,
}
ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
page, sector, disk_io_size, pg_offset,
bdev, bio,
page, offset, disk_io_size,
pg_offset, bdev, bio,
end_bio_extent_readpage, mirror_num,
*bio_flags,
this_bio_flag,
......@@ -3325,7 +3325,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
u64 extent_offset;
u64 block_start;
u64 iosize;
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
size_t pg_offset = 0;
......@@ -3368,6 +3367,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
while (cur <= end) {
u64 em_end;
u64 offset;
if (cur >= i_size) {
if (tree->ops && tree->ops->writepage_end_io_hook)
......@@ -3389,7 +3389,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
BUG_ON(end < cur);
iosize = min(em_end - cur, end - cur + 1);
iosize = ALIGN(iosize, blocksize);
sector = (em->block_start + extent_offset) >> 9;
offset = em->block_start + extent_offset;
bdev = em->bdev;
block_start = em->block_start;
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
......@@ -3432,7 +3432,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
}
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
page, sector, iosize, pg_offset,
page, offset, iosize, pg_offset,
bdev, &epd->bio,
end_bio_extent_writepage,
0, 0, 0, false);
......@@ -3716,7 +3716,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
u64 offset = eb->start;
u32 nritems;
unsigned long i, num_pages;
unsigned long bio_flags = 0;
unsigned long start, end;
unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
int ret = 0;
......@@ -3724,8 +3723,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
atomic_set(&eb->io_pages, num_pages);
if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
bio_flags = EXTENT_BIO_TREE_LOG;
/* set btree blocks beyond nritems with 0 to avoid stale content. */
nritems = btrfs_header_nritems(eb);
......@@ -3749,11 +3746,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_page_dirty_for_io(p);
set_page_writeback(p);
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
p, offset >> 9, PAGE_SIZE, 0, bdev,
p, offset, PAGE_SIZE, 0, bdev,
&epd->bio,
end_bio_extent_buffer_writepage,
0, epd->bio_flags, bio_flags, false);
epd->bio_flags = bio_flags;
0, 0, 0, false);
if (ret) {
set_btree_ioerr(p);
if (PageWriteback(p))
......@@ -3790,7 +3786,6 @@ int btree_write_cache_pages(struct address_space *mapping,
.tree = tree,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
.bio_flags = 0,
};
int ret = 0;
int done = 0;
......@@ -4063,7 +4058,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
if (epd->bio) {
int ret;
ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
ret = submit_one_bio(epd->bio, 0, 0);
BUG_ON(ret < 0); /* -ENOMEM */
epd->bio = NULL;
}
......@@ -4086,7 +4081,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.get_extent = get_extent,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
.bio_flags = 0,
};
ret = __extent_writepage(page, wbc, &epd);
......@@ -4111,7 +4105,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.get_extent = get_extent,
.extent_locked = 1,
.sync_io = mode == WB_SYNC_ALL,
.bio_flags = 0,
};
struct writeback_control wbc_writepages = {
.sync_mode = mode,
......@@ -4151,7 +4144,6 @@ int extent_writepages(struct extent_io_tree *tree,
.get_extent = get_extent,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
.bio_flags = 0,
};
ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
......
......@@ -34,7 +34,6 @@
* type for this bio
*/
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_TREE_LOG 2
#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
......
......@@ -856,7 +856,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
if (update_refs && disk_bytenr > 0) {
ret = btrfs_inc_extent_ref(trans, fs_info,
ret = btrfs_inc_extent_ref(trans, root,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
......@@ -940,7 +940,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
} else if (update_refs && disk_bytenr > 0) {
ret = btrfs_free_extent(trans, fs_info,
ret = btrfs_free_extent(trans, root,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
key.objectid, key.offset -
......@@ -1234,7 +1234,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
extent_end - split);
btrfs_mark_buffer_dirty(leaf);
ret = btrfs_inc_extent_ref(trans, fs_info, bytenr, num_bytes,
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
if (ret) {
......@@ -1268,7 +1268,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
extent_end = other_end;
del_slot = path->slots[0] + 1;
del_nr++;
ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
if (ret) {
......@@ -1288,7 +1288,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
key.offset = other_start;
del_slot = path->slots[0];
del_nr++;
ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
if (ret) {
......@@ -1590,7 +1590,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
int ret = 0;
bool only_release_metadata = false;
bool force_page_uptodate = false;
bool need_unlock;
nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
PAGE_SIZE / (sizeof(struct page *)));
......@@ -1613,6 +1612,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
size_t copied;
size_t dirty_sectors;
size_t num_sectors;
int extents_locked;
WARN_ON(num_pages > nrptrs);
......@@ -1656,6 +1656,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
}
WARN_ON(reserve_bytes == 0);
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
reserve_bytes);
if (ret) {
......@@ -1669,7 +1670,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
release_bytes = reserve_bytes;
need_unlock = false;
again:
/*
* This is going to setup the pages array with the number of
......@@ -1679,19 +1679,23 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
ret = prepare_pages(inode, pages, num_pages,
pos, write_bytes,
force_page_uptodate);
if (ret)
if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
break;
}
ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages,
extents_locked = lock_and_cleanup_extent_if_need(
BTRFS_I(inode), pages,
num_pages, pos, write_bytes, &lockstart,
&lockend, &cached_state);
if (ret < 0) {
if (ret == -EAGAIN)
if (extents_locked < 0) {
if (extents_locked == -EAGAIN)
goto again;
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
ret = extents_locked;
break;
} else if (ret > 0) {
need_unlock = true;
ret = 0;
}
copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
......@@ -1718,23 +1722,10 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
PAGE_SIZE);
}
/*
* If we had a short copy we need to release the excess delaloc
* bytes we reserved. We need to increment outstanding_extents
* because btrfs_delalloc_release_space and
* btrfs_delalloc_release_metadata will decrement it, but
* we still have an outstanding extent for the chunk we actually
* managed to copy.
*/
if (num_sectors > dirty_sectors) {
/* release everything except the sectors we dirtied */
release_bytes -= dirty_sectors <<
fs_info->sb->s_blocksize_bits;
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
if (only_release_metadata) {
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes);
......@@ -1756,10 +1747,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (copied > 0)
ret = btrfs_dirty_pages(inode, pages, dirty_pages,
pos, copied, NULL);
if (need_unlock)
if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state,
GFP_NOFS);
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) {
btrfs_drop_pages(pages, num_pages);
break;
......@@ -2046,7 +2038,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
int ret = 0, err;
bool full_sync = 0;
bool full_sync = false;
u64 len;
/*
......
......@@ -1286,12 +1286,8 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path)
{
u64 start, end;
int ret;
start = block_group->key.objectid;
end = block_group->key.objectid + block_group->key.offset;
block_group->needs_free_space = 0;
ret = add_new_free_space_info(trans, fs_info, block_group, path);
......
......@@ -500,11 +500,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc);
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
goto out_put;
}
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
out_put:
iput(inode);
out_release:
......
此差异已折叠。
......@@ -86,6 +86,19 @@ struct btrfs_ioctl_received_subvol_args_32 {
struct btrfs_ioctl_received_subvol_args_32)
#endif
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_send_args_32 {
__s64 send_fd; /* in */
__u64 clone_sources_count; /* in */
compat_uptr_t clone_sources; /* in */
__u64 parent_root; /* in */
__u64 flags; /* in */
__u64 reserved[4]; /* in */
} __attribute__ ((__packed__));
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
struct btrfs_ioctl_send_args_32)
#endif
static int btrfs_clone(struct inode *src, struct inode *inode,
u64 off, u64 olen, u64 olen_aligned, u64 destoff,
......@@ -609,23 +622,6 @@ static noinline int create_subvol(struct inode *dir,
return ret;
}
static void btrfs_wait_for_no_snapshotting_writes(struct btrfs_root *root)
{
s64 writers;
DEFINE_WAIT(wait);
do {
prepare_to_wait(&root->subv_writers->wait, &wait,
TASK_UNINTERRUPTIBLE);
writers = percpu_counter_sum(&root->subv_writers->counter);
if (writers)
schedule();
finish_wait(&root->subv_writers->wait, &wait);
} while (writers);
}
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct dentry *dentry,
u64 *async_transid, bool readonly,
......@@ -654,7 +650,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
atomic_inc(&root->will_be_snapshotted);
smp_mb__after_atomic();
btrfs_wait_for_no_snapshotting_writes(root);
/* wait for no snapshot writes */
wait_event(root->subv_writers->wait,
percpu_counter_sum(&root->subv_writers->counter) == 0);
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret)
......@@ -1219,6 +1217,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
unlock_page(pages[i]);
put_page(pages[i]);
}
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return i_done;
out:
......@@ -1229,6 +1228,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT);
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return ret;
......@@ -1420,21 +1420,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
filemap_flush(inode->i_mapping);
}
if (do_compress) {
/* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that
* ordered extents get created before we return
*/
atomic_inc(&fs_info->async_submit_draining);
while (atomic_read(&fs_info->nr_async_submits) ||
atomic_read(&fs_info->async_delalloc_pages)) {
wait_event(fs_info->async_submit_wait,
(atomic_read(&fs_info->nr_async_submits) == 0 &&
atomic_read(&fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&fs_info->async_submit_draining);
}
if (range->compress_type == BTRFS_COMPRESS_LZO) {
btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
......@@ -1842,8 +1827,13 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
ret = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key, &root->root_item);
if (ret < 0) {
btrfs_end_transaction(trans);
goto out_reset;
}
ret = btrfs_commit_transaction(trans);
btrfs_commit_transaction(trans);
out_reset:
if (ret)
btrfs_set_root_flags(&root->root_item, root_flags);
......@@ -2179,7 +2169,7 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
inode = file_inode(file);
ret = search_ioctl(inode, &args.key, &buf_size,
(char *)(&uarg->buf[0]));
(char __user *)(&uarg->buf[0]));
if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
ret = -EFAULT;
else if (ret == -EOVERFLOW &&
......@@ -3706,7 +3696,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
if (disko) {
inode_add_bytes(inode, datal);
ret = btrfs_inc_extent_ref(trans,
fs_info,
root,
disko, diskl, 0,
root->root_key.objectid,
btrfs_ino(BTRFS_I(inode)),
......@@ -4129,10 +4119,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_space_info *dest_orig;
struct btrfs_ioctl_space_info __user *user_dest;
struct btrfs_space_info *info;
u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
BTRFS_BLOCK_GROUP_SYSTEM,
BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
static const u64 types[] = {
BTRFS_BLOCK_GROUP_DATA,
BTRFS_BLOCK_GROUP_SYSTEM,
BTRFS_BLOCK_GROUP_METADATA,
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA
};
int num_types = 4;
int alloc_size;
int ret = 0;
......@@ -4504,8 +4496,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
ipath->fspath->val[i] = rel_ptr;
}
ret = copy_to_user((void *)(unsigned long)ipa->fspath,
(void *)(unsigned long)ipath->fspath, size);
ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
ipath->fspath, size);
if (ret) {
ret = -EFAULT;
goto out;
......@@ -4540,13 +4532,14 @@ static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
}
static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
void __user *arg)
void __user *arg, int version)
{
int ret = 0;
int size;
struct btrfs_ioctl_logical_ino_args *loi;
struct btrfs_data_container *inodes = NULL;
struct btrfs_path *path = NULL;
bool ignore_offset;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
......@@ -4555,13 +4548,30 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
if (IS_ERR(loi))
return PTR_ERR(loi);
if (version == 1) {
ignore_offset = false;
size = min_t(u32, loi->size, SZ_64K);
} else {
/* All reserved bits must be 0 for now */
if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) {
ret = -EINVAL;
goto out_loi;
}
/* Only accept flags we have defined so far */
if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
ret = -EINVAL;
goto out_loi;
}
ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
size = min_t(u32, loi->size, SZ_16M);
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto out;
}
size = min_t(u32, loi->size, SZ_64K);
inodes = init_data_container(size);
if (IS_ERR(inodes)) {
ret = PTR_ERR(inodes);
......@@ -4570,20 +4580,21 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
}
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
build_ino_list, inodes);
build_ino_list, inodes, ignore_offset);
if (ret == -EINVAL)
ret = -ENOENT;
if (ret < 0)
goto out;
ret = copy_to_user((void *)(unsigned long)loi->inodes,
(void *)(unsigned long)inodes, size);
ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes,
size);
if (ret)
ret = -EFAULT;
out:
btrfs_free_path(path);
kvfree(inodes);
out_loi:
kfree(loi);
return ret;
......@@ -5160,15 +5171,11 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
root->root_key.objectid);
if (ret < 0 && ret != -EEXIST) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;
}
}
ret = btrfs_commit_transaction(trans);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
out:
up_write(&fs_info->subvol_sem);
mnt_drop_write_file(file);
......@@ -5490,6 +5497,41 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
return ret;
}
static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
{
struct btrfs_ioctl_send_args *arg;
int ret;
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_send_args_32 args32;
ret = copy_from_user(&args32, argp, sizeof(args32));
if (ret)
return -EFAULT;
arg = kzalloc(sizeof(*arg), GFP_KERNEL);
if (!arg)
return -ENOMEM;
arg->send_fd = args32.send_fd;
arg->clone_sources_count = args32.clone_sources_count;
arg->clone_sources = compat_ptr(args32.clone_sources);
arg->parent_root = args32.parent_root;
arg->flags = args32.flags;
memcpy(arg->reserved, args32.reserved,
sizeof(args32.reserved));
#else
return -ENOTTY;
#endif
} else {
arg = memdup_user(argp, sizeof(*arg));
if (IS_ERR(arg))
return PTR_ERR(arg);
}
ret = btrfs_ioctl_send(file, arg);
kfree(arg);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
......@@ -5554,7 +5596,9 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_INO_PATHS:
return btrfs_ioctl_ino_to_path(root, argp);
case BTRFS_IOC_LOGICAL_INO:
return btrfs_ioctl_logical_to_ino(fs_info, argp);
return btrfs_ioctl_logical_to_ino(fs_info, argp, 1);
case BTRFS_IOC_LOGICAL_INO_V2:
return btrfs_ioctl_logical_to_ino(fs_info, argp, 2);
case BTRFS_IOC_SPACE_INFO:
return btrfs_ioctl_space_info(fs_info, argp);
case BTRFS_IOC_SYNC: {
......@@ -5595,7 +5639,11 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif
case BTRFS_IOC_SEND:
return btrfs_ioctl_send(file, argp);
return _btrfs_ioctl_send(file, argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
case BTRFS_IOC_SEND_32:
return _btrfs_ioctl_send(file, argp, true);
#endif
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(fs_info, argp);
case BTRFS_IOC_QUOTA_CTL:
......
......@@ -430,10 +430,15 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
return ret;
}
static void lzo_set_level(struct list_head *ws, unsigned int type)
{
}
const struct btrfs_compress_op btrfs_lzo_compress = {
.alloc_workspace = lzo_alloc_workspace,
.free_workspace = lzo_free_workspace,
.compress_pages = lzo_compress_pages,
.decompress_bio = lzo_decompress_bio,
.decompress = lzo_decompress,
.set_level = lzo_set_level,
};
......@@ -242,6 +242,15 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
}
spin_unlock(&root->ordered_extent_lock);
/*
* We don't need the count_max_extents here, we can assume that all of
* that work has been done at higher layers, so this is truly the
* smallest the extent is going to get.
*/
spin_lock(&BTRFS_I(inode)->lock);
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
spin_unlock(&BTRFS_I(inode)->lock);
return 0;
}
......@@ -591,11 +600,19 @@ void btrfs_remove_ordered_extent(struct inode *inode,
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_inode_tree *tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
struct btrfs_root *root = btrfs_inode->root;
struct rb_node *node;
bool dec_pending_ordered = false;
tree = &BTRFS_I(inode)->ordered_tree;
/* This is paired with btrfs_add_ordered_extent. */
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
if (root != fs_info->tree_root)
btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
tree = &btrfs_inode->ordered_tree;
spin_lock_irq(&tree->lock);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -226,10 +226,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
struct btrfs_root *root;
int err = 0;
int ret;
bool can_recover = true;
if (sb_rdonly(fs_info->sb))
can_recover = false;
path = btrfs_alloc_path();
if (!path)
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册