提交 afd582ac 编写于 作者: D David Sterba

Merge remote-tracking branch 'remotes/josef/for-chris' into btrfs-next-stable

无相关合并请求
......@@ -103,11 +103,6 @@ struct btrfs_inode {
*/
u64 delalloc_bytes;
/* total number of bytes that may be used for this inode for
* delalloc
*/
u64 reserved_bytes;
/*
* the size of the file stored in the metadata on disk. data=ordered
* means the in-memory i_size might be larger than the size on disk
......@@ -115,9 +110,6 @@ struct btrfs_inode {
*/
u64 disk_i_size;
/* flags field from the on disk inode */
u32 flags;
/*
* if this is a directory then index_cnt is the counter for the index
* number for new files that are created
......@@ -131,6 +123,15 @@ struct btrfs_inode {
*/
u64 last_unlink_trans;
/*
* Number of bytes outstanding that are going to need csums. This is
* used in ENOSPC accounting.
*/
u64 csum_bytes;
/* flags field from the on disk inode */
u32 flags;
/*
* Counters to keep track of the number of extent item's we may use due
* to delalloc and such. outstanding_extents is the number of extent
......
......@@ -30,6 +30,7 @@
#include <linux/kobject.h>
#include <trace/events/btrfs.h>
#include <asm/kmap_types.h>
#include <linux/pagemap.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
......@@ -772,14 +773,8 @@ struct btrfs_space_info {
struct btrfs_block_rsv {
u64 size;
u64 reserved;
u64 freed[2];
struct btrfs_space_info *space_info;
struct list_head list;
spinlock_t lock;
atomic_t usage;
unsigned int priority:8;
unsigned int durable:1;
unsigned int refill_used:1;
unsigned int full:1;
};
......@@ -840,10 +835,10 @@ struct btrfs_block_group_cache {
spinlock_t lock;
u64 pinned;
u64 reserved;
u64 reserved_pinned;
u64 bytes_super;
u64 flags;
u64 sectorsize;
u64 cache_generation;
unsigned int ro:1;
unsigned int dirty:1;
unsigned int iref:1;
......@@ -899,6 +894,10 @@ struct btrfs_fs_info {
spinlock_t block_group_cache_lock;
struct rb_root block_group_cache_tree;
/* keep track of unallocated space */
spinlock_t free_chunk_lock;
u64 free_chunk_space;
struct extent_io_tree freed_extents[2];
struct extent_io_tree *pinned_extents;
......@@ -919,11 +918,6 @@ struct btrfs_fs_info {
struct btrfs_block_rsv empty_block_rsv;
/* list of block reservations that cross multiple transactions */
struct list_head durable_block_rsv_list;
struct mutex durable_block_rsv_mutex;
u64 generation;
u64 last_trans_committed;
......@@ -2129,6 +2123,11 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
}
static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
{
return mapping_gfp_mask(mapping) & ~__GFP_FS;
}
/* extent-tree.c */
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
unsigned num_items)
......@@ -2137,6 +2136,17 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
3 * num_items;
}
/*
* Doing a truncate won't result in new nodes or leaves, just what we need for
* COW.
*/
static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
unsigned num_items)
{
return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
num_items;
}
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
......@@ -2196,8 +2206,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 owner, u64 offset);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve, int sinfo);
int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
......@@ -2240,25 +2248,20 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int btrfs_block_rsv_check(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int min_factor);
int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved, int min_factor);
u64 min_reserved);
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
struct btrfs_block_rsv *dst_rsv,
u64 num_bytes);
void btrfs_block_rsv_release(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
int btrfs_set_block_group_rw(struct btrfs_root *root,
......@@ -2579,11 +2582,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
......
......@@ -1648,6 +1648,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->free_chunk_lock);
mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
......@@ -1665,8 +1666,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_init_block_rsv(&fs_info->trans_block_rsv);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
btrfs_init_block_rsv(&fs_info->empty_block_rsv);
INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
mutex_init(&fs_info->durable_block_rsv_mutex);
atomic_set(&fs_info->nr_async_submits, 0);
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
......@@ -1677,6 +1676,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->metadata_ratio = 0;
fs_info->defrag_inodes = RB_ROOT;
fs_info->trans_no_join = 0;
fs_info->free_chunk_space = 0;
fs_info->thread_pool_size = min_t(unsigned long,
num_online_cpus() + 2, 8);
......@@ -2545,8 +2545,6 @@ int close_ctree(struct btrfs_root *root)
/* clear out the rbtree of defraggable inodes */
btrfs_run_defrag_inodes(root->fs_info);
btrfs_put_block_group_cache(fs_info);
/*
* Here come 2 situations when btrfs is broken to flip readonly:
*
......@@ -2572,6 +2570,8 @@ int close_ctree(struct btrfs_root *root)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
btrfs_put_block_group_cache(fs_info);
kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);
......
......@@ -52,6 +52,21 @@ enum {
CHUNK_ALLOC_LIMITED = 2,
};
/*
* Control how reservations are dealt with.
*
* RESERVE_FREE - freeing a reservation.
* RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
* ENOSPC accounting
* RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
* bytes_may_use as the ENOSPC accounting is done elsewhere
*/
enum {
RESERVE_FREE = 0,
RESERVE_ALLOC = 1,
RESERVE_ALLOC_NO_ACCOUNT = 2,
};
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc);
......@@ -81,6 +96,8 @@ static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
......@@ -104,7 +121,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
if (atomic_dec_and_test(&cache->count)) {
WARN_ON(cache->pinned > 0);
WARN_ON(cache->reserved > 0);
WARN_ON(cache->reserved_pinned > 0);
kfree(cache->free_space_ctl);
kfree(cache);
}
......@@ -465,7 +481,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
* we likely hold important locks.
*/
if (trans && (!trans->transaction->in_commit) &&
(root && root != root->fs_info->tree_root)) {
(root && root != root->fs_info->tree_root) &&
btrfs_test_opt(root, SPACE_CACHE)) {
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
......@@ -2700,6 +2717,13 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
goto again;
}
/* We've already setup this transaction, go ahead and exit */
if (block_group->cache_generation == trans->transid &&
i_size_read(inode)) {
dcs = BTRFS_DC_SETUP;
goto out_put;
}
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
......@@ -2749,12 +2773,15 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
if (!ret)
dcs = BTRFS_DC_SETUP;
btrfs_free_reserved_data_space(inode, num_pages);
out_put:
iput(inode);
out_free:
btrfs_release_path(path);
out:
spin_lock(&block_group->lock);
if (!ret)
block_group->cache_generation = trans->transid;
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
......@@ -3122,16 +3149,13 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
BTRFS_I(inode)->reserved_bytes += bytes;
spin_unlock(&data_sinfo->lock);
return 0;
}
/*
* called when we are clearing an delalloc extent from the
* inode's io_tree or there was an error for whatever reason
* after calling btrfs_check_data_free_space
* Called if we need to clear a data reservation for this inode.
*/
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
{
......@@ -3144,7 +3168,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
data_sinfo = BTRFS_I(inode)->space_info;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
BTRFS_I(inode)->reserved_bytes -= bytes;
spin_unlock(&data_sinfo->lock);
}
......@@ -3165,6 +3188,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
struct btrfs_space_info *sinfo, u64 alloc_bytes,
int force)
{
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
u64 thresh;
......@@ -3172,6 +3196,13 @@ static int should_alloc_chunk(struct btrfs_root *root,
if (force == CHUNK_ALLOC_FORCE)
return 1;
/*
* We need to take into account the global rsv because for all intents
* and purposes it's used space. Don't worry about locking the
* global_rsv, it doesn't change except when the transaction commits.
*/
num_allocated += global_rsv->size;
/*
* in limited mode, we want to have some free space up to
* about 1% of the FS size.
......@@ -3303,7 +3334,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 to_reclaim, int sync)
struct btrfs_root *root, u64 to_reclaim,
bool wait_ordered)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
......@@ -3311,7 +3343,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
unsigned long progress;
......@@ -3319,7 +3351,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
space_info = block_rsv->space_info;
smp_mb();
reserved = space_info->bytes_reserved;
reserved = space_info->bytes_may_use;
progress = space_info->reservation_progress;
if (reserved == 0)
......@@ -3334,7 +3366,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
}
max_reclaim = min(reserved, to_reclaim);
nr_pages = max_t(unsigned long, nr_pages,
max_reclaim >> PAGE_CACHE_SHIFT);
while (loops < 1024) {
/* have the flusher threads jump in and do some IO */
smp_mb();
......@@ -3343,9 +3376,9 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
spin_lock(&space_info->lock);
if (reserved > space_info->bytes_reserved)
reclaimed += reserved - space_info->bytes_reserved;
reserved = space_info->bytes_reserved;
if (reserved > space_info->bytes_may_use)
reclaimed += reserved - space_info->bytes_may_use;
reserved = space_info->bytes_may_use;
spin_unlock(&space_info->lock);
loops++;
......@@ -3356,11 +3389,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (trans && trans->transaction->blocked)
return -EAGAIN;
time_left = schedule_timeout_interruptible(1);
if (wait_ordered && !trans) {
btrfs_wait_ordered_extents(root, 0, 0);
} else {
time_left = schedule_timeout_interruptible(1);
/* We were interrupted, exit */
if (time_left)
break;
/* We were interrupted, exit */
if (time_left)
break;
}
/* we've kicked the IO a few times, if anything has been freed,
* exit. There is no sense in looping here for a long time
......@@ -3375,35 +3412,39 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
}
}
if (reclaimed >= to_reclaim && !trans)
btrfs_wait_ordered_extents(root, 0, 0);
return reclaimed >= to_reclaim;
}
/*
* Retries tells us how many times we've called reserve_metadata_bytes. The
* idea is if this is the first call (retries == 0) then we will add to our
* reserved count if we can't make the allocation in order to hold our place
* while we go and try and free up space. That way for retries > 1 we don't try
* and add space, we just check to see if the amount of unused space is >= the
* total space, meaning that our reservation is valid.
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
* @block_rsv - the block_rsv we're allocating for
* @orig_bytes - the number of bytes we want
* @flush - wether or not we can flush to make our reservation
*
* However if we don't intend to retry this reservation, pass -1 as retries so
* that it short circuits this logic.
* This will reserve orgi_bytes number of bytes from the space info associated
* with the block_rsv. If there is not enough space it will make an attempt to
* flush out space to make room. It will do this by flushing delalloc if
* possible or committing the transaction. If flush is 0 then no attempts to
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 orig_bytes, int flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
u64 unused;
struct btrfs_trans_handle *trans;
u64 used;
u64 num_bytes = orig_bytes;
int retries = 0;
int ret = 0;
bool committed = false;
bool flushing = false;
bool wait_ordered = false;
trans = (struct btrfs_trans_handle *)current->journal_info;
again:
ret = 0;
spin_lock(&space_info->lock);
......@@ -3431,9 +3472,9 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
}
ret = -ENOSPC;
unused = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
/*
* The idea here is that we've not already over-reserved the block group
......@@ -3442,10 +3483,9 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
* lets start flushing stuff first and then come back and try to make
* our reservation.
*/
if (unused <= space_info->total_bytes) {
unused = space_info->total_bytes - unused;
if (unused >= num_bytes) {
space_info->bytes_reserved += orig_bytes;
if (used <= space_info->total_bytes) {
if (used + orig_bytes <= space_info->total_bytes) {
space_info->bytes_may_use += orig_bytes;
ret = 0;
} else {
/*
......@@ -3461,10 +3501,60 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
* amount plus the amount of bytes that we need for this
* reservation.
*/
num_bytes = unused - space_info->total_bytes +
wait_ordered = true;
num_bytes = used - space_info->total_bytes +
(orig_bytes * (retries + 1));
}
if (ret) {
u64 profile = btrfs_get_alloc_profile(root, 0);
u64 avail;
/*
* If we have a lot of space that's pinned, don't bother doing
* the overcommit dance yet and just commit the transaction.
*/
avail = (space_info->total_bytes - space_info->bytes_used) * 8;
do_div(avail, 10);
if (space_info->bytes_pinned >= avail && flush && !trans &&
!committed) {
space_info->flush = 1;
flushing = true;
spin_unlock(&space_info->lock);
goto commit;
}
spin_lock(&root->fs_info->free_chunk_lock);
avail = root->fs_info->free_chunk_space;
/*
* If we have dup, raid1 or raid10 then only half of the free
* space is actually useable.
*/
if (profile & (BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))
avail >>= 1;
/*
* If we aren't flushing don't let us overcommit too much, say
* 1/8th of the space. If we can flush, let it overcommit up to
* 1/2 of the space.
*/
if (flush)
avail >>= 3;
else
avail >>= 1;
spin_unlock(&root->fs_info->free_chunk_lock);
if (used + num_bytes < space_info->total_bytes + avail) {
space_info->bytes_may_use += orig_bytes;
ret = 0;
} else {
wait_ordered = true;
}
}
/*
* Couldn't make our reservation, save our place so while we're trying
* to reclaim space we can actually use it instead of somebody else
......@@ -3484,7 +3574,7 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
* We do synchronous shrinking since we don't actually unreserve
* metadata until after the IO is completed.
*/
ret = shrink_delalloc(trans, root, num_bytes, 1);
ret = shrink_delalloc(trans, root, num_bytes, wait_ordered);
if (ret < 0)
goto out;
......@@ -3496,25 +3586,16 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
* so go back around and try again.
*/
if (retries < 2) {
wait_ordered = true;
retries++;
goto again;
}
/*
* Not enough space to be reclaimed, don't bother committing the
* transaction.
*/
spin_lock(&space_info->lock);
if (space_info->bytes_pinned < orig_bytes)
ret = -ENOSPC;
spin_unlock(&space_info->lock);
if (ret)
goto out;
ret = -EAGAIN;
if (trans)
goto out;
commit:
ret = -ENOSPC;
if (committed)
goto out;
......@@ -3542,10 +3623,12 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_block_rsv *block_rsv;
if (root->ref_cows)
struct btrfs_block_rsv *block_rsv = NULL;
if (root->ref_cows || root == root->fs_info->csum_root)
block_rsv = trans->block_rsv;
else
if (!block_rsv)
block_rsv = root->block_rsv;
if (!block_rsv)
......@@ -3616,7 +3699,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
}
if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes;
space_info->bytes_may_use -= num_bytes;
space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
......@@ -3640,9 +3723,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
{
memset(rsv, 0, sizeof(*rsv));
spin_lock_init(&rsv->lock);
atomic_set(&rsv->usage, 1);
rsv->priority = 6;
INIT_LIST_HEAD(&rsv->list);
}
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
......@@ -3663,29 +3743,11 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv)
{
if (rsv && atomic_dec_and_test(&rsv->usage)) {
btrfs_block_rsv_release(root, rsv, (u64)-1);
if (!rsv->durable)
kfree(rsv);
}
}
/*
* make the block_rsv struct be able to capture freed space.
* the captured space will re-add to the the block_rsv struct
* after transaction commit
*/
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv)
{
block_rsv->durable = 1;
mutex_lock(&fs_info->durable_block_rsv_mutex);
list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
mutex_unlock(&fs_info->durable_block_rsv_mutex);
btrfs_block_rsv_release(root, rsv, (u64)-1);
kfree(rsv);
}
int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
{
......@@ -3694,7 +3756,7 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
if (num_bytes == 0)
return 0;
ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 1);
return 0;
......@@ -3703,55 +3765,52 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
return ret;
}
int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved, int min_factor)
int btrfs_block_rsv_check(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int min_factor)
{
u64 num_bytes = 0;
int commit_trans = 0;
int ret = -ENOSPC;
if (!block_rsv)
return 0;
spin_lock(&block_rsv->lock);
if (min_factor > 0)
num_bytes = div_factor(block_rsv->size, min_factor);
if (min_reserved > num_bytes)
num_bytes = min_reserved;
num_bytes = div_factor(block_rsv->size, min_factor);
if (block_rsv->reserved >= num_bytes)
ret = 0;
spin_unlock(&block_rsv->lock);
if (block_rsv->reserved >= num_bytes) {
return ret;
}
int btrfs_block_rsv_refill(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 min_reserved)
{
u64 num_bytes = 0;
int ret = -ENOSPC;
if (!block_rsv)
return 0;
spin_lock(&block_rsv->lock);
num_bytes = min_reserved;
if (block_rsv->reserved >= num_bytes)
ret = 0;
} else {
else
num_bytes -= block_rsv->reserved;
if (block_rsv->durable &&
block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
commit_trans = 1;
}
spin_unlock(&block_rsv->lock);
if (!ret)
return 0;
if (block_rsv->refill_used) {
ret = reserve_metadata_bytes(trans, root, block_rsv,
num_bytes, 0);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
return 0;
}
}
if (commit_trans) {
if (trans)
return -EAGAIN;
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
ret = btrfs_commit_transaction(trans, root);
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
return 0;
}
return -ENOSPC;
return ret;
}
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
......@@ -3827,12 +3886,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (sinfo->total_bytes > num_bytes) {
num_bytes = sinfo->total_bytes - num_bytes;
block_rsv->reserved += num_bytes;
sinfo->bytes_reserved += num_bytes;
sinfo->bytes_may_use += num_bytes;
}
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_reserved -= num_bytes;
sinfo->bytes_may_use -= num_bytes;
sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
......@@ -3848,16 +3907,12 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
fs_info->chunk_block_rsv.space_info = space_info;
fs_info->chunk_block_rsv.priority = 10;
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
fs_info->global_block_rsv.space_info = space_info;
fs_info->global_block_rsv.priority = 10;
fs_info->global_block_rsv.refill_used = 1;
fs_info->delalloc_block_rsv.space_info = space_info;
fs_info->trans_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.space_info = space_info;
fs_info->empty_block_rsv.priority = 10;
fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
......@@ -3865,10 +3920,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);
btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
update_global_block_rsv(fs_info);
}
......@@ -3883,46 +3934,13 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
}
int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *rsv)
{
struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
u64 num_bytes;
int ret;
/*
* Truncate should be freeing data, but give us 2 items just in case it
* needs to use some space. We may want to be smarter about this in the
* future.
*/
num_bytes = btrfs_calc_trans_metadata_size(root, 2);
/* We already have enough bytes, just return */
if (rsv->reserved >= num_bytes)
return 0;
num_bytes -= rsv->reserved;
/*
* You should have reserved enough space before hand to do this, so this
* should not fail.
*/
ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
BUG_ON(ret);
return 0;
}
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (!trans->bytes_reserved)
return;
BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
btrfs_block_rsv_release(root, trans->block_rsv,
trans->bytes_reserved);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
trans->bytes_reserved = 0;
}
......@@ -3964,11 +3982,19 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
/**
* drop_outstanding_extent - drop an outstanding extent
* @inode: the inode we're dropping the extent for
*
* This is called when we are freeing up an outstanding extent, either called
* after an error or after an extent is written. This will return the number of
* reserved extents that need to be freed. This must be called with
* BTRFS_I(inode)->lock held.
*/
static unsigned drop_outstanding_extent(struct inode *inode)
{
unsigned dropped_extents = 0;
spin_lock(&BTRFS_I(inode)->lock);
BUG_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
......@@ -3978,19 +4004,70 @@ static unsigned drop_outstanding_extent(struct inode *inode)
*/
if (BTRFS_I(inode)->outstanding_extents >=
BTRFS_I(inode)->reserved_extents)
goto out;
return 0;
dropped_extents = BTRFS_I(inode)->reserved_extents -
BTRFS_I(inode)->outstanding_extents;
BTRFS_I(inode)->reserved_extents -= dropped_extents;
out:
spin_unlock(&BTRFS_I(inode)->lock);
return dropped_extents;
}
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
/**
* calc_csum_metadata_size - return the amount of metada space that must be
* reserved/free'd for the given bytes.
* @inode: the inode we're manipulating
* @num_bytes: the number of bytes in question
* @reserve: 1 if we are reserving space, 0 if we are freeing space
*
* This adjusts the number of csum_bytes in the inode and then returns the
* correct amount of metadata that must either be reserved or freed. We
* calculate how many checksums we can fit into one leaf and then divide the
* number of bytes that will need to be checksumed by this value to figure out
* how many checksums will be required. If we are adding bytes then the number
* may go up and we will return the number of additional bytes that must be
* reserved. If it is going down we will return the number of bytes that must
* be freed.
*
* This must be called with BTRFS_I(inode)->lock held.
*/
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
int reserve)
{
return num_bytes >>= 3;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 csum_size;
int num_csums_per_leaf;
int num_csums;
int old_csums;
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
BTRFS_I(inode)->csum_bytes == 0)
return 0;
old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
if (reserve)
BTRFS_I(inode)->csum_bytes += num_bytes;
else
BTRFS_I(inode)->csum_bytes -= num_bytes;
csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
num_csums_per_leaf = (int)div64_u64(csum_size,
sizeof(struct btrfs_csum_item) +
sizeof(struct btrfs_disk_key));
num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
num_csums = num_csums + num_csums_per_leaf - 1;
num_csums = num_csums / num_csums_per_leaf;
old_csums = old_csums + num_csums_per_leaf - 1;
old_csums = old_csums / num_csums_per_leaf;
/* No change, no need to reserve more */
if (old_csums == num_csums)
return 0;
if (reserve)
return btrfs_calc_trans_metadata_size(root,
num_csums - old_csums);
return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
}
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
......@@ -3999,9 +4076,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
u64 to_reserve = 0;
unsigned nr_extents = 0;
int flush = 1;
int ret;
if (btrfs_transaction_in_commit(root->fs_info))
if (btrfs_is_free_space_inode(root, inode))
flush = 0;
if (flush && btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
num_bytes = ALIGN(num_bytes, root->sectorsize);
......@@ -4017,18 +4098,29 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
}
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
spin_unlock(&BTRFS_I(inode)->lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
if (ret) {
u64 to_free = 0;
unsigned dropped;
spin_lock(&BTRFS_I(inode)->lock);
dropped = drop_outstanding_extent(inode);
to_free = calc_csum_metadata_size(inode, num_bytes, 0);
spin_unlock(&BTRFS_I(inode)->lock);
to_free += btrfs_calc_trans_metadata_size(root, dropped);
/*
* We don't need the return value since our reservation failed,
* we just need to clean up our counter.
* Somebody could have come in and twiddled with the
* reservation, so if we have to free more than we would have
* reserved from this reservation go ahead and release those
* bytes.
*/
dropped = drop_outstanding_extent(inode);
WARN_ON(dropped > 1);
to_free -= to_reserve;
if (to_free)
btrfs_block_rsv_release(root, block_rsv, to_free);
return ret;
}
......@@ -4037,6 +4129,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
return 0;
}
/**
* btrfs_delalloc_release_metadata - release a metadata reservation for an inode
* @inode: the inode to release the reservation for
* @num_bytes: the number of bytes we're releasing
*
* This will release the metadata reservation for an inode. This can be called
* once we complete IO for a given set of bytes to release their metadata
* reservations.
*/
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
......@@ -4044,9 +4145,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
unsigned dropped;
num_bytes = ALIGN(num_bytes, root->sectorsize);
spin_lock(&BTRFS_I(inode)->lock);
dropped = drop_outstanding_extent(inode);
to_free = calc_csum_metadata_size(inode, num_bytes);
to_free = calc_csum_metadata_size(inode, num_bytes, 0);
spin_unlock(&BTRFS_I(inode)->lock);
if (dropped > 0)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
......@@ -4054,6 +4157,21 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
to_free);
}
/**
* btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
* @inode: inode we're writing to
* @num_bytes: the number of bytes we want to allocate
*
* This will do the following things
*
* o reserve space in the data space info for num_bytes
* o reserve space in the metadata space info based on number of outstanding
* extents and how much csums will be needed
* o add to the inodes ->delalloc_bytes
* o add it to the fs_info's delalloc inodes list.
*
* This will return 0 for success and -ENOSPC if there is no space left.
*/
int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
{
int ret;
......@@ -4071,6 +4189,19 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
return 0;
}
/**
* btrfs_delalloc_release_space - release data and metadata space for delalloc
* @inode: inode we're releasing space for
* @num_bytes: the number of bytes we want to free up
*
* This must be matched with a call to btrfs_delalloc_reserve_space. This is
* called in the case that we don't need the metadata AND data reservations
* anymore. So if there is an error or we insert an inline extent.
*
* This function will release the metadata space that was not used and will
* decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
* list if there are no delalloc bytes left.
*/
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
{
btrfs_delalloc_release_metadata(inode, num_bytes);
......@@ -4123,7 +4254,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
if (btrfs_test_opt(root, SPACE_CACHE) &&
cache->disk_cache_state < BTRFS_DC_CLEAR)
cache->disk_cache_state = BTRFS_DC_CLEAR;
......@@ -4135,7 +4266,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_block_group_used(&cache->item, old_val);
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
cache->space_info->reservation_progress++;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock);
......@@ -4187,7 +4317,6 @@ static int pin_down_extent(struct btrfs_root *root,
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
cache->space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
......@@ -4214,46 +4343,55 @@ int btrfs_pin_extent(struct btrfs_root *root,
return 0;
}
/*
* update size of reserved extents. this function may return -EAGAIN
* if 'reserve' is true or 'sinfo' is false.
/**
* btrfs_update_reserved_bytes - update the block_group and space info counters
* @cache: The cache we are manipulating
* @num_bytes: The number of bytes in question
* @reserve: One of the reservation enums
*
* This is called by the allocator when it reserves space, or by somebody who is
* freeing space that was never actually used on disk. For example if you
* reserve some space for a new leaf in transaction A and before transaction A
* commits you free that leaf, you call this with reserve set to 0 in order to
* clear the reservation.
*
* Metadata reservations should be called with RESERVE_ALLOC so we do the proper
* ENOSPC accounting. For data we handle the reservation through clearing the
* delalloc bits in the io_tree. We have to do this since we could end up
* allocating less disk space for the amount of data we have reserved in the
* case of compression.
*
* If this is a reservation and the block group has become read only we cannot
* make the reservation and return -EAGAIN, otherwise this function always
* succeeds.
*/
int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve, int sinfo)
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve)
{
struct btrfs_space_info *space_info = cache->space_info;
int ret = 0;
if (sinfo) {
struct btrfs_space_info *space_info = cache->space_info;
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
if (reserve) {
if (cache->ro) {
ret = -EAGAIN;
} else {
cache->reserved += num_bytes;
space_info->bytes_reserved += num_bytes;
}
} else {
if (cache->ro)
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
} else {
spin_lock(&cache->lock);
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
if (reserve != RESERVE_FREE) {
if (cache->ro) {
ret = -EAGAIN;
} else {
if (reserve)
cache->reserved += num_bytes;
else
cache->reserved -= num_bytes;
cache->reserved += num_bytes;
space_info->bytes_reserved += num_bytes;
if (reserve == RESERVE_ALLOC) {
BUG_ON(space_info->bytes_may_use < num_bytes);
space_info->bytes_may_use -= num_bytes;
}
}
spin_unlock(&cache->lock);
} else {
if (cache->ro)
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
return ret;
}
......@@ -4319,13 +4457,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
spin_lock(&cache->lock);
cache->pinned -= len;
cache->space_info->bytes_pinned -= len;
if (cache->ro) {
if (cache->ro)
cache->space_info->bytes_readonly += len;
} else if (cache->reserved_pinned > 0) {
len = min(len, cache->reserved_pinned);
cache->reserved_pinned -= len;
cache->space_info->bytes_reserved += len;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
}
......@@ -4340,11 +4473,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_io_tree *unpin;
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *next_rsv;
u64 start;
u64 end;
int idx;
int ret;
if (fs_info->pinned_extents == &fs_info->freed_extents[0])
......@@ -4367,30 +4497,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
cond_resched();
}
mutex_lock(&fs_info->durable_block_rsv_mutex);
list_for_each_entry_safe(block_rsv, next_rsv,
&fs_info->durable_block_rsv_list, list) {
idx = trans->transid & 0x1;
if (block_rsv->freed[idx] > 0) {
block_rsv_add_bytes(block_rsv,
block_rsv->freed[idx], 0);
block_rsv->freed[idx] = 0;
}
if (atomic_read(&block_rsv->usage) == 0) {
btrfs_block_rsv_release(root, block_rsv, (u64)-1);
if (block_rsv->freed[0] == 0 &&
block_rsv->freed[1] == 0) {
list_del_init(&block_rsv->list);
kfree(block_rsv);
}
} else {
btrfs_block_rsv_release(root, block_rsv, 0);
}
}
mutex_unlock(&fs_info->durable_block_rsv_mutex);
return 0;
}
......@@ -4668,7 +4774,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct extent_buffer *buf,
u64 parent, int last_ref)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_group_cache *cache = NULL;
int ret;
......@@ -4683,64 +4788,24 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (!last_ref)
return;
block_rsv = get_block_rsv(trans, root);
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
if (block_rsv->space_info != cache->space_info)
goto out;
if (btrfs_header_generation(buf) == trans->transid) {
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = check_ref_cleanup(trans, root, buf->start);
if (!ret)
goto pin;
goto out;
}
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
pin_down_extent(root, cache, buf->start, buf->len, 1);
goto pin;
goto out;
}
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
btrfs_add_free_space(cache, buf->start, buf->len);
ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0);
if (ret == -EAGAIN) {
/* block group became read-only */
btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
goto out;
}
ret = 1;
spin_lock(&block_rsv->lock);
if (block_rsv->reserved < block_rsv->size) {
block_rsv->reserved += buf->len;
ret = 0;
}
spin_unlock(&block_rsv->lock);
if (ret) {
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len;
cache->space_info->reservation_progress++;
spin_unlock(&cache->space_info->lock);
}
goto out;
}
pin:
if (block_rsv->durable && !cache->ro) {
ret = 0;
spin_lock(&cache->lock);
if (!cache->ro) {
cache->reserved_pinned += buf->len;
ret = 1;
}
spin_unlock(&cache->lock);
if (ret) {
spin_lock(&block_rsv->lock);
block_rsv->freed[trans->transid & 0x1] += buf->len;
spin_unlock(&block_rsv->lock);
}
btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
}
out:
/*
......@@ -4883,6 +4948,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int last_ptr_loop = 0;
int loop = 0;
int index = 0;
int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
bool found_uncached_bg = false;
bool failed_cluster_refill = false;
bool failed_alloc = false;
......@@ -5202,8 +5269,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
search_start - offset);
BUG_ON(offset > search_start);
ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
(data & BTRFS_BLOCK_GROUP_DATA));
ret = btrfs_update_reserved_bytes(block_group, num_bytes,
alloc_type);
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
......@@ -5325,7 +5392,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int index = 0;
spin_lock(&info->lock);
printk(KERN_INFO "space_info has %llu free, is %sfull\n",
printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
(unsigned long long)info->flags,
(unsigned long long)(info->total_bytes - info->bytes_used -
info->bytes_pinned - info->bytes_reserved -
info->bytes_readonly),
......@@ -5427,7 +5495,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, 0, 1);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(root, start, len);
......@@ -5630,7 +5698,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
put_caching_control(caching_ctl);
}
ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1);
ret = btrfs_update_reserved_bytes(block_group, ins->offset,
RESERVE_ALLOC_NO_ACCOUNT);
BUG_ON(ret);
btrfs_put_block_group(block_group);
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
......@@ -5687,8 +5756,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
if (block_rsv->size == 0) {
ret = reserve_metadata_bytes(trans, root, block_rsv,
blocksize, 0);
ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
/*
* If we couldn't reserve metadata bytes try and use some from
* the global reserve.
......@@ -5709,12 +5777,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
return block_rsv;
if (ret) {
WARN_ON(1);
ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
0);
ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
if (!ret) {
spin_lock(&block_rsv->lock);
block_rsv->size += blocksize;
spin_unlock(&block_rsv->lock);
return block_rsv;
} else if (ret && block_rsv != global_rsv) {
ret = block_rsv_use_bytes(global_rsv, blocksize);
......@@ -6592,12 +6656,9 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
cache->bytes_super - btrfs_block_group_used(&cache->item);
if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
sinfo->bytes_may_use + sinfo->bytes_readonly +
cache->reserved_pinned + num_bytes + min_allocable_bytes <=
sinfo->total_bytes) {
sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
min_allocable_bytes <= sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
sinfo->bytes_reserved += cache->reserved_pinned;
cache->reserved_pinned = 0;
cache->ro = 1;
ret = 0;
}
......@@ -6964,7 +7025,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
struct btrfs_space_info,
list);
if (space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0) {
space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0) {
WARN_ON(1);
dump_space_info(space_info, 0, 0);
}
......@@ -7007,13 +7069,11 @@ int btrfs_read_block_groups(struct btrfs_root *root)
path->reada = 1;
cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
if (cache_gen != 0 &&
if (btrfs_test_opt(root, SPACE_CACHE) &&
btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
need_clear = 1;
if (btrfs_test_opt(root, CLEAR_CACHE))
need_clear = 1;
if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
printk(KERN_INFO "btrfs: disk space caching is enabled\n");
while (1) {
ret = find_first_block_group(root, path, &key);
......@@ -7268,7 +7328,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&block_group->lock);
}
/* One for our lookup ref */
iput(inode);
btrfs_add_delayed_iput(inode);
}
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
......
......@@ -894,6 +894,194 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
goto again;
}
/**
* convert_extent - convert all bits in a given range from one bit to another
* @tree: the io tree to search
* @start: the start offset in bytes
* @end: the end offset in bytes (inclusive)
* @bits: the bits to set in this range
* @clear_bits: the bits to clear in this range
* @mask: the allocation mask
*
* This will go through and set bits for the given range. If any states exist
* already in this range they are set with the given bit and cleared of the
* clear_bits. This is only meant to be used by things that are mergeable, ie
* converting from say DELALLOC to DIRTY. This is not meant to be used with
* boundary bits like LOCK.
*/
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int clear_bits, gfp_t mask)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
struct rb_node *node;
int err = 0;
u64 last_start;
u64 last_end;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
if (!prealloc)
return -ENOMEM;
}
spin_lock(&tree->lock);
/*
* this search will find all the extents that end after
* our range starts.
*/
node = tree_search(tree, start);
if (!node) {
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
return -ENOMEM;
err = insert_state(tree, prealloc, start, end, &bits);
prealloc = NULL;
BUG_ON(err == -EEXIST);
goto out;
}
state = rb_entry(node, struct extent_state, rb_node);
hit_next:
last_start = state->start;
last_end = state->end;
/*
* | ---- desired range ---- |
* | state |
*
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
struct rb_node *next_node;
set_state_bits(tree, state, &bits);
clear_state_bit(tree, state, &clear_bits, 0);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
next_node = rb_next(&state->rb_node);
if (next_node && start < end && prealloc && !need_resched()) {
state = rb_entry(next_node, struct extent_state,
rb_node);
if (state->start == start)
goto hit_next;
}
goto search_again;
}
/*
* | ---- desired range ---- |
* | state |
* or
* | ------------- state -------------- |
*
* We need to split the extent we found, and may flip bits on
* second half.
*
* If the extent we found extends past our
* range, we just split and search again. It'll get split
* again the next time though.
*
* If the extent we found is inside our range, we set the
* desired bit on it.
*/
if (state->start < start) {
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
return -ENOMEM;
err = split_state(tree, state, prealloc, start);
BUG_ON(err == -EEXIST);
prealloc = NULL;
if (err)
goto out;
if (state->end <= end) {
set_state_bits(tree, state, &bits);
clear_state_bit(tree, state, &clear_bits, 0);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
}
goto search_again;
}
/*
* | ---- desired range ---- |
* | state | or | state |
*
* There's a hole, we need to insert something in it and
* ignore the extent we found.
*/
if (state->start > start) {
u64 this_end;
if (end < last_start)
this_end = end;
else
this_end = last_start - 1;
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
return -ENOMEM;
/*
* Avoid to free 'prealloc' if it can be merged with
* the later extent.
*/
err = insert_state(tree, prealloc, start, this_end,
&bits);
BUG_ON(err == -EEXIST);
if (err) {
free_extent_state(prealloc);
prealloc = NULL;
goto out;
}
prealloc = NULL;
start = this_end + 1;
goto search_again;
}
/*
* | ---- desired range ---- |
* | state |
* We need to split the extent, and set the bit
* on the first half
*/
if (state->start <= end && state->end > end) {
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
return -ENOMEM;
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
set_state_bits(tree, prealloc, &bits);
clear_state_bit(tree, prealloc, &clear_bits, 0);
merge_state(tree, prealloc);
prealloc = NULL;
goto out;
}
goto search_again;
out:
spin_unlock(&tree->lock);
if (prealloc)
free_extent_state(prealloc);
return err;
search_again:
if (start > end)
goto out;
spin_unlock(&tree->lock);
if (mask & __GFP_WAIT)
cond_resched();
goto again;
}
/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
......@@ -2136,6 +2324,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
int compressed;
int write_flags;
unsigned long nr_written = 0;
bool fill_delalloc = true;
if (wbc->sync_mode == WB_SYNC_ALL)
write_flags = WRITE_SYNC;
......@@ -2166,10 +2355,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
set_page_extent_mapped(page);
if (!tree->ops || !tree->ops->fill_delalloc)
fill_delalloc = false;
delalloc_start = start;
delalloc_end = 0;
page_started = 0;
if (!epd->extent_locked) {
if (!epd->extent_locked && fill_delalloc) {
u64 delalloc_to_write = 0;
/*
* make sure the wbc mapping index is at least updated
......
......@@ -17,6 +17,7 @@
#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11)
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_NEED_WAIT (1 << 13)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
......@@ -214,6 +215,8 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int clear_bits, gfp_t mask);
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
......
......@@ -1069,6 +1069,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
int i;
unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
int err = 0;
int faili = 0;
u64 start_pos;
......@@ -1080,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
again:
for (i = 0; i < num_pages; i++) {
pages[i] = find_or_create_page(inode->i_mapping, index + i,
GFP_NOFS);
mask);
if (!pages[i]) {
faili = i - 1;
err = -ENOMEM;
......@@ -1615,10 +1616,6 @@ static long btrfs_fallocate(struct file *file, int mode,
goto out;
}
ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
if (ret)
goto out;
locked_end = alloc_end - 1;
while (1) {
struct btrfs_ordered_extent *ordered;
......@@ -1664,11 +1661,27 @@ static long btrfs_fallocate(struct file *file, int mode,
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
/*
* Make sure we have enough space before we do the
* allocation.
*/
ret = btrfs_check_data_free_space(inode, last_byte -
cur_offset);
if (ret) {
free_extent_map(em);
break;
}
ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
last_byte - cur_offset,
1 << inode->i_blkbits,
offset + len,
&alloc_hint);
/* Let go of our reservation. */
btrfs_free_reserved_data_space(inode, last_byte -
cur_offset);
if (ret < 0) {
free_extent_map(em);
break;
......@@ -1694,8 +1707,6 @@ static long btrfs_fallocate(struct file *file, int mode,
}
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_NOFS);
btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
out:
mutex_unlock(&inode->i_mutex);
return ret;
......
......@@ -20,6 +20,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
......@@ -84,6 +85,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
*block_group, struct btrfs_path *path)
{
struct inode *inode = NULL;
u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
spin_lock(&block_group->lock);
if (block_group->inode)
......@@ -98,13 +100,14 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
return inode;
spin_lock(&block_group->lock);
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
if (!((BTRFS_I(inode)->flags & flags) == flags)) {
printk(KERN_INFO "Old style space inode found, converting.\n");
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
BTRFS_INODE_NODATACOW;
block_group->disk_cache_state = BTRFS_DC_CLEAR;
}
if (!btrfs_fs_closing(root->fs_info)) {
if (!block_group->iref) {
block_group->inode = igrab(inode);
block_group->iref = 1;
}
......@@ -122,12 +125,17 @@ int __create_free_space_inode(struct btrfs_root *root,
struct btrfs_free_space_header *header;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
int ret;
ret = btrfs_insert_empty_inode(trans, root, path, ino);
if (ret)
return ret;
/* We inline crc's for the free disk space cache */
if (ino != BTRFS_FREE_INO_OBJECTID)
flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
......@@ -140,8 +148,7 @@ int __create_free_space_inode(struct btrfs_root *root,
btrfs_set_inode_uid(leaf, inode_item, 0);
btrfs_set_inode_gid(leaf, inode_item, 0);
btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
BTRFS_INODE_PREALLOC);
btrfs_set_inode_flags(leaf, inode_item, flags);
btrfs_set_inode_nlink(leaf, inode_item, 1);
btrfs_set_inode_transid(leaf, inode_item, trans->transid);
btrfs_set_inode_block_group(leaf, inode_item, offset);
......@@ -196,9 +203,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
rsv = trans->block_rsv;
trans->block_rsv = root->orphan_block_rsv;
ret = btrfs_block_rsv_check(trans, root,
root->orphan_block_rsv,
0, 5);
ret = btrfs_block_rsv_check(root, root->orphan_block_rsv, 5);
if (ret)
return ret;
......@@ -242,26 +247,342 @@ static int readahead_cache(struct inode *inode)
return 0;
}
struct io_ctl {
void *cur, *orig;
struct page *page;
struct page **pages;
struct btrfs_root *root;
unsigned long size;
int index;
int num_pages;
unsigned check_crcs:1;
};
static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
struct btrfs_root *root)
{
memset(io_ctl, 0, sizeof(struct io_ctl));
io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
GFP_NOFS);
if (!io_ctl->pages)
return -ENOMEM;
io_ctl->root = root;
if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
io_ctl->check_crcs = 1;
return 0;
}
static void io_ctl_free(struct io_ctl *io_ctl)
{
kfree(io_ctl->pages);
}
static void io_ctl_unmap_page(struct io_ctl *io_ctl)
{
if (io_ctl->cur) {
kunmap(io_ctl->page);
io_ctl->cur = NULL;
io_ctl->orig = NULL;
}
}
static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
WARN_ON(io_ctl->cur);
BUG_ON(io_ctl->index >= io_ctl->num_pages);
io_ctl->page = io_ctl->pages[io_ctl->index++];
io_ctl->cur = kmap(io_ctl->page);
io_ctl->orig = io_ctl->cur;
io_ctl->size = PAGE_CACHE_SIZE;
if (clear)
memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}
static void io_ctl_drop_pages(struct io_ctl *io_ctl)
{
int i;
io_ctl_unmap_page(io_ctl);
for (i = 0; i < io_ctl->num_pages; i++) {
ClearPageChecked(io_ctl->pages[i]);
unlock_page(io_ctl->pages[i]);
page_cache_release(io_ctl->pages[i]);
}
}
static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
int uptodate)
{
struct page *page;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
int i;
for (i = 0; i < io_ctl->num_pages; i++) {
page = find_or_create_page(inode->i_mapping, i, mask);
if (!page) {
io_ctl_drop_pages(io_ctl);
return -ENOMEM;
}
io_ctl->pages[i] = page;
if (uptodate && !PageUptodate(page)) {
btrfs_readpage(NULL, page);
lock_page(page);
if (!PageUptodate(page)) {
printk(KERN_ERR "btrfs: error reading free "
"space cache\n");
io_ctl_drop_pages(io_ctl);
return -EIO;
}
}
}
return 0;
}
static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
{
u64 *val;
io_ctl_map_page(io_ctl, 1);
/*
* Skip the csum areas. If we don't check crcs then we just have a
* 64bit chunk at the front of the first page.
*/
if (io_ctl->check_crcs) {
io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
} else {
io_ctl->cur += sizeof(u64);
io_ctl->size -= sizeof(u64) * 2;
}
val = io_ctl->cur;
*val = cpu_to_le64(generation);
io_ctl->cur += sizeof(u64);
}
static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
u64 *gen;
/*
* Skip the crc area. If we don't check crcs then we just have a 64bit
* chunk at the front of the first page.
*/
if (io_ctl->check_crcs) {
io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
io_ctl->size -= sizeof(u64) +
(sizeof(u32) * io_ctl->num_pages);
} else {
io_ctl->cur += sizeof(u64);
io_ctl->size -= sizeof(u64) * 2;
}
gen = io_ctl->cur;
if (le64_to_cpu(*gen) != generation) {
printk_ratelimited(KERN_ERR "btrfs: space cache generation "
"(%Lu) does not match inode (%Lu)\n", *gen,
generation);
io_ctl_unmap_page(io_ctl);
return -EIO;
}
io_ctl->cur += sizeof(u64);
return 0;
}
static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
{
u32 *tmp;
u32 crc = ~(u32)0;
unsigned offset = 0;
if (!io_ctl->check_crcs) {
io_ctl_unmap_page(io_ctl);
return;
}
if (index == 0)
offset = sizeof(u32) * io_ctl->num_pages;;
crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
PAGE_CACHE_SIZE - offset);
btrfs_csum_final(crc, (char *)&crc);
io_ctl_unmap_page(io_ctl);
tmp = kmap(io_ctl->pages[0]);
tmp += index;
*tmp = crc;
kunmap(io_ctl->pages[0]);
}
static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
{
u32 *tmp, val;
u32 crc = ~(u32)0;
unsigned offset = 0;
if (!io_ctl->check_crcs) {
io_ctl_map_page(io_ctl, 0);
return 0;
}
if (index == 0)
offset = sizeof(u32) * io_ctl->num_pages;
tmp = kmap(io_ctl->pages[0]);
tmp += index;
val = *tmp;
kunmap(io_ctl->pages[0]);
io_ctl_map_page(io_ctl, 0);
crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
PAGE_CACHE_SIZE - offset);
btrfs_csum_final(crc, (char *)&crc);
if (val != crc) {
printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
"space cache\n");
io_ctl_unmap_page(io_ctl);
return -EIO;
}
return 0;
}
static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
void *bitmap)
{
struct btrfs_free_space_entry *entry;
if (!io_ctl->cur)
return -ENOSPC;
entry = io_ctl->cur;
entry->offset = cpu_to_le64(offset);
entry->bytes = cpu_to_le64(bytes);
entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
BTRFS_FREE_SPACE_EXTENT;
io_ctl->cur += sizeof(struct btrfs_free_space_entry);
io_ctl->size -= sizeof(struct btrfs_free_space_entry);
if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
return 0;
io_ctl_set_crc(io_ctl, io_ctl->index - 1);
/* No more pages to map */
if (io_ctl->index >= io_ctl->num_pages)
return 0;
/* map the next page */
io_ctl_map_page(io_ctl, 1);
return 0;
}
static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
{
if (!io_ctl->cur)
return -ENOSPC;
/*
* If we aren't at the start of the current page, unmap this one and
* map the next one if there is any left.
*/
if (io_ctl->cur != io_ctl->orig) {
io_ctl_set_crc(io_ctl, io_ctl->index - 1);
if (io_ctl->index >= io_ctl->num_pages)
return -ENOSPC;
io_ctl_map_page(io_ctl, 0);
}
memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
io_ctl_set_crc(io_ctl, io_ctl->index - 1);
if (io_ctl->index < io_ctl->num_pages)
io_ctl_map_page(io_ctl, 0);
return 0;
}
static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
{
/*
* If we're not on the boundary we know we've modified the page and we
* need to crc the page.
*/
if (io_ctl->cur != io_ctl->orig)
io_ctl_set_crc(io_ctl, io_ctl->index - 1);
else
io_ctl_unmap_page(io_ctl);
while (io_ctl->index < io_ctl->num_pages) {
io_ctl_map_page(io_ctl, 1);
io_ctl_set_crc(io_ctl, io_ctl->index - 1);
}
}
static int io_ctl_read_entry(struct io_ctl *io_ctl,
struct btrfs_free_space *entry, u8 *type)
{
struct btrfs_free_space_entry *e;
e = io_ctl->cur;
entry->offset = le64_to_cpu(e->offset);
entry->bytes = le64_to_cpu(e->bytes);
*type = e->type;
io_ctl->cur += sizeof(struct btrfs_free_space_entry);
io_ctl->size -= sizeof(struct btrfs_free_space_entry);
if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
return 0;
io_ctl_unmap_page(io_ctl);
if (io_ctl->index >= io_ctl->num_pages)
return 0;
return io_ctl_check_crc(io_ctl, io_ctl->index);
}
static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
struct btrfs_free_space *entry)
{
int ret;
if (io_ctl->cur && io_ctl->cur != io_ctl->orig)
io_ctl_unmap_page(io_ctl);
ret = io_ctl_check_crc(io_ctl, io_ctl->index);
if (ret)
return ret;
memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
io_ctl_unmap_page(io_ctl);
return 0;
}
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_path *path, u64 offset)
{
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
struct page *page;
struct io_ctl io_ctl;
struct btrfs_key key;
struct btrfs_free_space *e, *n;
struct list_head bitmaps;
u64 num_entries;
u64 num_bitmaps;
u64 generation;
pgoff_t index = 0;
u8 type;
int ret = 0;
INIT_LIST_HEAD(&bitmaps);
/* Nothing in the space cache, goodbye */
if (!i_size_read(inode))
goto out;
return 0;
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;
......@@ -269,11 +590,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
return 0;
else if (ret > 0) {
btrfs_release_path(path);
ret = 0;
goto out;
return 0;
}
ret = -1;
......@@ -291,169 +611,100 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
" not match free space cache generation (%llu)\n",
(unsigned long long)BTRFS_I(inode)->generation,
(unsigned long long)generation);
goto out;
return 0;
}
if (!num_entries)
goto out;
return 0;
io_ctl_init(&io_ctl, inode, root);
ret = readahead_cache(inode);
if (ret)
goto out;
while (1) {
struct btrfs_free_space_entry *entry;
struct btrfs_free_space *e;
void *addr;
unsigned long offset = 0;
int need_loop = 0;
ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
if (ret)
goto out;
if (!num_entries && !num_bitmaps)
break;
ret = io_ctl_check_crc(&io_ctl, 0);
if (ret)
goto free_cache;
page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page)
ret = io_ctl_check_generation(&io_ctl, generation);
if (ret)
goto free_cache;
while (num_entries) {
e = kmem_cache_zalloc(btrfs_free_space_cachep,
GFP_NOFS);
if (!e)
goto free_cache;
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
printk(KERN_ERR "btrfs: error reading free "
"space cache\n");
goto free_cache;
}
ret = io_ctl_read_entry(&io_ctl, e, &type);
if (ret) {
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
addr = kmap(page);
if (index == 0) {
u64 *gen;
if (!e->bytes) {
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
/*
* We put a bogus crc in the front of the first page in
* case old kernels try to mount a fs with the new
* format to make sure they discard the cache.
*/
addr += sizeof(u64);
offset += sizeof(u64);
gen = addr;
if (*gen != BTRFS_I(inode)->generation) {
printk(KERN_ERR "btrfs: space cache generation"
" (%llu) does not match inode (%llu)\n",
(unsigned long long)*gen,
(unsigned long long)
BTRFS_I(inode)->generation);
kunmap(page);
unlock_page(page);
page_cache_release(page);
if (type == BTRFS_FREE_SPACE_EXTENT) {
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
spin_unlock(&ctl->tree_lock);
if (ret) {
printk(KERN_ERR "Duplicate entries in "
"free space cache, dumping\n");
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
addr += sizeof(u64);
offset += sizeof(u64);
}
entry = addr;
while (1) {
if (!num_entries)
break;
need_loop = 1;
e = kmem_cache_zalloc(btrfs_free_space_cachep,
GFP_NOFS);
if (!e) {
kunmap(page);
unlock_page(page);
page_cache_release(page);
} else {
BUG_ON(!num_bitmaps);
num_bitmaps--;
e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!e->bitmap) {
kmem_cache_free(
btrfs_free_space_cachep, e);
goto free_cache;
}
e->offset = le64_to_cpu(entry->offset);
e->bytes = le64_to_cpu(entry->bytes);
if (!e->bytes) {
kunmap(page);
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
ctl->total_bitmaps++;
ctl->op->recalc_thresholds(ctl);
spin_unlock(&ctl->tree_lock);
if (ret) {
printk(KERN_ERR "Duplicate entries in "
"free space cache, dumping\n");
kmem_cache_free(btrfs_free_space_cachep, e);
unlock_page(page);
page_cache_release(page);
goto free_cache;
}
if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
spin_unlock(&ctl->tree_lock);
if (ret) {
printk(KERN_ERR "Duplicate entries in "
"free space cache, dumping\n");
kunmap(page);
unlock_page(page);
page_cache_release(page);
goto free_cache;
}
} else {
e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!e->bitmap) {
kunmap(page);
kmem_cache_free(
btrfs_free_space_cachep, e);
unlock_page(page);
page_cache_release(page);
goto free_cache;
}
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
ctl->total_bitmaps++;
ctl->op->recalc_thresholds(ctl);
spin_unlock(&ctl->tree_lock);
if (ret) {
printk(KERN_ERR "Duplicate entries in "
"free space cache, dumping\n");
kunmap(page);
unlock_page(page);
page_cache_release(page);
goto free_cache;
}
list_add_tail(&e->list, &bitmaps);
}
num_entries--;
offset += sizeof(struct btrfs_free_space_entry);
if (offset + sizeof(struct btrfs_free_space_entry) >=
PAGE_CACHE_SIZE)
break;
entry++;
list_add_tail(&e->list, &bitmaps);
}
/*
* We read an entry out of this page, we need to move on to the
* next page.
*/
if (need_loop) {
kunmap(page);
goto next;
}
num_entries--;
}
/*
* We add the bitmaps at the end of the entries in order that
* the bitmap entries are added to the cache.
*/
e = list_entry(bitmaps.next, struct btrfs_free_space, list);
/*
* We add the bitmaps at the end of the entries in order that
* the bitmap entries are added to the cache.
*/
list_for_each_entry_safe(e, n, &bitmaps, list) {
list_del_init(&e->list);
memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
kunmap(page);
num_bitmaps--;
next:
unlock_page(page);
page_cache_release(page);
index++;
ret = io_ctl_read_bitmap(&io_ctl, e);
if (ret)
goto free_cache;
}
io_ctl_drop_pages(&io_ctl);
ret = 1;
out:
io_ctl_free(&io_ctl);
return ret;
free_cache:
io_ctl_drop_pages(&io_ctl);
__btrfs_remove_free_space_cache(ctl);
goto out;
}
......@@ -465,7 +716,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_root *root = fs_info->tree_root;
struct inode *inode;
struct btrfs_path *path;
int ret;
int ret = 0;
bool matched;
u64 used = btrfs_block_group_used(&block_group->item);
......@@ -497,6 +748,14 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
return 0;
}
/* We may have converted the inode and made the cache invalid. */
spin_lock(&block_group->lock);
if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
spin_unlock(&block_group->lock);
goto out;
}
spin_unlock(&block_group->lock);
ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
path, block_group->key.objectid);
btrfs_free_path(path);
......@@ -530,6 +789,19 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
return ret;
}
/**
* __btrfs_write_out_cache - write out cached info to an inode
* @root - the root the inode belongs to
* @ctl - the free space cache we are going to write out
* @block_group - the block_group for this cache if it belongs to a block_group
* @trans - the trans handle
* @path - the path to use
* @offset - the offset for the key we'll insert
*
* This function writes out a free space cache struct to disk for quick recovery
* on mount. This will return 0 if it was successfull in writing the cache out,
* and -1 if it was not.
*/
int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_block_group_cache *block_group,
......@@ -540,42 +812,24 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct extent_buffer *leaf;
struct rb_node *node;
struct list_head *pos, *n;
struct page **pages;
struct page *page;
struct extent_state *cached_state = NULL;
struct btrfs_free_cluster *cluster = NULL;
struct extent_io_tree *unpin = NULL;
struct io_ctl io_ctl;
struct list_head bitmap_list;
struct btrfs_key key;
u64 start, end, len;
u64 bytes = 0;
u32 crc = ~(u32)0;
int index = 0, num_pages = 0;
int entries = 0;
int bitmaps = 0;
int ret = -1;
bool next_page = false;
bool out_of_space = false;
int ret;
int err = -1;
INIT_LIST_HEAD(&bitmap_list);
node = rb_first(&ctl->free_space_offset);
if (!node)
return 0;
if (!i_size_read(inode))
return -1;
num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
filemap_write_and_wait(inode->i_mapping);
btrfs_wait_ordered_range(inode, inode->i_size &
~(root->sectorsize - 1), (u64)-1);
pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
if (!pages)
return -1;
io_ctl_init(&io_ctl, inode, root);
/* Get the cluster for this block_group if it exists */
if (block_group && !list_empty(&block_group->cluster_list))
......@@ -589,30 +843,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
*/
unpin = root->fs_info->pinned_extents;
/*
* Lock all pages first so we can lock the extent safely.
*
* NOTE: Because we hold the ref the entire time we're going to write to
* the page find_get_page should never fail, so we don't do a check
* after find_get_page at this point. Just putting this here so people
* know and don't freak out.
*/
while (index < num_pages) {
page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page) {
int i;
for (i = 0; i < num_pages; i++) {
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
goto out;
}
pages[index] = page;
index++;
}
/* Lock all pages first so we can lock the extent safely. */
io_ctl_prepare_pages(&io_ctl, inode, 0);
index = 0;
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
0, &cached_state, GFP_NOFS);
......@@ -623,189 +856,111 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
if (block_group)
start = block_group->key.objectid;
/* Write out the extent entries */
do {
struct btrfs_free_space_entry *entry;
void *addr, *orig;
unsigned long offset = 0;
node = rb_first(&ctl->free_space_offset);
if (!node && cluster) {
node = rb_first(&cluster->root);
cluster = NULL;
}
next_page = false;
/* Make sure we can fit our crcs into the first page */
if (io_ctl.check_crcs &&
(io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
WARN_ON(1);
goto out_nospc;
}
if (index >= num_pages) {
out_of_space = true;
break;
}
io_ctl_set_generation(&io_ctl, trans->transid);
page = pages[index];
/* Write out the extent entries */
while (node) {
struct btrfs_free_space *e;
orig = addr = kmap(page);
if (index == 0) {
u64 *gen;
e = rb_entry(node, struct btrfs_free_space, offset_index);
entries++;
/*
* We're going to put in a bogus crc for this page to
* make sure that old kernels who aren't aware of this
* format will be sure to discard the cache.
*/
addr += sizeof(u64);
offset += sizeof(u64);
ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
e->bitmap);
if (ret)
goto out_nospc;
gen = addr;
*gen = trans->transid;
addr += sizeof(u64);
offset += sizeof(u64);
if (e->bitmap) {
list_add_tail(&e->list, &bitmap_list);
bitmaps++;
}
entry = addr;
memset(addr, 0, PAGE_CACHE_SIZE - offset);
while (node && !next_page) {
struct btrfs_free_space *e;
e = rb_entry(node, struct btrfs_free_space, offset_index);
entries++;
entry->offset = cpu_to_le64(e->offset);
entry->bytes = cpu_to_le64(e->bytes);
if (e->bitmap) {
entry->type = BTRFS_FREE_SPACE_BITMAP;
list_add_tail(&e->list, &bitmap_list);
bitmaps++;
} else {
entry->type = BTRFS_FREE_SPACE_EXTENT;
}
node = rb_next(node);
if (!node && cluster) {
node = rb_first(&cluster->root);
cluster = NULL;
}
offset += sizeof(struct btrfs_free_space_entry);
if (offset + sizeof(struct btrfs_free_space_entry) >=
PAGE_CACHE_SIZE)
next_page = true;
entry++;
node = rb_next(node);
if (!node && cluster) {
node = rb_first(&cluster->root);
cluster = NULL;
}
}
/*
* We want to add any pinned extents to our free space cache
* so we don't leak the space
*/
while (block_group && !next_page &&
(start < block_group->key.objectid +
block_group->key.offset)) {
ret = find_first_extent_bit(unpin, start, &start, &end,
EXTENT_DIRTY);
if (ret) {
ret = 0;
break;
}
/* This pinned extent is out of our range */
if (start >= block_group->key.objectid +
block_group->key.offset)
break;
len = block_group->key.objectid +
block_group->key.offset - start;
len = min(len, end + 1 - start);
entries++;
entry->offset = cpu_to_le64(start);
entry->bytes = cpu_to_le64(len);
entry->type = BTRFS_FREE_SPACE_EXTENT;
start = end + 1;
offset += sizeof(struct btrfs_free_space_entry);
if (offset + sizeof(struct btrfs_free_space_entry) >=
PAGE_CACHE_SIZE)
next_page = true;
entry++;
/*
* We want to add any pinned extents to our free space cache
* so we don't leak the space
*/
while (block_group && (start < block_group->key.objectid +
block_group->key.offset)) {
ret = find_first_extent_bit(unpin, start, &start, &end,
EXTENT_DIRTY);
if (ret) {
ret = 0;
break;
}
/* Generate bogus crc value */
if (index == 0) {
u32 *tmp;
crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
PAGE_CACHE_SIZE - sizeof(u64));
btrfs_csum_final(crc, (char *)&crc);
crc++;
tmp = orig;
*tmp = crc;
}
/* This pinned extent is out of our range */
if (start >= block_group->key.objectid +
block_group->key.offset)
break;
kunmap(page);
len = block_group->key.objectid +
block_group->key.offset - start;
len = min(len, end + 1 - start);
bytes += PAGE_CACHE_SIZE;
entries++;
ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
if (ret)
goto out_nospc;
index++;
} while (node || next_page);
start = end + 1;
}
/* Write out the bitmaps */
list_for_each_safe(pos, n, &bitmap_list) {
void *addr;
struct btrfs_free_space *entry =
list_entry(pos, struct btrfs_free_space, list);
if (index >= num_pages) {
out_of_space = true;
break;
}
page = pages[index];
addr = kmap(page);
memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
kunmap(page);
bytes += PAGE_CACHE_SIZE;
ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
if (ret)
goto out_nospc;
list_del_init(&entry->list);
index++;
}
if (out_of_space) {
btrfs_drop_pages(pages, num_pages);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state,
GFP_NOFS);
ret = 0;
goto out;
}
/* Zero out the rest of the pages just to make sure */
while (index < num_pages) {
void *addr;
io_ctl_zero_remaining_pages(&io_ctl);
page = pages[index];
addr = kmap(page);
memset(addr, 0, PAGE_CACHE_SIZE);
kunmap(page);
bytes += PAGE_CACHE_SIZE;
index++;
}
ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
bytes, &cached_state);
btrfs_drop_pages(pages, num_pages);
ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
0, i_size_read(inode), &cached_state);
io_ctl_drop_pages(&io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state, GFP_NOFS);
if (ret) {
ret = 0;
if (ret)
goto out;
}
BTRFS_I(inode)->generation = trans->transid;
filemap_write_and_wait(inode->i_mapping);
ret = filemap_write_and_wait(inode->i_mapping);
if (ret)
goto out;
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;
key.type = 0;
ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0) {
ret = -1;
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
GFP_NOFS);
goto out;
}
leaf = path->nodes[0];
......@@ -816,15 +971,16 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
found_key.offset != offset) {
ret = -1;
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, NULL,
GFP_NOFS);
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
NULL, GFP_NOFS);
btrfs_release_path(path);
goto out;
}
}
BTRFS_I(inode)->generation = trans->transid;
header = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_free_space_header);
btrfs_set_free_space_entries(leaf, header, entries);
......@@ -833,16 +989,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
ret = 1;
err = 0;
out:
kfree(pages);
if (ret != 1) {
invalidate_inode_pages2_range(inode->i_mapping, 0, index);
io_ctl_free(&io_ctl);
if (err) {
invalidate_inode_pages2(inode->i_mapping);
BTRFS_I(inode)->generation = 0;
}
btrfs_update_inode(trans, root, inode);
return ret;
return err;
out_nospc:
list_for_each_safe(pos, n, &bitmap_list) {
struct btrfs_free_space *entry =
list_entry(pos, struct btrfs_free_space, list);
list_del_init(&entry->list);
}
io_ctl_drop_pages(&io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state, GFP_NOFS);
goto out;
}
int btrfs_write_out_cache(struct btrfs_root *root,
......@@ -869,14 +1035,15 @@ int btrfs_write_out_cache(struct btrfs_root *root,
ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
path, block_group->key.objectid);
if (ret < 0) {
if (ret) {
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_ERROR;
spin_unlock(&block_group->lock);
ret = 0;
#ifdef DEBUG
printk(KERN_ERR "btrfs: failed to write free space cace "
"for block group %llu\n", block_group->key.objectid);
#endif
}
iput(inode);
......@@ -2472,9 +2639,19 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
spin_unlock(&ctl->tree_lock);
if (bytes >= minlen) {
int update_ret;
update_ret = btrfs_update_reserved_bytes(block_group,
bytes, 1, 1);
struct btrfs_space_info *space_info;
int update = 0;
space_info = block_group->space_info;
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
if (!block_group->ro) {
block_group->reserved += bytes;
space_info->bytes_reserved += bytes;
update = 1;
}
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
ret = btrfs_error_discard_extent(fs_info->extent_root,
start,
......@@ -2482,9 +2659,16 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
&actually_trimmed);
btrfs_add_free_space(block_group, start, bytes);
if (!update_ret)
btrfs_update_reserved_bytes(block_group,
bytes, 0, 1);
if (update) {
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
if (block_group->ro)
space_info->bytes_readonly += bytes;
block_group->reserved -= bytes;
space_info->bytes_reserved -= bytes;
spin_unlock(&space_info->lock);
spin_unlock(&block_group->lock);
}
if (ret)
break;
......@@ -2643,9 +2827,13 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
return 0;
ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
if (ret < 0)
if (ret) {
btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
printk(KERN_ERR "btrfs: failed to write free ino cache "
"for root %llu\n", root->root_key.objectid);
#endif
}
iput(inode);
return ret;
......
......@@ -465,14 +465,16 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
/* Just to make sure we have enough space */
prealloc += 8 * PAGE_CACHE_SIZE;
ret = btrfs_check_data_free_space(inode, prealloc);
ret = btrfs_delalloc_reserve_space(inode, prealloc);
if (ret)
goto out_put;
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret)
if (ret) {
btrfs_delalloc_release_space(inode, prealloc);
goto out_put;
}
btrfs_free_reserved_data_space(inode, prealloc);
out_put:
......
......@@ -1792,12 +1792,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
}
ret = 0;
out:
if (nolock) {
if (trans)
btrfs_end_transaction_nolock(trans, root);
} else {
if (root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, ordered_extent->len);
if (trans)
if (trans) {
if (nolock)
btrfs_end_transaction_nolock(trans, root);
else
btrfs_end_transaction(trans, root);
}
......@@ -2079,89 +2079,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
up_read(&root->fs_info->cleanup_work_sem);
}
/*
* calculate extra metadata reservation when snapshotting a subvolume
* contains orphan files.
*/
void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve)
{
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
u64 num_bytes;
int index;
root = pending->root;
if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
return;
block_rsv = root->orphan_block_rsv;
/* orphan block reservation for the snapshot */
num_bytes = block_rsv->size;
/*
* after the snapshot is created, COWing tree blocks may use more
* space than it frees. So we should make sure there is enough
* reserved space.
*/
index = trans->transid & 0x1;
if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
num_bytes += block_rsv->size -
(block_rsv->reserved + block_rsv->freed[index]);
}
*bytes_to_reserve += num_bytes;
}
void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending)
{
struct btrfs_root *root = pending->root;
struct btrfs_root *snap = pending->snap;
struct btrfs_block_rsv *block_rsv;
u64 num_bytes;
int index;
int ret;
if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
return;
/* refill source subvolume's orphan block reservation */
block_rsv = root->orphan_block_rsv;
index = trans->transid & 0x1;
if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
num_bytes = block_rsv->size -
(block_rsv->reserved + block_rsv->freed[index]);
ret = btrfs_block_rsv_migrate(&pending->block_rsv,
root->orphan_block_rsv,
num_bytes);
BUG_ON(ret);
}
/* setup orphan block reservation for the snapshot */
block_rsv = btrfs_alloc_block_rsv(snap);
BUG_ON(!block_rsv);
btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
snap->orphan_block_rsv = block_rsv;
num_bytes = root->orphan_block_rsv->size;
ret = btrfs_block_rsv_migrate(&pending->block_rsv,
block_rsv, num_bytes);
BUG_ON(ret);
#if 0
/* insert orphan item for the snapshot */
WARN_ON(!root->orphan_item_inserted);
ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
snap->root_key.objectid);
BUG_ON(ret);
snap->orphan_item_inserted = 1;
#endif
}
enum btrfs_orphan_cleanup_state {
ORPHAN_CLEANUP_STARTED = 1,
ORPHAN_CLEANUP_DONE = 2,
......@@ -2247,9 +2164,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
}
spin_unlock(&root->orphan_lock);
if (block_rsv)
btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
/* grab metadata reservation from transaction handle */
if (reserve) {
ret = btrfs_orphan_reserve_metadata(trans, inode);
......@@ -2316,6 +2230,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
struct btrfs_key key, found_key;
struct btrfs_trans_handle *trans;
struct inode *inode;
u64 last_objectid = 0;
int ret = 0, nr_unlink = 0, nr_truncate = 0;
if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
......@@ -2367,41 +2282,49 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* crossing root thing. we store the inode number in the
* offset of the orphan item.
*/
if (found_key.offset == last_objectid) {
printk(KERN_ERR "btrfs: Error removing orphan entry, "
"stopping orphan cleanup\n");
ret = -EINVAL;
goto out;
}
last_objectid = found_key.offset;
found_key.objectid = found_key.offset;
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
ret = PTR_RET(inode);
if (ret && ret != -ESTALE)
goto out;
}
/*
* add this inode to the orphan list so btrfs_orphan_del does
* the proper thing when we hit it
*/
spin_lock(&root->orphan_lock);
list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
spin_unlock(&root->orphan_lock);
/*
* if this is a bad inode, means we actually succeeded in
* removing the inode, but not the orphan record, which means
* we need to manually delete the orphan since iput will just
* do a destroy_inode
* Inode is already gone but the orphan item is still there,
* kill the orphan item.
*/
if (is_bad_inode(inode)) {
trans = btrfs_start_transaction(root, 0);
if (ret == -ESTALE) {
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
btrfs_orphan_del(trans, inode);
ret = btrfs_del_orphan_item(trans, root,
found_key.objectid);
BUG_ON(ret);
btrfs_end_transaction(trans, root);
iput(inode);
continue;
}
/*
* add this inode to the orphan list so btrfs_orphan_del does
* the proper thing when we hit it
*/
spin_lock(&root->orphan_lock);
list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
spin_unlock(&root->orphan_lock);
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
if (!S_ISREG(inode->i_mode)) {
......@@ -2835,7 +2758,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
u64 ino = btrfs_ino(inode);
u64 dir_ino = btrfs_ino(dir);
trans = btrfs_start_transaction(root, 10);
/*
* 1 for the possible orphan item
* 1 for the dir item
* 1 for the dir index
* 1 for the inode ref
* 1 for the inode ref in the tree log
* 2 for the dir entries in the log
* 1 for the inode
*/
trans = btrfs_start_transaction(root, 8);
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans;
......@@ -2858,7 +2790,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
return ERR_PTR(-ENOMEM);
}
trans = btrfs_start_transaction(root, 0);
/* 1 for the orphan item */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
btrfs_free_path(path);
root->fs_info->enospc_unlink = 0;
......@@ -2963,6 +2896,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
err = 0;
out:
btrfs_free_path(path);
/* Migrate the orphan reservation over */
if (!err)
err = btrfs_block_rsv_migrate(trans->block_rsv,
&root->fs_info->global_block_rsv,
btrfs_calc_trans_metadata_size(root, 1));
if (err) {
btrfs_end_transaction(trans, root);
root->fs_info->enospc_unlink = 0;
......@@ -3368,6 +3307,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
pgoff_t index = from >> PAGE_CACHE_SHIFT;
unsigned offset = from & (PAGE_CACHE_SIZE-1);
struct page *page;
gfp_t mask = btrfs_alloc_write_mask(mapping);
int ret = 0;
u64 page_start;
u64 page_end;
......@@ -3380,7 +3320,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
ret = -ENOMEM;
again:
page = find_or_create_page(mapping, index, GFP_NOFS);
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto out;
......@@ -3613,6 +3553,8 @@ void btrfs_evict_inode(struct inode *inode)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv, *global_rsv;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
unsigned long nr;
int ret;
......@@ -3640,22 +3582,55 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
rsv = btrfs_alloc_block_rsv(root);
if (!rsv) {
btrfs_orphan_del(NULL, inode);
goto no_delete;
}
rsv->size = min_size;
global_rsv = &root->fs_info->global_block_rsv;
btrfs_i_size_write(inode, 0);
/*
* This is a bit simpler than btrfs_truncate since
*
* 1) We've already reserved our space for our orphan item in the
* unlink.
* 2) We're going to delete the inode item, so we don't need to update
* it at all.
*
* So we just need to reserve some slack space in case we add bytes when
* doing the truncate.
*/
while (1) {
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = root->orphan_block_rsv;
ret = btrfs_block_rsv_refill(root, rsv, min_size);
/*
* Try and steal from the global reserve since we will
* likely not use this space anyway, we want to try as
* hard as possible to get this to work.
*/
if (ret)
ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
ret = btrfs_block_rsv_check(trans, root,
root->orphan_block_rsv, 0, 5);
if (ret) {
BUG_ON(ret != -EAGAIN);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
continue;
printk(KERN_WARNING "Could not get space for a "
"delete, will truncate on mount %d\n", ret);
btrfs_orphan_del(NULL, inode);
btrfs_free_block_rsv(root, rsv);
goto no_delete;
}
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
btrfs_orphan_del(NULL, inode);
btrfs_free_block_rsv(root, rsv);
goto no_delete;
}
trans->block_rsv = rsv;
ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
if (ret != -EAGAIN)
break;
......@@ -3664,14 +3639,17 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_end_transaction(trans, root);
trans = NULL;
btrfs_btree_balance_dirty(root, nr);
}
btrfs_free_block_rsv(root, rsv);
if (ret == 0) {
trans->block_rsv = root->orphan_block_rsv;
ret = btrfs_orphan_del(trans, inode);
BUG_ON(ret);
}
trans->block_rsv = &root->fs_info->trans_block_rsv;
if (!(root == root->fs_info->tree_root ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
btrfs_return_ino(root, btrfs_ino(inode));
......@@ -6541,6 +6519,7 @@ static int btrfs_truncate(struct inode *inode)
struct btrfs_trans_handle *trans;
unsigned long nr;
u64 mask = root->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
if (ret)
......@@ -6588,19 +6567,23 @@ static int btrfs_truncate(struct inode *inode)
rsv = btrfs_alloc_block_rsv(root);
if (!rsv)
return -ENOMEM;
btrfs_add_durable_block_rsv(root->fs_info, rsv);
rsv->size = min_size;
/*
* 1 for the truncate slack space
* 1 for the orphan item we're going to add
* 1 for the orphan item deletion
* 1 for updating the inode.
*/
trans = btrfs_start_transaction(root, 4);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out;
}
/*
* Reserve space for the truncate process. Truncate should be adding
* space, but if there are snapshots it may end up using space.
*/
ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
/* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
min_size);
BUG_ON(ret);
ret = btrfs_orphan_add(trans, inode);
......@@ -6609,21 +6592,6 @@ static int btrfs_truncate(struct inode *inode)
goto out;
}
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
/*
* Ok so we've already migrated our bytes over for the truncate, so here
* just reserve the one slot we need for updating the inode.
*/
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out;
}
trans->block_rsv = rsv;
/*
* setattr is responsible for setting the ordered_data_close flag,
* but that is only tested during the last file release. That
......@@ -6645,20 +6613,30 @@ static int btrfs_truncate(struct inode *inode)
btrfs_add_ordered_operation(trans, root, inode);
while (1) {
ret = btrfs_block_rsv_refill(root, rsv, min_size);
if (ret) {
/*
* This can only happen with the original transaction we
* started above, every other time we shouldn't have a
* transaction started yet.
*/
if (ret == -EAGAIN)
goto end_trans;
err = ret;
break;
}
if (!trans) {
trans = btrfs_start_transaction(root, 3);
/* Just need the 1 for updating the inode */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out;
}
ret = btrfs_truncate_reserve_metadata(trans, root,
rsv);
BUG_ON(ret);
trans->block_rsv = rsv;
}
trans->block_rsv = rsv;
ret = btrfs_truncate_inode_items(trans, root, inode,
inode->i_size,
BTRFS_EXTENT_DATA_KEY);
......@@ -6673,7 +6651,7 @@ static int btrfs_truncate(struct inode *inode)
err = ret;
break;
}
end_trans:
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
trans = NULL;
......@@ -6755,9 +6733,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->last_sub_trans = 0;
ei->logged_trans = 0;
ei->delalloc_bytes = 0;
ei->reserved_bytes = 0;
ei->disk_i_size = 0;
ei->flags = 0;
ei->csum_bytes = 0;
ei->index_cnt = (u64)-1;
ei->last_unlink_trans = 0;
......@@ -6803,6 +6781,8 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(inode->i_data.nrpages);
WARN_ON(BTRFS_I(inode)->outstanding_extents);
WARN_ON(BTRFS_I(inode)->reserved_extents);
WARN_ON(BTRFS_I(inode)->delalloc_bytes);
WARN_ON(BTRFS_I(inode)->csum_bytes);
/*
* This can happen where we create an inode, but somebody else also
......
......@@ -117,7 +117,7 @@ void btrfs_update_iflags(struct inode *inode)
/*
* Inherit flags from the parent inode.
*
* Unlike extN we don't have any flags we don't want to inherit currently.
* Currently only the compression flags and the cow flags are inherited.
*/
void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
{
......@@ -128,12 +128,17 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
flags = BTRFS_I(dir)->flags;
if (S_ISREG(inode->i_mode))
flags &= ~BTRFS_INODE_DIRSYNC;
else if (!S_ISDIR(inode->i_mode))
flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);
if (flags & BTRFS_INODE_NOCOMPRESS) {
BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
} else if (flags & BTRFS_INODE_COMPRESS) {
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
}
if (flags & BTRFS_INODE_NODATACOW)
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
BTRFS_I(inode)->flags = flags;
btrfs_update_iflags(inode);
}
......@@ -843,6 +848,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
int i_done;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
if (isize == 0)
return 0;
......@@ -860,7 +866,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
for (i = 0; i < num_pages; i++) {
struct page *page;
page = find_or_create_page(inode->i_mapping,
start_index + i, GFP_NOFS);
start_index + i, mask);
if (!page)
break;
......
......@@ -2041,8 +2041,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
BUG_ON(IS_ERR(trans));
trans->block_rsv = rc->block_rsv;
ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
min_reserved, 0);
ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved);
if (ret) {
BUG_ON(ret != -EAGAIN);
ret = btrfs_commit_transaction(trans, root);
......@@ -2152,8 +2151,7 @@ int prepare_to_merge(struct reloc_control *rc, int err)
again:
if (!err) {
num_bytes = rc->merging_rsv_size;
ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
num_bytes);
ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
if (ret)
err = ret;
}
......@@ -2427,7 +2425,7 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
num_bytes = calcu_metadata_size(rc, node, 1) * 2;
trans->block_rsv = rc->block_rsv;
ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
if (ret) {
if (ret == -EAGAIN)
rc->commit_transaction = 1;
......@@ -2922,6 +2920,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
unsigned long last_index;
struct page *page;
struct file_ra_state *ra;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
int nr = 0;
int ret = 0;
......@@ -2956,7 +2955,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
ra, NULL, index,
last_index + 1 - index);
page = find_or_create_page(inode->i_mapping, index,
GFP_NOFS);
mask);
if (!page) {
btrfs_delalloc_release_metadata(inode,
PAGE_CACHE_SIZE);
......@@ -3645,14 +3644,11 @@ int prepare_to_relocate(struct reloc_control *rc)
* btrfs_init_reloc_root will use them when there
* is no reservation in transaction handle.
*/
ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
rc->extent_root->nodesize * 256);
if (ret)
return ret;
rc->block_rsv->refill_used = 1;
btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv);
memset(&rc->cluster, 0, sizeof(rc->cluster));
rc->search_start = rc->block_group->key.objectid;
rc->extents_found = 0;
......@@ -3777,8 +3773,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
}
ret = btrfs_block_rsv_check(trans, rc->extent_root,
rc->block_rsv, 0, 5);
ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
if (ret < 0) {
if (ret != -EAGAIN) {
err = ret;
......
......@@ -40,6 +40,7 @@
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/cleancache.h>
#include <linux/mnt_namespace.h>
#include "compat.h"
#include "delayed-inode.h"
#include "ctree.h"
......@@ -58,6 +59,7 @@
#include <trace/events/btrfs.h>
static const struct super_operations btrfs_super_ops;
static struct file_system_type btrfs_fs_type;
static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
char nbuf[16])
......@@ -162,7 +164,7 @@ enum {
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
Opt_inode_cache, Opt_err,
Opt_inode_cache, Opt_no_space_cache, Opt_err,
};
static match_table_t tokens = {
......@@ -195,6 +197,7 @@ static match_table_t tokens = {
{Opt_subvolrootid, "subvolrootid=%d"},
{Opt_defrag, "autodefrag"},
{Opt_inode_cache, "inode_cache"},
{Opt_no_space_cache, "no_space_cache"},
{Opt_err, NULL},
};
......@@ -206,14 +209,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
{
struct btrfs_fs_info *info = root->fs_info;
substring_t args[MAX_OPT_ARGS];
char *p, *num, *orig;
char *p, *num, *orig = NULL;
u64 cache_gen;
int intarg;
int ret = 0;
char *compress_type;
bool compress_force = false;
cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
if (cache_gen)
btrfs_set_opt(info->mount_opt, SPACE_CACHE);
if (!options)
return 0;
goto out;
/*
* strsep changes the string, duplicate it because parse_options
......@@ -360,9 +368,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, DISCARD);
break;
case Opt_space_cache:
printk(KERN_INFO "btrfs: enabling disk space caching\n");
btrfs_set_opt(info->mount_opt, SPACE_CACHE);
break;
case Opt_no_space_cache:
printk(KERN_INFO "btrfs: disabling disk space caching\n");
btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
break;
case Opt_inode_cache:
printk(KERN_INFO "btrfs: enabling inode map caching\n");
btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
......@@ -391,6 +402,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
}
}
out:
if (!ret && btrfs_test_opt(root, SPACE_CACHE))
printk(KERN_INFO "btrfs: disk space caching is enabled\n");
kfree(orig);
return ret;
}
......@@ -411,7 +424,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
int intarg;
if (!options)
goto out;
return 0;
/*
* strsep changes the string, duplicate it because parse_options
......@@ -460,26 +473,15 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
error = btrfs_scan_one_device(match_strdup(&args[0]),
flags, holder, fs_devices);
if (error)
goto out_free_opts;
goto out;
break;
default:
break;
}
}
out_free_opts:
out:
kfree(orig);
out:
/*
* If no subvolume name is specified we use the default one. Allocate
* a copy of the string "." here so that code later in the
* mount path doesn't care if it's the default volume or another one.
*/
if (!*subvol_name) {
*subvol_name = kstrdup(".", GFP_KERNEL);
if (!*subvol_name)
return -ENOMEM;
}
return error;
}
......@@ -492,7 +494,6 @@ static struct dentry *get_default_root(struct super_block *sb,
struct btrfs_path *path;
struct btrfs_key location;
struct inode *inode;
struct dentry *dentry;
u64 dir_id;
int new = 0;
......@@ -566,29 +567,7 @@ static struct dentry *get_default_root(struct super_block *sb,
return dget(sb->s_root);
}
if (new) {
const struct qstr name = { .name = "/", .len = 1 };
/*
* New inode, we need to make the dentry a sibling of s_root so
* everything gets cleaned up properly on unmount.
*/
dentry = d_alloc(sb->s_root, &name);
if (!dentry) {
iput(inode);
return ERR_PTR(-ENOMEM);
}
d_splice_alias(inode, dentry);
} else {
/*
* We found the inode in cache, just find a dentry for it and
* put the reference to the inode we just got.
*/
dentry = d_find_alias(inode);
iput(inode);
}
return dentry;
return d_obtain_alias(inode);
}
static int btrfs_fill_super(struct super_block *sb,
......@@ -719,6 +698,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",noacl");
if (btrfs_test_opt(root, SPACE_CACHE))
seq_puts(seq, ",space_cache");
else
seq_puts(seq, ",no_space_cache");
if (btrfs_test_opt(root, CLEAR_CACHE))
seq_puts(seq, ",clear_cache");
if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
......@@ -753,6 +734,118 @@ static int btrfs_set_super(struct super_block *s, void *data)
return set_anon_super(s, data);
}
/*
* This will strip out the subvol=%s argument for an argument string and add
* subvolid=0 to make sure we get the actual tree root for path walking to the
* subvol we want.
*/
static char *setup_root_args(char *args)
{
unsigned copied = 0;
unsigned len = strlen(args) + 2;
char *pos;
char *ret;
/*
* We need the same args as before, but minus
*
* subvol=a
*
* and add
*
* subvolid=0
*
* which is a difference of 2 characters, so we allocate strlen(args) +
* 2 characters.
*/
ret = kzalloc(len * sizeof(char), GFP_NOFS);
if (!ret)
return NULL;
pos = strstr(args, "subvol=");
/* This shouldn't happen, but just in case.. */
if (!pos) {
kfree(ret);
return NULL;
}
/*
* The subvol=<> arg is not at the front of the string, copy everybody
* up to that into ret.
*/
if (pos != args) {
*pos = '\0';
strcpy(ret, args);
copied += strlen(args);
pos++;
}
strncpy(ret + copied, "subvolid=0", len - copied);
/* Length of subvolid=0 */
copied += 10;
/*
* If there is no , after the subvol= option then we know there's no
* other options and we can just return.
*/
pos = strchr(pos, ',');
if (!pos)
return ret;
/* Copy the rest of the arguments into our buffer */
strncpy(ret + copied, pos, len - copied);
copied += strlen(pos);
return ret;
}
static struct dentry *mount_subvol(const char *subvol_name, int flags,
const char *device_name, char *data)
{
struct super_block *s;
struct dentry *root;
struct vfsmount *mnt;
struct mnt_namespace *ns_private;
char *newargs;
struct path path;
int error;
newargs = setup_root_args(data);
if (!newargs)
return ERR_PTR(-ENOMEM);
mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
newargs);
kfree(newargs);
if (IS_ERR(mnt))
return ERR_CAST(mnt);
ns_private = create_mnt_ns(mnt);
if (IS_ERR(ns_private)) {
mntput(mnt);
return ERR_CAST(ns_private);
}
/*
* This will trigger the automount of the subvol so we can just
* drop the mnt we have here and return the dentry that we
* found.
*/
error = vfs_path_lookup(mnt->mnt_root, mnt, subvol_name,
LOOKUP_FOLLOW, &path);
put_mnt_ns(ns_private);
if (error)
return ERR_PTR(error);
/* Get a ref to the sb and the dentry we found and return it */
s = path.mnt->mnt_sb;
atomic_inc(&s->s_active);
root = dget(path.dentry);
path_put(&path);
down_write(&s->s_umount);
return root;
}
/*
* Find a superblock for the given device / mount point.
......@@ -784,13 +877,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
if (error)
return ERR_PTR(error);
if (subvol_name) {
root = mount_subvol(subvol_name, flags, device_name, data);
kfree(subvol_name);
return root;
}
error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
if (error)
goto error_free_subvol_name;
return ERR_PTR(error);
error = btrfs_open_devices(fs_devices, mode, fs_type);
if (error)
goto error_free_subvol_name;
return ERR_PTR(error);
if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
error = -EACCES;
......@@ -815,14 +914,15 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
bdev = fs_devices->latest_bdev;
s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
if (IS_ERR(s))
goto error_s;
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto error_close_devices;
}
if (s->s_root) {
if ((flags ^ s->s_flags) & MS_RDONLY) {
deactivate_locked_super(s);
error = -EBUSY;
goto error_close_devices;
return ERR_PTR(-EBUSY);
}
btrfs_close_devices(fs_devices);
......@@ -837,64 +937,25 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
flags & MS_SILENT ? 1 : 0);
if (error) {
deactivate_locked_super(s);
goto error_free_subvol_name;
return ERR_PTR(error);
}
btrfs_sb(s)->fs_info->bdev_holder = fs_type;
s->s_flags |= MS_ACTIVE;
}
/* if they gave us a subvolume name bind mount into that */
if (strcmp(subvol_name, ".")) {
struct dentry *new_root;
root = get_default_root(s, subvol_rootid);
if (IS_ERR(root)) {
error = PTR_ERR(root);
deactivate_locked_super(s);
goto error_free_subvol_name;
}
mutex_lock(&root->d_inode->i_mutex);
new_root = lookup_one_len(subvol_name, root,
strlen(subvol_name));
mutex_unlock(&root->d_inode->i_mutex);
if (IS_ERR(new_root)) {
dput(root);
deactivate_locked_super(s);
error = PTR_ERR(new_root);
goto error_free_subvol_name;
}
if (!new_root->d_inode) {
dput(root);
dput(new_root);
deactivate_locked_super(s);
error = -ENXIO;
goto error_free_subvol_name;
}
dput(root);
root = new_root;
} else {
root = get_default_root(s, subvol_objectid);
if (IS_ERR(root)) {
error = PTR_ERR(root);
deactivate_locked_super(s);
goto error_free_subvol_name;
}
root = get_default_root(s, subvol_objectid);
if (IS_ERR(root)) {
deactivate_locked_super(s);
return root;
}
kfree(subvol_name);
return root;
error_s:
error = PTR_ERR(s);
error_close_devices:
btrfs_close_devices(fs_devices);
kfree(fs_info);
kfree(tree_root);
error_free_subvol_name:
kfree(subvol_name);
return ERR_PTR(error);
}
......
......@@ -275,7 +275,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
*/
if (num_items > 0 && root != root->fs_info->chunk_root) {
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(NULL, root,
ret = btrfs_block_rsv_add(root,
&root->fs_info->trans_block_rsv,
num_bytes);
if (ret)
......@@ -418,8 +418,8 @@ static int should_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
int ret;
ret = btrfs_block_rsv_check(trans, root,
&root->fs_info->global_block_rsv, 0, 5);
ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
return ret ? 1 : 0;
}
......@@ -427,17 +427,26 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_block_rsv *rsv = trans->block_rsv;
int updates;
smp_mb();
if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
return 1;
/*
* We need to do this in case we're deleting csums so the global block
* rsv get's used instead of the csum block rsv.
*/
trans->block_rsv = NULL;
updates = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (updates)
btrfs_run_delayed_refs(trans, root, updates);
trans->block_rsv = rsv;
return should_end_transaction(trans, root);
}
......@@ -453,6 +462,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
return 0;
}
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
while (count < 4) {
unsigned long cur = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
......@@ -473,8 +484,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
count++;
}
btrfs_trans_release_metadata(trans, root);
if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
should_end_transaction(trans, root)) {
trans->transaction->blocked = 1;
......@@ -562,50 +571,21 @@ int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
int btrfs_write_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark)
{
int ret;
int err = 0;
int werr = 0;
struct page *page;
struct inode *btree_inode = root->fs_info->btree_inode;
struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
u64 start = 0;
u64 end;
unsigned long index;
while (1) {
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
mark);
if (ret)
break;
while (start <= end) {
cond_resched();
index = start >> PAGE_CACHE_SHIFT;
start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
page = find_get_page(btree_inode->i_mapping, index);
if (!page)
continue;
btree_lock_page_hook(page);
if (!page->mapping) {
unlock_page(page);
page_cache_release(page);
continue;
}
if (PageWriteback(page)) {
if (PageDirty(page))
wait_on_page_writeback(page);
else {
unlock_page(page);
page_cache_release(page);
continue;
}
}
err = write_one_page(page, 0);
if (err)
werr = err;
page_cache_release(page);
}
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
mark)) {
convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark,
GFP_NOFS);
err = filemap_fdatawrite_range(mapping, start, end);
if (err)
werr = err;
cond_resched();
start = end + 1;
}
if (err)
werr = err;
......@@ -621,39 +601,20 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
int btrfs_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark)
{
int ret;
int err = 0;
int werr = 0;
struct page *page;
struct inode *btree_inode = root->fs_info->btree_inode;
struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
u64 start = 0;
u64 end;
unsigned long index;
while (1) {
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
mark);
if (ret)
break;
clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
while (start <= end) {
index = start >> PAGE_CACHE_SHIFT;
start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
page = find_get_page(btree_inode->i_mapping, index);
if (!page)
continue;
if (PageDirty(page)) {
btree_lock_page_hook(page);
wait_on_page_writeback(page);
err = write_one_page(page, 0);
if (err)
werr = err;
}
wait_on_page_writeback(page);
page_cache_release(page);
cond_resched();
}
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
EXTENT_NEED_WAIT)) {
clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS);
err = filemap_fdatawait_range(mapping, start, end);
if (err)
werr = err;
cond_resched();
start = end + 1;
}
if (err)
werr = err;
......@@ -911,10 +872,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
btrfs_orphan_pre_snapshot(trans, pending, &to_reserve);
if (to_reserve > 0) {
ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
ret = btrfs_block_rsv_add(root, &pending->block_rsv,
to_reserve);
if (ret) {
pending->error = ret;
......@@ -1002,7 +962,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
BUG_ON(IS_ERR(pending->snap));
btrfs_reloc_post_snapshot(trans, pending);
btrfs_orphan_post_snapshot(trans, pending);
fail:
kfree(new_root_item);
trans->block_rsv = rsv;
......@@ -1043,7 +1002,7 @@ static void update_super_roots(struct btrfs_root *root)
super->root = root_item->bytenr;
super->generation = root_item->generation;
super->root_level = root_item->level;
if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
if (btrfs_test_opt(root, SPACE_CACHE))
super->cache_generation = root_item->generation;
}
......@@ -1168,14 +1127,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_run_ordered_operations(root, 0);
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
/* make a pass through all the delayed refs we have so far
* any runnings procs may add more while we are here
*/
ret = btrfs_run_delayed_refs(trans, root, 0);
BUG_ON(ret);
btrfs_trans_release_metadata(trans, root);
cur_trans = trans->transaction;
/*
* set the flushing flag so procs in this transaction have to
......
......@@ -1013,8 +1013,13 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
}
BUG_ON(ret);
if (device->bytes_used > 0)
device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
if (device->bytes_used > 0) {
u64 len = btrfs_dev_extent_length(leaf, extent);
device->bytes_used -= len;
spin_lock(&root->fs_info->free_chunk_lock);
root->fs_info->free_chunk_space += len;
spin_unlock(&root->fs_info->free_chunk_lock);
}
ret = btrfs_del_item(trans, root, path);
out:
......@@ -1356,6 +1361,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
if (ret)
goto error_undo;
spin_lock(&root->fs_info->free_chunk_lock);
root->fs_info->free_chunk_space = device->total_bytes -
device->bytes_used;
spin_unlock(&root->fs_info->free_chunk_lock);
device->in_fs_metadata = 0;
btrfs_scrub_cancel_dev(root, device);
......@@ -1691,6 +1701,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
root->fs_info->fs_devices->num_can_discard++;
root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
spin_lock(&root->fs_info->free_chunk_lock);
root->fs_info->free_chunk_space += device->total_bytes;
spin_unlock(&root->fs_info->free_chunk_lock);
if (!blk_queue_nonrot(bdev_get_queue(bdev)))
root->fs_info->fs_devices->rotating = 1;
......@@ -2192,8 +2206,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
lock_chunks(root);
device->total_bytes = new_size;
if (device->writeable)
if (device->writeable) {
device->fs_devices->total_rw_bytes -= diff;
spin_lock(&root->fs_info->free_chunk_lock);
root->fs_info->free_chunk_space -= diff;
spin_unlock(&root->fs_info->free_chunk_lock);
}
unlock_chunks(root);
again:
......@@ -2257,6 +2275,9 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
device->total_bytes = old_size;
if (device->writeable)
device->fs_devices->total_rw_bytes += diff;
spin_lock(&root->fs_info->free_chunk_lock);
root->fs_info->free_chunk_space += diff;
spin_unlock(&root->fs_info->free_chunk_lock);
unlock_chunks(root);
goto done;
}
......@@ -2615,6 +2636,11 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
index++;
}
spin_lock(&extent_root->fs_info->free_chunk_lock);
extent_root->fs_info->free_chunk_space -= (stripe_size *
map->num_stripes);
spin_unlock(&extent_root->fs_info->free_chunk_lock);
index = 0;
stripe = &chunk->stripe;
while (index < map->num_stripes) {
......@@ -3616,8 +3642,13 @@ static int read_one_dev(struct btrfs_root *root,
fill_device_from_item(leaf, dev_item, device);
device->dev_root = root->fs_info->dev_root;
device->in_fs_metadata = 1;
if (device->writeable)
if (device->writeable) {
device->fs_devices->total_rw_bytes += device->total_bytes;
spin_lock(&root->fs_info->free_chunk_lock);
root->fs_info->free_chunk_space += device->total_bytes -
device->bytes_used;
spin_unlock(&root->fs_info->free_chunk_lock);
}
ret = 0;
return ret;
}
......
......@@ -127,6 +127,17 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
again:
ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
name, name_len, value, size);
/*
* If we're setting an xattr to a new value but the new value is say
* exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
* back from split_leaf. This is because it thinks we'll be extending
* the existing item size, but we're asking for enough space to add the
* item itself. So if we get EOVERFLOW just set ret to EEXIST and let
* the rest of the function figure it out.
*/
if (ret == -EOVERFLOW)
ret = -EEXIST;
if (ret == -EEXIST) {
if (flags & XATTR_CREATE)
goto out;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部