Commit ff95acb6 authored by Chris Mason

Merge branch 'integration' into for-linus

@@ -34,6 +34,9 @@ struct btrfs_inode {
 	 */
 	struct btrfs_key location;

+	/* Lock for counters */
+	spinlock_t lock;
+
 	/* the extent_tree has caches of all the extent mappings to disk */
 	struct extent_map_tree extent_tree;

@@ -134,8 +137,8 @@ struct btrfs_inode {
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
 	 */
-	atomic_t outstanding_extents;
-	atomic_t reserved_extents;
+	unsigned outstanding_extents;
+	unsigned reserved_extents;

 	/*
	 * ordered_data_close is set by truncate when a file that used
@@ -184,4 +187,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 	BTRFS_I(inode)->disk_i_size = size;
 }

+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+				       struct inode *inode)
+{
+	if (root == root->fs_info->tree_root ||
+	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+		return true;
+	return false;
+}
+
 #endif
@@ -755,6 +755,8 @@ struct btrfs_space_info {
 				   chunks for this space */
 	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */

+	unsigned int flush:1;		/* set if we are trying to make space */
+
 	unsigned int force_alloc;	/* set if we need to force a chunk
 					   alloc for this space */

@@ -764,7 +766,7 @@ struct btrfs_space_info {
 	struct list_head block_groups[BTRFS_NR_RAID_TYPES];
 	spinlock_t lock;
 	struct rw_semaphore groups_sem;
-	atomic_t caching_threads;
+	wait_queue_head_t wait;
 };

 struct btrfs_block_rsv {
@@ -824,6 +826,7 @@ struct btrfs_caching_control {
 	struct list_head list;
 	struct mutex mutex;
 	wait_queue_head_t wait;
+	struct btrfs_work work;
 	struct btrfs_block_group_cache *block_group;
 	u64 progress;
 	atomic_t count;
@@ -1032,6 +1035,8 @@ struct btrfs_fs_info {
 	struct btrfs_workers endio_write_workers;
 	struct btrfs_workers endio_freespace_worker;
 	struct btrfs_workers submit_workers;
+	struct btrfs_workers caching_workers;
+
 	/*
 	 * fixup workers take dirty pages that didn't properly go through
 	 * the cow mechanism and make them safe to write.  It happens
@@ -2128,7 +2133,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 /* extent-tree.c */
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
-						 int num_items)
+						 unsigned num_items)
 {
 	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
 		3 * num_items;
@@ -2222,9 +2227,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root,
-				 int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2330,7 +2332,7 @@ struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
 void btrfs_set_path_blocking(struct btrfs_path *p);
 void btrfs_clear_path_blocking(struct btrfs_path *p,
-			       struct extent_buffer *held);
+			       struct extent_buffer *held, int held_rw);
 void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
...
@@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
 	}

 	/* reset all the locked nodes in the patch to spinning locks. */
-	btrfs_clear_path_blocking(path, NULL);
+	btrfs_clear_path_blocking(path, NULL, 0);

 	/* insert the keys of the items */
 	ret = setup_items_for_insert(trans, root, path, keys, data_size,
...
@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
 	data_size = sizeof(*dir_item) + name_len + data_len;
 	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
 					name, name_len);
-	/*
-	 * FIXME: at some point we should handle xattr's that are larger than
-	 * what we can fit in our leaf.  We set location to NULL b/c we arent
-	 * pointing at anything else, that will change if we store the xattr
-	 * data in a separate inode.
-	 */
-	BUG_ON(IS_ERR(dir_item));
+	if (IS_ERR(dir_item))
+		return PTR_ERR(dir_item);

 	memset(&location, 0, sizeof(location));
 	leaf = path->nodes[0];
...
@@ -100,38 +100,83 @@ struct async_submit_bio {
 	struct btrfs_work work;
 };

-/* These are used to set the lockdep class on the extent buffer locks.
- * The class is set by the readpage_end_io_hook after the buffer has
- * passed csum validation but before the pages are unlocked.
+/*
+ * Lockdep class keys for extent_buffer->lock's in this root.  For a given
+ * eb, the lockdep key is determined by the btrfs_root it belongs to and
+ * the level the eb occupies in the tree.
+ *
+ * Different roots are used for different purposes and may nest inside each
+ * other and they require separate keysets.  As lockdep keys should be
+ * static, assign keysets according to the purpose of the root as indicated
+ * by btrfs_root->objectid.  This ensures that all special purpose roots
+ * have separate keysets.
 *
- * The lockdep class is also set by btrfs_init_new_buffer on freshly
- * allocated blocks.
+ * Lock-nesting across peer nodes is always done with the immediate parent
+ * node locked thus preventing deadlock.  As lockdep doesn't know this, use
+ * subclass to avoid triggering lockdep warning in such cases.
 *
- * The class is based on the level in the tree block, which allows lockdep
- * to know that lower nodes nest inside the locks of higher nodes.
+ * The key is set by the readpage_end_io_hook after the buffer has passed
+ * csum validation but before the pages are unlocked.  It is also set by
+ * btrfs_init_new_buffer on freshly allocated blocks.
 *
- * We also add a check to make sure the highest level of the tree is
- * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
- * code needs update as well.
+ * We also add a check to make sure the highest level of the tree is the
+ * same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this code
+ * needs update as well.
 */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # if BTRFS_MAX_LEVEL != 8
 #  error
 # endif
-static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
-static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
-	/* leaf */
-	"btrfs-extent-00",
-	"btrfs-extent-01",
-	"btrfs-extent-02",
-	"btrfs-extent-03",
-	"btrfs-extent-04",
-	"btrfs-extent-05",
-	"btrfs-extent-06",
-	"btrfs-extent-07",
-	/* highest possible level */
-	"btrfs-extent-08",
+
+static struct btrfs_lockdep_keyset {
+	u64			id;		/* root objectid */
+	const char		*name_stem;	/* lock name stem */
+	char			names[BTRFS_MAX_LEVEL + 1][20];
+	struct lock_class_key	keys[BTRFS_MAX_LEVEL + 1];
+} btrfs_lockdep_keysets[] = {
+	{ .id = BTRFS_ROOT_TREE_OBJECTID,	.name_stem = "root"	},
+	{ .id = BTRFS_EXTENT_TREE_OBJECTID,	.name_stem = "extent"	},
+	{ .id = BTRFS_CHUNK_TREE_OBJECTID,	.name_stem = "chunk"	},
+	{ .id = BTRFS_DEV_TREE_OBJECTID,	.name_stem = "dev"	},
+	{ .id = BTRFS_FS_TREE_OBJECTID,		.name_stem = "fs"	},
+	{ .id = BTRFS_CSUM_TREE_OBJECTID,	.name_stem = "csum"	},
+	{ .id = BTRFS_ORPHAN_OBJECTID,		.name_stem = "orphan"	},
+	{ .id = BTRFS_TREE_LOG_OBJECTID,	.name_stem = "log"	},
+	{ .id = BTRFS_TREE_RELOC_OBJECTID,	.name_stem = "treloc"	},
+	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID,	.name_stem = "dreloc"	},
+	{ .id = 0,				.name_stem = "tree"	},
 };
+
+void __init btrfs_init_lockdep(void)
+{
+	int i, j;
+
+	/* initialize lockdep class names */
+	for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
+		struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
+
+		for (j = 0; j < ARRAY_SIZE(ks->names); j++)
+			snprintf(ks->names[j], sizeof(ks->names[j]),
+				 "btrfs-%s-%02d", ks->name_stem, j);
+	}
+}
+
+void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
+				    int level)
+{
+	struct btrfs_lockdep_keyset *ks;
+
+	BUG_ON(level >= ARRAY_SIZE(ks->keys));
+
+	/* find the matching keyset, id 0 is the default entry */
+	for (ks = btrfs_lockdep_keysets; ks->id; ks++)
+		if (ks->id == objectid)
+			break;
+
+	lockdep_set_class_and_name(&eb->lock,
+				   &ks->keys[level], ks->names[level]);
+}
 #endif

 /*
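The keyset comment above boils down to one naming scheme: each root purpose gets BTRFS_MAX_LEVEL + 1 lock names built from its stem, "btrfs-<stem>-<level>". A rough, stand-alone sketch of that naming loop (user-space only, with a made-up subset of stems; not code from this commit):

	#include <stdio.h>

	#define MAX_LEVEL 8	/* mirrors BTRFS_MAX_LEVEL */

	int main(void)
	{
		/* hypothetical subset of the name stems used above */
		const char *stems[] = { "root", "extent", "chunk", "tree" };
		char name[20];
		unsigned i, j;

		for (i = 0; i < sizeof(stems) / sizeof(stems[0]); i++)
			for (j = 0; j <= MAX_LEVEL; j++) {
				/* same format string as btrfs_init_lockdep() */
				snprintf(name, sizeof(name), "btrfs-%s-%02d",
					 stems[i], j);
				printf("%s\n", name);
			}
		return 0;
	}

Running it prints "btrfs-extent-00" through "btrfs-extent-08" and so on, which is exactly the set of lock names one keyset carries.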
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 	unsigned long len;
 	unsigned long cur_len;
 	unsigned long offset = BTRFS_CSUM_SIZE;
-	char *map_token = NULL;
 	char *kaddr;
 	unsigned long map_start;
 	unsigned long map_len;
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 	len = buf->len - offset;
 	while (len > 0) {
 		err = map_private_extent_buffer(buf, offset, 32,
-					&map_token, &kaddr,
-					&map_start, &map_len, KM_USER0);
+					&kaddr, &map_start, &map_len);
 		if (err)
 			return 1;
 		cur_len = min(len, map_len - (offset - map_start));
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 				      crc, cur_len);
 		len -= cur_len;
 		offset += cur_len;
-		unmap_extent_buffer(buf, map_token, KM_USER0);
 	}
 	if (csum_size > sizeof(inline_result)) {
 		result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root,
 	return 0;
 }

-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
-{
-	lockdep_set_class_and_name(&eb->lock,
-			  &btrfs_eb_class[level],
-			  btrfs_eb_name[level]);
-}
-#endif
-
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 			       struct extent_state *state)
 {
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	}
 	found_level = btrfs_header_level(eb);

-	btrfs_set_buffer_lockdep_class(eb, found_level);
+	btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
+				       eb, found_level);

 	ret = csum_tree_block(root, eb, 1);
 	if (ret) {
@@ -1603,7 +1637,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 		goto fail_bdi;
 	}

-	fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);

 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
 	INIT_LIST_HEAD(&fs_info->trans_list);
@@ -1807,6 +1841,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 					   fs_info->thread_pool_size),
 			   &fs_info->generic_worker);

+	btrfs_init_workers(&fs_info->caching_workers, "cache",
+			   2, &fs_info->generic_worker);
+
 	/* a higher idle thresh on the submit workers makes it much more
 	 * likely that bios will be send down in a sane order to the
 	 * devices
@@ -1860,6 +1897,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_start_workers(&fs_info->endio_write_workers, 1);
 	btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
 	btrfs_start_workers(&fs_info->delayed_workers, 1);
+	btrfs_start_workers(&fs_info->caching_workers, 1);

 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2117,6 +2155,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_stop_workers(&fs_info->endio_freespace_worker);
 	btrfs_stop_workers(&fs_info->submit_workers);
 	btrfs_stop_workers(&fs_info->delayed_workers);
+	btrfs_stop_workers(&fs_info->caching_workers);
 fail_alloc:
 	kfree(fs_info->delayed_root);
 fail_iput:
@@ -2584,6 +2623,7 @@ int close_ctree(struct btrfs_root *root)
 	btrfs_stop_workers(&fs_info->endio_freespace_worker);
 	btrfs_stop_workers(&fs_info->submit_workers);
 	btrfs_stop_workers(&fs_info->delayed_workers);
+	btrfs_stop_workers(&fs_info->caching_workers);

 	btrfs_close_devices(fs_info->fs_devices);
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
...
@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page);

 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
+void btrfs_init_lockdep(void);
+void btrfs_set_buffer_lockdep_class(u64 objectid,
+				    struct extent_buffer *eb, int level);
 #else
-static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
-						  int level)
+static inline void btrfs_init_lockdep(void)
+{ }
+static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
+						  struct extent_buffer *eb, int level)
 {
 }
 #endif
...
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 	return total_added;
 }

-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
 {
-	struct btrfs_block_group_cache *block_group = data;
-	struct btrfs_fs_info *fs_info = block_group->fs_info;
-	struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
-	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_caching_control *caching_ctl;
+	struct btrfs_root *extent_root;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
@@ -334,9 +334,14 @@ static int caching_kthread(void *data)
 	u32 nritems;
 	int ret = 0;

+	caching_ctl = container_of(work, struct btrfs_caching_control, work);
+	block_group = caching_ctl->block_group;
+	fs_info = block_group->fs_info;
+	extent_root = fs_info->extent_root;
+
 	path = btrfs_alloc_path();
 	if (!path)
-		return -ENOMEM;
+		goto out;

 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -433,13 +438,11 @@ static int caching_kthread(void *data)
 	free_excluded_extents(extent_root, block_group);

 	mutex_unlock(&caching_ctl->mutex);
+out:
 	wake_up(&caching_ctl->wait);

 	put_caching_control(caching_ctl);
-	atomic_dec(&block_group->space_info->caching_threads);
 	btrfs_put_block_group(block_group);
-
-	return 0;
 }

 static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl;
-	struct task_struct *tsk;
 	int ret = 0;

 	smp_mb();
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	caching_ctl->progress = cache->key.objectid;
 	/* one for caching kthread, one for caching block group list */
 	atomic_set(&caching_ctl->count, 2);
+	caching_ctl->work.func = caching_thread;

 	spin_lock(&cache->lock);
 	if (cache->cached != BTRFS_CACHE_NO) {
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
 	up_write(&fs_info->extent_commit_sem);

-	atomic_inc(&cache->space_info->caching_threads);
 	btrfs_get_block_group(cache);

-	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
-			  cache->key.objectid);
-	if (IS_ERR(tsk)) {
-		ret = PTR_ERR(tsk);
-		printk(KERN_ERR "error running thread %d\n", ret);
-		BUG();
-	}
+	btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);

 	return ret;
 }
@@ -2932,9 +2928,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->full = 0;
 	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
 	found->chunk_alloc = 0;
+	found->flush = 0;
+	init_waitqueue_head(&found->wait);
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
-	atomic_set(&found->caching_threads, 0);
 	return 0;
 }

@@ -3314,6 +3311,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	if (reserved == 0)
 		return 0;

+	smp_mb();
+	if (root->fs_info->delalloc_bytes == 0) {
+		if (trans)
+			return 0;
+		btrfs_wait_ordered_extents(root, 0, 0);
+		return 0;
+	}
+
 	max_reclaim = min(reserved, to_reclaim);

 	while (loops < 1024) {
@@ -3356,6 +3361,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		}
 	}

+	if (reclaimed >= to_reclaim && !trans)
+		btrfs_wait_ordered_extents(root, 0, 0);
+
 	return reclaimed >= to_reclaim;
 }

@@ -3380,15 +3387,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
 	u64 num_bytes = orig_bytes;
 	int retries = 0;
 	int ret = 0;
-	bool reserved = false;
 	bool committed = false;
+	bool flushing = false;

 again:
-	ret = -ENOSPC;
-	if (reserved)
-		num_bytes = 0;
-
+	ret = 0;
 	spin_lock(&space_info->lock);
+	/*
+	 * We only want to wait if somebody other than us is flushing and we are
+	 * actually alloed to flush.
+	 */
+	while (flush && !flushing && space_info->flush) {
+		spin_unlock(&space_info->lock);
+		/*
+		 * If we have a trans handle we can't wait because the flusher
+		 * may have to commit the transaction, which would mean we would
+		 * deadlock since we are waiting for the flusher to finish, but
+		 * hold the current transaction open.
+		 */
+		if (trans)
+			return -EAGAIN;
+		ret = wait_event_interruptible(space_info->wait,
+					       !space_info->flush);
+		/* Must have been interrupted, return */
+		if (ret)
+			return -EINTR;
+
+		spin_lock(&space_info->lock);
+	}
+
+	ret = -ENOSPC;
 	unused = space_info->bytes_used + space_info->bytes_reserved +
 		 space_info->bytes_pinned + space_info->bytes_readonly +
 		 space_info->bytes_may_use;
@@ -3403,8 +3431,7 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
 	if (unused <= space_info->total_bytes) {
 		unused = space_info->total_bytes - unused;
 		if (unused >= num_bytes) {
-			if (!reserved)
-				space_info->bytes_reserved += orig_bytes;
+			space_info->bytes_reserved += orig_bytes;
 			ret = 0;
 		} else {
 			/*
@@ -3429,17 +3456,14 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
 	 * to reclaim space we can actually use it instead of somebody else
 	 * stealing it from us.
 	 */
-	if (ret && !reserved) {
-		space_info->bytes_reserved += orig_bytes;
-		reserved = true;
+	if (ret && flush) {
+		flushing = true;
+		space_info->flush = 1;
 	}

 	spin_unlock(&space_info->lock);

-	if (!ret)
-		return 0;
-
-	if (!flush)
+	if (!ret || !flush)
 		goto out;

 	/*
@@ -3447,11 +3471,11 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
 	 * metadata until after the IO is completed.
 	 */
 	ret = shrink_delalloc(trans, root, num_bytes, 1);
-	if (ret > 0)
-		return 0;
-	else if (ret < 0)
+	if (ret < 0)
 		goto out;

+	ret = 0;
+
 	/*
 	 * So if we were overcommitted it's possible that somebody else flushed
 	 * out enough space and we simply didn't have enough space to reclaim,
@@ -3462,11 +3486,11 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
 		goto again;
 	}

+	spin_lock(&space_info->lock);
 	/*
 	 * Not enough space to be reclaimed, don't bother committing the
 	 * transaction.
 	 */
-	spin_lock(&space_info->lock);
 	if (space_info->bytes_pinned < orig_bytes)
 		ret = -ENOSPC;
 	spin_unlock(&space_info->lock);
@@ -3474,10 +3498,13 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
 		goto out;

 	ret = -EAGAIN;
-	if (trans || committed)
+	if (trans)
 		goto out;

 	ret = -ENOSPC;
+	if (committed)
+		goto out;
+
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
 		goto out;
@@ -3489,12 +3516,12 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
 	}

 out:
-	if (reserved) {
+	if (flushing) {
 		spin_lock(&space_info->lock);
-		space_info->bytes_reserved -= orig_bytes;
+		space_info->flush = 0;
+		wake_up_all(&space_info->wait);
 		spin_unlock(&space_info->lock);
 	}
 	return ret;
 }
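The flush/wait dance added to reserve_metadata_bytes() is a classic single-flusher gate: only one reserver flushes at a time, later callers sleep on space_info->wait unless they hold a transaction (waiting there could deadlock), and the flusher wakes everyone once it clears the flag. A minimal user-space model of that pattern, assuming pthreads and made-up helper names (illustration only, not the kernel API):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* toy stand-ins for space_info->flush and space_info->wait */
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t  wait_q = PTHREAD_COND_INITIALIZER;
	static bool flushing;

	/* returns false when the caller holds a transaction and must not wait */
	static bool begin_flush(bool have_trans)
	{
		pthread_mutex_lock(&lock);
		while (flushing) {
			if (have_trans) {	/* would deadlock: bail out like -EAGAIN */
				pthread_mutex_unlock(&lock);
				return false;
			}
			pthread_cond_wait(&wait_q, &lock);
		}
		flushing = true;		/* we are now the one flusher */
		pthread_mutex_unlock(&lock);
		return true;
	}

	static void end_flush(void)
	{
		pthread_mutex_lock(&lock);
		flushing = false;
		pthread_cond_broadcast(&wait_q);	/* wake_up_all() equivalent */
		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		if (begin_flush(false)) {
			puts("flushing");
			end_flush();
		}
		return 0;
	}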
@@ -3704,7 +3731,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 	if (commit_trans) {
 		if (trans)
 			return -EAGAIN;
-
 		trans = btrfs_join_transaction(root);
 		BUG_ON(IS_ERR(trans));
 		ret = btrfs_commit_transaction(trans, root);
@@ -3874,26 +3900,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
 	return 0;
 }

-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root,
-				 int num_items)
-{
-	u64 num_bytes;
-	int ret;
-
-	if (num_items == 0 || root->fs_info->chunk_root == root)
-		return 0;
-
-	num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
-	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-				  num_bytes);
-	if (!ret) {
-		trans->bytes_reserved += num_bytes;
-		trans->block_rsv = &root->fs_info->trans_block_rsv;
-	}
-	return ret;
-}
-
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root)
 {
@@ -3944,6 +3950,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }

+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+	unsigned dropped_extents = 0;
+
+	spin_lock(&BTRFS_I(inode)->lock);
+	BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+	BTRFS_I(inode)->outstanding_extents--;
+
+	/*
+	 * If we have more or the same amount of outsanding extents than we have
+	 * reserved then we need to leave the reserved extents count alone.
+	 */
+	if (BTRFS_I(inode)->outstanding_extents >=
+	    BTRFS_I(inode)->reserved_extents)
+		goto out;
+
+	dropped_extents = BTRFS_I(inode)->reserved_extents -
+		BTRFS_I(inode)->outstanding_extents;
+	BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+	spin_unlock(&BTRFS_I(inode)->lock);
+	return dropped_extents;
+}
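drop_outstanding_extent() keeps the invariant reserved_extents >= outstanding_extents and returns only the surplus reservations that can now be given back. A small stand-alone model of that bookkeeping (hypothetical struct and helper names, no locking, not part of the commit):

	#include <assert.h>
	#include <stdio.h>

	struct inode_counters {
		unsigned outstanding;	/* extents we think we'll end up writing */
		unsigned reserved;	/* extents we hold metadata reservations for */
	};

	/* mirrors drop_outstanding_extent(): how many reservations to free */
	static unsigned drop_outstanding(struct inode_counters *c)
	{
		unsigned dropped = 0;

		assert(c->outstanding > 0);
		c->outstanding--;
		if (c->outstanding < c->reserved) {
			dropped = c->reserved - c->outstanding;
			c->reserved -= dropped;
		}
		return dropped;
	}

	int main(void)
	{
		struct inode_counters c = { .outstanding = 3, .reserved = 3 };

		/* releasing one outstanding extent frees exactly one reservation */
		printf("dropped %u\n", drop_outstanding(&c));	/* prints: dropped 1 */
		return 0;
	}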
 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
 {
 	return num_bytes >>= 3;
@@ -3953,9 +3983,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
-	u64 to_reserve;
-	int nr_extents;
-	int reserved_extents;
+	u64 to_reserve = 0;
+	unsigned nr_extents = 0;
 	int ret;

 	if (btrfs_transaction_in_commit(root->fs_info))
@@ -3963,66 +3992,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)

 	num_bytes = ALIGN(num_bytes, root->sectorsize);

-	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
-	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+	spin_lock(&BTRFS_I(inode)->lock);
+	BTRFS_I(inode)->outstanding_extents++;

-	if (nr_extents > reserved_extents) {
-		nr_extents -= reserved_extents;
+	if (BTRFS_I(inode)->outstanding_extents >
+	    BTRFS_I(inode)->reserved_extents) {
+		nr_extents = BTRFS_I(inode)->outstanding_extents -
+			BTRFS_I(inode)->reserved_extents;
+		BTRFS_I(inode)->reserved_extents += nr_extents;

 		to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
-	} else {
-		nr_extents = 0;
-		to_reserve = 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->lock);

 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
 	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
-	if (ret)
+	if (ret) {
+		unsigned dropped;
+		/*
+		 * We don't need the return value since our reservation failed,
+		 * we just need to clean up our counter.
+		 */
+		dropped = drop_outstanding_extent(inode);
+		WARN_ON(dropped > 1);
 		return ret;
+	}

-	atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
-	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 	block_rsv_add_bytes(block_rsv, to_reserve, 1);

-	if (block_rsv->size > 512 * 1024 * 1024)
-		shrink_delalloc(NULL, root, to_reserve, 0);
-
 	return 0;
 }

 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 to_free;
-	int nr_extents;
-	int reserved_extents;
+	u64 to_free = 0;
+	unsigned dropped;

 	num_bytes = ALIGN(num_bytes, root->sectorsize);
-	atomic_dec(&BTRFS_I(inode)->outstanding_extents);
-	WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
-	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
-	do {
-		int old, new;
-
-		nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
-		if (nr_extents >= reserved_extents) {
-			nr_extents = 0;
-			break;
-		}
-		old = reserved_extents;
-		nr_extents = reserved_extents - nr_extents;
-		new = reserved_extents - nr_extents;
-		old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
-				     reserved_extents, new);
-		if (likely(old == reserved_extents))
-			break;
-		reserved_extents = old;
-	} while (1);
+	dropped = drop_outstanding_extent(inode);

 	to_free = calc_csum_metadata_size(inode, num_bytes);
-	if (nr_extents > 0)
-		to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+	if (dropped > 0)
+		to_free += btrfs_calc_trans_metadata_size(root, dropped);

 	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
 				to_free);
@@ -4990,14 +5002,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 			}

 			/*
-			 * We only want to start kthread caching if we are at
-			 * the point where we will wait for caching to make
-			 * progress, or if our ideal search is over and we've
-			 * found somebody to start caching.
+			 * The caching workers are limited to 2 threads, so we
+			 * can queue as much work as we care to.
 			 */
-			if (loop > LOOP_CACHING_NOWAIT ||
-			    (loop > LOOP_FIND_IDEAL &&
-			     atomic_read(&space_info->caching_threads) < 2)) {
+			if (loop > LOOP_FIND_IDEAL) {
 				ret = cache_block_group(block_group, trans,
 							orig_root, 0);
 				BUG_ON(ret);
@@ -5219,8 +5227,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
 			found_uncached_bg = false;
 			loop++;
-			if (!ideal_cache_percent &&
-			    atomic_read(&space_info->caching_threads))
+			if (!ideal_cache_percent)
 				goto search;

 			/*
@@ -5623,7 +5630,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 	btrfs_set_header_generation(buf, trans->transid);
-	btrfs_set_buffer_lockdep_class(buf, level);
+	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
 	btrfs_tree_lock(buf);
 	clean_tree_block(trans, root, buf);
@@ -5910,7 +5917,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 			return 1;

 		if (path->locks[level] && !wc->keep_locks) {
-			btrfs_tree_unlock(eb);
+			btrfs_tree_unlock_rw(eb, path->locks[level]);
 			path->locks[level] = 0;
 		}
 		return 0;
@@ -5934,7 +5941,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 	 * keep the tree lock
 	 */
 	if (path->locks[level] && level > 0) {
-		btrfs_tree_unlock(eb);
+		btrfs_tree_unlock_rw(eb, path->locks[level]);
 		path->locks[level] = 0;
 	}
 	return 0;
@@ -6047,7 +6054,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	BUG_ON(level != btrfs_header_level(next));
 	path->nodes[level] = next;
 	path->slots[level] = 0;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 	wc->level = level;
 	if (wc->level == 1)
 		wc->reada_slot = 0;
@@ -6118,7 +6125,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 			BUG_ON(level == 0);
 			btrfs_tree_lock(eb);
 			btrfs_set_lock_blocking(eb);
-			path->locks[level] = 1;
+			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

 			ret = btrfs_lookup_extent_info(trans, root,
 						       eb->start, eb->len,
@@ -6127,8 +6134,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 			BUG_ON(ret);
 			BUG_ON(wc->refs[level] == 0);
 			if (wc->refs[level] == 1) {
-				btrfs_tree_unlock(eb);
-				path->locks[level] = 0;
+				btrfs_tree_unlock_rw(eb, path->locks[level]);
 				return 1;
 			}
 		}
@@ -6150,7 +6156,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 		    btrfs_header_generation(eb) == trans->transid) {
 			btrfs_tree_lock(eb);
 			btrfs_set_lock_blocking(eb);
-			path->locks[level] = 1;
+			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 		}
 		clean_tree_block(trans, root, eb);
 	}
@@ -6229,7 +6235,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 			return 0;

 		if (path->locks[level]) {
-			btrfs_tree_unlock(path->nodes[level]);
+			btrfs_tree_unlock_rw(path->nodes[level],
+					     path->locks[level]);
 			path->locks[level] = 0;
 		}
 		free_extent_buffer(path->nodes[level]);
@@ -6281,7 +6288,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 		path->nodes[level] = btrfs_lock_root_node(root);
 		btrfs_set_lock_blocking(path->nodes[level]);
 		path->slots[level] = 0;
-		path->locks[level] = 1;
+		path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 		memset(&wc->update_progress, 0,
 		       sizeof(wc->update_progress));
 	} else {
@@ -6449,7 +6456,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 	level = btrfs_header_level(node);
 	path->nodes[level] = node;
 	path->slots[level] = 0;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

 	wc->refs[parent_level] = 1;
 	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6524,15 +6531,28 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
 	return flags;
 }

-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 {
 	struct btrfs_space_info *sinfo = cache->space_info;
 	u64 num_bytes;
+	u64 min_allocable_bytes;
 	int ret = -ENOSPC;

 	if (cache->ro)
 		return 0;

+	/*
+	 * We need some metadata space and system metadata space for
+	 * allocating chunks in some corner cases until we force to set
+	 * it to be readonly.
+	 */
+	if ((sinfo->flags &
+	     (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+	    !force)
+		min_allocable_bytes = 1 * 1024 * 1024;
+	else
+		min_allocable_bytes = 0;
+
 	spin_lock(&sinfo->lock);
 	spin_lock(&cache->lock);
 	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
@@ -6540,7 +6560,8 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)

 	if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
 	    sinfo->bytes_may_use + sinfo->bytes_readonly +
-	    cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+	    cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+	    sinfo->total_bytes) {
 		sinfo->bytes_readonly += num_bytes;
 		sinfo->bytes_reserved += cache->reserved_pinned;
 		cache->reserved_pinned = 0;
@@ -6571,7 +6592,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 	do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
 		       CHUNK_ALLOC_FORCE);

-	ret = set_block_group_ro(cache);
+	ret = set_block_group_ro(cache, 0);
 	if (!ret)
 		goto out;
 	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6579,7 +6600,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 		       CHUNK_ALLOC_FORCE);
 	if (ret < 0)
 		goto out;
-	ret = set_block_group_ro(cache);
+	ret = set_block_group_ro(cache, 0);
 out:
 	btrfs_end_transaction(trans, root);
 	return ret;
@@ -7016,7 +7037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		set_avail_alloc_bits(root->fs_info, cache->flags);
 		if (btrfs_chunk_readonly(root, cache->key.objectid))
-			set_block_group_ro(cache);
+			set_block_group_ro(cache, 1);
 	}

 	list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7030,9 +7051,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		 * mirrored block groups.
 		 */
 		list_for_each_entry(cache, &space_info->block_groups[3], list)
-			set_block_group_ro(cache);
+			set_block_group_ro(cache, 1);
 		list_for_each_entry(cache, &space_info->block_groups[4], list)
-			set_block_group_ro(cache);
+			set_block_group_ro(cache, 1);
 	}

 	init_global_block_rsv(info);
...
@@ -281,11 +281,10 @@ static int merge_state(struct extent_io_tree *tree,
 		if (other->start == state->end + 1 &&
 		    other->state == state->state) {
 			merge_cb(tree, state, other);
-			other->start = state->start;
-			state->tree = NULL;
-			rb_erase(&state->rb_node, &tree->state);
-			free_extent_state(state);
-			state = NULL;
+			state->end = other->end;
+			other->tree = NULL;
+			rb_erase(&other->rb_node, &tree->state);
+			free_extent_state(other);
 		}
 	}

@@ -351,7 +350,6 @@ static int insert_state(struct extent_io_tree *tree,
 		       "%llu %llu\n", (unsigned long long)found->start,
 		       (unsigned long long)found->end,
 		       (unsigned long long)start, (unsigned long long)end);
-		free_extent_state(state);
 		return -EEXIST;
 	}
 	state->tree = tree;
@@ -500,7 +498,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 			cached_state = NULL;
 	}

-	if (cached && cached->tree && cached->start == start) {
+	if (cached && cached->tree && cached->start <= start &&
+	    cached->end > start) {
 		if (clear)
 			atomic_dec(&cached->refs);
 		state = cached;
@@ -742,7 +741,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	spin_lock(&tree->lock);
 	if (cached_state && *cached_state) {
 		state = *cached_state;
-		if (state->start == start && state->tree) {
+		if (state->start <= start && state->end > start &&
+		    state->tree) {
 			node = &state->rb_node;
 			goto hit_next;
 		}
@@ -783,13 +783,13 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		if (err)
 			goto out;

-		next_node = rb_next(node);
 		cache_state(state, cached_state);
 		merge_state(tree, state);
 		if (last_end == (u64)-1)
 			goto out;

 		start = last_end + 1;
+		next_node = rb_next(&state->rb_node);
 		if (next_node && start < end && prealloc && !need_resched()) {
 			state = rb_entry(next_node, struct extent_state,
 					 rb_node);
@@ -862,7 +862,6 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		 * Avoid to free 'prealloc' if it can be merged with
 		 * the later extent.
 		 */
-		atomic_inc(&prealloc->refs);
 		err = insert_state(tree, prealloc, start, this_end,
 				   &bits);
 		BUG_ON(err == -EEXIST);
@@ -872,7 +871,6 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 			goto out;
 		}
 		cache_state(prealloc, cached_state);
-		free_extent_state(prealloc);
 		prealloc = NULL;
 		start = this_end + 1;
 		goto search_again;
@@ -1564,7 +1562,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	int bitset = 0;

 	spin_lock(&tree->lock);
-	if (cached && cached->tree && cached->start == start)
+	if (cached && cached->tree && cached->start <= start &&
+	    cached->end > start)
 		node = &cached->rb_node;
 	else
 		node = tree_search(tree, start);
@@ -2432,6 +2431,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	int scanned = 0;
+	int tag;

 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
@@ -2442,11 +2442,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 		end = wbc->range_end >> PAGE_CACHE_SHIFT;
 		scanned = 1;
 	}
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag = PAGECACHE_TAG_TOWRITE;
+	else
+		tag = PAGECACHE_TAG_DIRTY;
 retry:
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag_pages_for_writeback(mapping, index, end);
 	while (!done && !nr_to_write_done && (index <= end) &&
-	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			      PAGECACHE_TAG_DIRTY, min(end - index,
-			      (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
 		unsigned i;

 		scanned = 1;
@@ -3022,8 +3027,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 		return NULL;
 	eb->start = start;
 	eb->len = len;
-	spin_lock_init(&eb->lock);
-	init_waitqueue_head(&eb->lock_wq);
+	rwlock_init(&eb->lock);
+	atomic_set(&eb->write_locks, 0);
+	atomic_set(&eb->read_locks, 0);
+	atomic_set(&eb->blocking_readers, 0);
+	atomic_set(&eb->blocking_writers, 0);
+	atomic_set(&eb->spinning_readers, 0);
+	atomic_set(&eb->spinning_writers, 0);
+	init_waitqueue_head(&eb->write_lock_wq);
+	init_waitqueue_head(&eb->read_lock_wq);

 #if LEAK_DEBUG
 	spin_lock_irqsave(&leak_lock, flags);
@@ -3119,7 +3131,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		i = 0;
 	}
 	for (; i < num_pages; i++, index++) {
-		p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
+		p = find_or_create_page(mapping, index, GFP_NOFS);
 		if (!p) {
 			WARN_ON(1);
 			goto free_eb;
@@ -3266,6 +3278,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
 	return was_dirty;
 }

+static int __eb_straddles_pages(u64 start, u64 len)
+{
+	if (len < PAGE_CACHE_SIZE)
+		return 1;
+	if (start & (PAGE_CACHE_SIZE - 1))
+		return 1;
+	if ((start + len) & (PAGE_CACHE_SIZE - 1))
+		return 1;
+	return 0;
+}
+
+static int eb_straddles_pages(struct extent_buffer *eb)
+{
+	return __eb_straddles_pages(eb->start, eb->len);
+}
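__eb_straddles_pages() reports whether the buffer range shares a page with anything else, i.e. whenever it is smaller than a page or not page-aligned at both ends. A quick stand-alone check of the same test, assuming 4 KiB pages (illustration only, not kernel code):

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SIZE 4096ULL	/* stands in for PAGE_CACHE_SIZE */

	/* same three conditions as __eb_straddles_pages() */
	static int straddles_pages(uint64_t start, uint64_t len)
	{
		if (len < PAGE_SIZE)
			return 1;
		if (start & (PAGE_SIZE - 1))
			return 1;
		if ((start + len) & (PAGE_SIZE - 1))
			return 1;
		return 0;
	}

	int main(void)
	{
		printf("%d\n", straddles_pages(0, 4096));	/* 0: exactly one aligned page */
		printf("%d\n", straddles_pages(0, 2048));	/* 1: smaller than a page */
		printf("%d\n", straddles_pages(1024, 8192));	/* 1: unaligned start */
		return 0;
	}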
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
 				struct extent_buffer *eb,
 				struct extent_state **cached_state)
@@ -3277,8 +3305,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
 	num_pages = num_extent_pages(eb->start, eb->len);
 	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-	clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-			      cached_state, GFP_NOFS);
+	if (eb_straddles_pages(eb)) {
+		clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+				      cached_state, GFP_NOFS);
+	}
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if (page)
@@ -3296,8 +3326,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,

 	num_pages = num_extent_pages(eb->start, eb->len);
-	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-			    NULL, GFP_NOFS);
+	if (eb_straddles_pages(eb)) {
+		set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+				    NULL, GFP_NOFS);
+	}
 	for (i = 0; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
 		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3320,9 +3352,12 @@ int extent_range_uptodate(struct extent_io_tree *tree,
 	int uptodate;
 	unsigned long index;

-	ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
-	if (ret)
-		return 1;
+	if (__eb_straddles_pages(start, end - start + 1)) {
+		ret = test_range_bit(tree, start, end,
+				     EXTENT_UPTODATE, 1, NULL);
+		if (ret)
+			return 1;
+	}
 	while (start <= end) {
 		index = start >> PAGE_CACHE_SHIFT;
 		page = find_get_page(tree->mapping, index);
@@ -3350,10 +3385,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 1;

-	ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-			   EXTENT_UPTODATE, 1, cached_state);
-	if (ret)
-		return ret;
+	if (eb_straddles_pages(eb)) {
+		ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+				EXTENT_UPTODATE, 1, cached_state);
+		if (ret)
+			return ret;
+	}

 	num_pages = num_extent_pages(eb->start, eb->len);
 	for (i = 0; i < num_pages; i++) {
@@ -3386,9 +3423,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 0;

-	if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-			   EXTENT_UPTODATE, 1, NULL)) {
-		return 0;
+	if (eb_straddles_pages(eb)) {
+		if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+				   EXTENT_UPTODATE, 1, NULL)) {
+			return 0;
+		}
 	}

 	if (start) {
@@ -3492,9 +3531,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 		page = extent_buffer_page(eb, i);

 		cur = min(len, (PAGE_CACHE_SIZE - offset));
-		kaddr = kmap_atomic(page, KM_USER1);
+		kaddr = page_address(page);
 		memcpy(dst, kaddr + offset, cur);
-		kunmap_atomic(kaddr, KM_USER1);

 		dst += cur;
 		len -= cur;
@@ -3504,9 +3542,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 }

 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
-			       unsigned long min_len, char **token, char **map,
+			       unsigned long min_len, char **map,
 			       unsigned long *map_start,
-			       unsigned long *map_len, int km)
+			       unsigned long *map_len)
 {
 	size_t offset = start & (PAGE_CACHE_SIZE - 1);
 	char *kaddr;
@@ -3536,42 +3574,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 	}

 	p = extent_buffer_page(eb, i);
-	kaddr = kmap_atomic(p, km);
-	*token = kaddr;
+	kaddr = page_address(p);
 	*map = kaddr + offset;
 	*map_len = PAGE_CACHE_SIZE - offset;
 	return 0;
 }

-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
-		      unsigned long min_len,
-		      char **token, char **map,
-		      unsigned long *map_start,
-		      unsigned long *map_len, int km)
-{
-	int err;
-	int save = 0;
-	if (eb->map_token) {
-		unmap_extent_buffer(eb, eb->map_token, km);
-		eb->map_token = NULL;
-		save = 1;
-	}
-	err = map_private_extent_buffer(eb, start, min_len, token, map,
-				       map_start, map_len, km);
-	if (!err && save) {
-		eb->map_token = *token;
-		eb->kaddr = *map;
-		eb->map_start = *map_start;
-		eb->map_len = *map_len;
-	}
-	return err;
-}
-
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
-{
-	kunmap_atomic(token, km);
-}
-
 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 			  unsigned long start,
 			  unsigned long len)
@@ -3595,9 +3603,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,

 		cur = min(len, (PAGE_CACHE_SIZE - offset));

-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = page_address(page);
 		ret = memcmp(ptr, kaddr + offset, cur);
-		kunmap_atomic(kaddr, KM_USER0);
 		if (ret)
 			break;

@@ -3630,9 +3637,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
 		WARN_ON(!PageUptodate(page));

 		cur = min(len, PAGE_CACHE_SIZE - offset);
-		kaddr = kmap_atomic(page, KM_USER1);
+		kaddr = page_address(page);
 		memcpy(kaddr + offset, src, cur);
-		kunmap_atomic(kaddr, KM_USER1);

 		src += cur;
 		len -= cur;
@@ -3661,9 +3667,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
 		WARN_ON(!PageUptodate(page));

 		cur = min(len, PAGE_CACHE_SIZE - offset);
-		kaddr = kmap_atomic(page, KM_USER0);
+		kaddr = page_address(page);
memset(kaddr + offset, c, cur); memset(kaddr + offset, c, cur);
kunmap_atomic(kaddr, KM_USER0);
len -= cur; len -= cur;
offset = 0; offset = 0;
...@@ -3694,9 +3699,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, ...@@ -3694,9 +3699,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
kaddr = kmap_atomic(page, KM_USER0); kaddr = page_address(page);
read_extent_buffer(src, kaddr + offset, src_offset, cur); read_extent_buffer(src, kaddr + offset, src_offset, cur);
kunmap_atomic(kaddr, KM_USER0);
src_offset += cur; src_offset += cur;
len -= cur; len -= cur;
...@@ -3709,20 +3713,17 @@ static void move_pages(struct page *dst_page, struct page *src_page, ...@@ -3709,20 +3713,17 @@ static void move_pages(struct page *dst_page, struct page *src_page,
unsigned long dst_off, unsigned long src_off, unsigned long dst_off, unsigned long src_off,
unsigned long len) unsigned long len)
{ {
char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); char *dst_kaddr = page_address(dst_page);
if (dst_page == src_page) { if (dst_page == src_page) {
memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
} else { } else {
char *src_kaddr = kmap_atomic(src_page, KM_USER1); char *src_kaddr = page_address(src_page);
char *p = dst_kaddr + dst_off + len; char *p = dst_kaddr + dst_off + len;
char *s = src_kaddr + src_off + len; char *s = src_kaddr + src_off + len;
while (len--) while (len--)
*--p = *--s; *--p = *--s;
kunmap_atomic(src_kaddr, KM_USER1);
} }
kunmap_atomic(dst_kaddr, KM_USER0);
} }
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
...@@ -3735,20 +3736,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page, ...@@ -3735,20 +3736,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
unsigned long dst_off, unsigned long src_off, unsigned long dst_off, unsigned long src_off,
unsigned long len) unsigned long len)
{ {
char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); char *dst_kaddr = page_address(dst_page);
char *src_kaddr; char *src_kaddr;
if (dst_page != src_page) { if (dst_page != src_page) {
src_kaddr = kmap_atomic(src_page, KM_USER1); src_kaddr = page_address(src_page);
} else { } else {
src_kaddr = dst_kaddr; src_kaddr = dst_kaddr;
BUG_ON(areas_overlap(src_off, dst_off, len)); BUG_ON(areas_overlap(src_off, dst_off, len));
} }
memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
kunmap_atomic(dst_kaddr, KM_USER0);
if (dst_page != src_page)
kunmap_atomic(src_kaddr, KM_USER1);
} }
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
......
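All of the extent_io.c hunks above replace kmap_atomic()/kunmap_atomic() pairs with bare page_address() calls. That only works if extent buffer pages never live in highmem, so the linear-mapping address returned by page_address() stays valid for the life of the page; a minimal sketch of the resulting access pattern, assuming lowmem-only metadata pages:

        /*
         * Minimal sketch, assuming extent buffer pages are lowmem-only:
         * the page is permanently mapped, so no kmap_atomic()/
         * kunmap_atomic() pairing (and no KM_USER* slot) is needed.
         */
        static void copy_from_eb_page(struct page *page, void *dst,
                                      unsigned long offset, size_t cur)
        {
                char *kaddr = page_address(page); /* always valid for lowmem */

                memcpy(dst, kaddr + offset, cur);
        }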
...@@ -120,8 +120,6 @@ struct extent_state { ...@@ -120,8 +120,6 @@ struct extent_state {
struct extent_buffer { struct extent_buffer {
u64 start; u64 start;
unsigned long len; unsigned long len;
char *map_token;
char *kaddr;
unsigned long map_start; unsigned long map_start;
unsigned long map_len; unsigned long map_len;
struct page *first_page; struct page *first_page;
...@@ -130,14 +128,26 @@ struct extent_buffer { ...@@ -130,14 +128,26 @@ struct extent_buffer {
struct rcu_head rcu_head; struct rcu_head rcu_head;
atomic_t refs; atomic_t refs;
/* the spinlock is used to protect most operations */ /* count of read lock holders on the extent buffer */
spinlock_t lock; atomic_t write_locks;
atomic_t read_locks;
atomic_t blocking_writers;
atomic_t blocking_readers;
atomic_t spinning_readers;
atomic_t spinning_writers;
/* protects write locks */
rwlock_t lock;
/* /* readers use lock_wq while they wait for the write
* when we keep the lock held while blocking, waiters go onto * lock holders to unlock
* the wq
*/ */
wait_queue_head_t lock_wq; wait_queue_head_t write_lock_wq;
/* writers use read_lock_wq while they wait for readers
* to unlock
*/
wait_queue_head_t read_lock_wq;
}; };
static inline void extent_set_compress_type(unsigned long *bio_flags, static inline void extent_set_compress_type(unsigned long *bio_flags,
...@@ -279,15 +289,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, ...@@ -279,15 +289,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
int extent_buffer_uptodate(struct extent_io_tree *tree, int extent_buffer_uptodate(struct extent_io_tree *tree,
struct extent_buffer *eb, struct extent_buffer *eb,
struct extent_state *cached_state); struct extent_state *cached_state);
int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **token, char **map,
unsigned long *map_start,
unsigned long *map_len, int km);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **token, char **map, unsigned long min_len, char **map,
unsigned long *map_start, unsigned long *map_start,
unsigned long *map_len, int km); unsigned long *map_len);
void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
int extent_range_uptodate(struct extent_io_tree *tree, int extent_range_uptodate(struct extent_io_tree *tree,
u64 start, u64 end); u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode, int extent_clear_unlock_delalloc(struct inode *inode,
......
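With map_extent_buffer()/unmap_extent_buffer() removed and the token/KM_* arguments dropped, a caller of map_private_extent_buffer() now looks roughly like this (eb, offset, size and buf are placeholders standing in for the caller's context, not code from the tree):

        char *kaddr;
        unsigned long map_start;
        unsigned long map_len;
        int err;

        /* try to get a directly addressable window for 'size' bytes at 'offset' */
        err = map_private_extent_buffer(eb, offset, size,
                                        &kaddr, &map_start, &map_len);
        if (err) {
                /* the range crosses a page boundary: fall back to copying */
                read_extent_buffer(eb, buf, offset, size);
        } else {
                memcpy(buf, kaddr + offset - map_start, size);
        }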
...@@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, ...@@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
WARN_ON(bio->bi_vcnt <= 0); WARN_ON(bio->bi_vcnt <= 0);
/*
* the free space stuff is only read when it hasn't been
* updated in the current transaction. So, we can safely
* read from the commit root and sidestep a nasty deadlock
* between reading the free space cache and updating the csum tree.
*/
if (btrfs_is_free_space_inode(root, inode))
path->search_commit_root = 1;
disk_bytenr = (u64)bio->bi_sector << 9; disk_bytenr = (u64)bio->bi_sector << 9;
if (dio) if (dio)
offset = logical_offset; offset = logical_offset;
...@@ -664,10 +673,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, ...@@ -664,10 +673,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_sector_sum *sector_sum; struct btrfs_sector_sum *sector_sum;
u32 nritems; u32 nritems;
u32 ins_size; u32 ins_size;
char *eb_map;
char *eb_token;
unsigned long map_len;
unsigned long map_start;
u16 csum_size = u16 csum_size =
btrfs_super_csum_size(&root->fs_info->super_copy); btrfs_super_csum_size(&root->fs_info->super_copy);
...@@ -814,30 +819,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, ...@@ -814,30 +819,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0])); btrfs_item_size_nr(leaf, path->slots[0]));
eb_token = NULL;
next_sector: next_sector:
if (!eb_token || write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
(unsigned long)item + csum_size >= map_start + map_len) {
int err;
if (eb_token)
unmap_extent_buffer(leaf, eb_token, KM_USER1);
eb_token = NULL;
err = map_private_extent_buffer(leaf, (unsigned long)item,
csum_size,
&eb_token, &eb_map,
&map_start, &map_len, KM_USER1);
if (err)
eb_token = NULL;
}
if (eb_token) {
memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
&sector_sum->sum, csum_size);
} else {
write_extent_buffer(leaf, &sector_sum->sum,
(unsigned long)item, csum_size);
}
total_bytes += root->sectorsize; total_bytes += root->sectorsize;
sector_sum++; sector_sum++;
...@@ -850,10 +834,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, ...@@ -850,10 +834,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
goto next_sector; goto next_sector;
} }
} }
if (eb_token) {
unmap_extent_buffer(leaf, eb_token, KM_USER1);
eb_token = NULL;
}
btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_mark_buffer_dirty(path->nodes[0]);
if (total_bytes < sums->len) { if (total_bytes < sums->len) {
btrfs_release_path(path); btrfs_release_path(path);
......
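The __btrfs_lookup_bio_sums() hunk above leans on btrfs_path's ability to search the last committed root instead of the live tree. A sketch of that pattern, with the actual csum lookup elided:

        struct btrfs_path *path;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        /*
         * The free space inode is only read from data that was valid at the
         * last commit, so searching the commit root sidesteps locking the
         * live csum tree (see the comment in the hunk above).
         */
        if (btrfs_is_free_space_inode(root, inode))
                path->search_commit_root = 1;

        /* ... csum lookups as in __btrfs_lookup_bio_sums() ... */

        btrfs_free_path(path);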
...@@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, ...@@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
again: again:
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i); pages[i] = find_or_create_page(inode->i_mapping, index + i,
GFP_NOFS);
if (!pages[i]) { if (!pages[i]) {
faili = i - 1; faili = i - 1;
err = -ENOMEM; err = -ENOMEM;
...@@ -1238,9 +1239,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ...@@ -1238,9 +1239,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* managed to copy. * managed to copy.
*/ */
if (num_pages > dirty_pages) { if (num_pages > dirty_pages) {
if (copied > 0) if (copied > 0) {
atomic_inc( spin_lock(&BTRFS_I(inode)->lock);
&BTRFS_I(inode)->outstanding_extents); BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
btrfs_delalloc_release_space(inode, btrfs_delalloc_release_space(inode,
(num_pages - dirty_pages) << (num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT); PAGE_CACHE_SHIFT);
......
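outstanding_extents and reserved_extents stop being atomics in this series; every adjustment now happens under the new per-inode spinlock so both counters can be read and changed consistently in one critical section. A sketch of the kind of combined update the delalloc reservation path (outside this section) wants to make, which a pair of independent atomics could not do race-free:

        unsigned nr_extents = 0;

        spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->outstanding_extents++;
        if (BTRFS_I(inode)->outstanding_extents >
            BTRFS_I(inode)->reserved_extents)
                nr_extents = BTRFS_I(inode)->outstanding_extents -
                             BTRFS_I(inode)->reserved_extents;
        spin_unlock(&BTRFS_I(inode)->lock);

        /* nr_extents extra items now need metadata reserved for them */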
...@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, ...@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
return inode; return inode;
spin_lock(&block_group->lock); spin_lock(&block_group->lock);
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
printk(KERN_INFO "Old style space inode found, converting.\n");
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
block_group->disk_cache_state = BTRFS_DC_CLEAR;
}
if (!btrfs_fs_closing(root->fs_info)) { if (!btrfs_fs_closing(root->fs_info)) {
block_group->inode = igrab(inode); block_group->inode = igrab(inode);
block_group->iref = 1; block_group->iref = 1;
...@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root, ...@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root,
btrfs_set_inode_gid(leaf, inode_item, 0); btrfs_set_inode_gid(leaf, inode_item, 0);
btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); BTRFS_INODE_PREALLOC);
btrfs_set_inode_nlink(leaf, inode_item, 1); btrfs_set_inode_nlink(leaf, inode_item, 1);
btrfs_set_inode_transid(leaf, inode_item, trans->transid); btrfs_set_inode_transid(leaf, inode_item, trans->transid);
btrfs_set_inode_block_group(leaf, inode_item, offset); btrfs_set_inode_block_group(leaf, inode_item, offset);
...@@ -239,17 +245,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, ...@@ -239,17 +245,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_header *header; struct btrfs_free_space_header *header;
struct extent_buffer *leaf; struct extent_buffer *leaf;
struct page *page; struct page *page;
u32 *checksums = NULL, *crc;
char *disk_crcs = NULL;
struct btrfs_key key; struct btrfs_key key;
struct list_head bitmaps; struct list_head bitmaps;
u64 num_entries; u64 num_entries;
u64 num_bitmaps; u64 num_bitmaps;
u64 generation; u64 generation;
u32 cur_crc = ~(u32)0;
pgoff_t index = 0; pgoff_t index = 0;
unsigned long first_page_offset;
int num_checksums;
int ret = 0; int ret = 0;
INIT_LIST_HEAD(&bitmaps); INIT_LIST_HEAD(&bitmaps);
...@@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, ...@@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (!num_entries) if (!num_entries)
goto out; goto out;
/* Setup everything for doing checksumming */
num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
if (!checksums)
goto out;
first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
if (!disk_crcs)
goto out;
ret = readahead_cache(inode); ret = readahead_cache(inode);
if (ret) if (ret)
goto out; goto out;
...@@ -311,18 +302,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, ...@@ -311,18 +302,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space *e; struct btrfs_free_space *e;
void *addr; void *addr;
unsigned long offset = 0; unsigned long offset = 0;
unsigned long start_offset = 0;
int need_loop = 0; int need_loop = 0;
if (!num_entries && !num_bitmaps) if (!num_entries && !num_bitmaps)
break; break;
if (index == 0) { page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
start_offset = first_page_offset;
offset = start_offset;
}
page = grab_cache_page(inode->i_mapping, index);
if (!page) if (!page)
goto free_cache; goto free_cache;
...@@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, ...@@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (index == 0) { if (index == 0) {
u64 *gen; u64 *gen;
memcpy(disk_crcs, addr, first_page_offset); /*
gen = addr + (sizeof(u32) * num_checksums); * We put a bogus crc in the front of the first page in
* case old kernels try to mount a fs with the new
* format to make sure they discard the cache.
*/
addr += sizeof(u64);
offset += sizeof(u64);
gen = addr;
if (*gen != BTRFS_I(inode)->generation) { if (*gen != BTRFS_I(inode)->generation) {
printk(KERN_ERR "btrfs: space cache generation" printk(KERN_ERR "btrfs: space cache generation"
" (%llu) does not match inode (%llu)\n", " (%llu) does not match inode (%llu)\n",
...@@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, ...@@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
page_cache_release(page); page_cache_release(page);
goto free_cache; goto free_cache;
} }
crc = (u32 *)disk_crcs; addr += sizeof(u64);
} offset += sizeof(u64);
entry = addr + start_offset;
/* First lets check our crc before we do anything fun */
cur_crc = ~(u32)0;
cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
PAGE_CACHE_SIZE - start_offset);
btrfs_csum_final(cur_crc, (char *)&cur_crc);
if (cur_crc != *crc) {
printk(KERN_ERR "btrfs: crc mismatch for page %lu\n",
index);
kunmap(page);
unlock_page(page);
page_cache_release(page);
goto free_cache;
} }
crc++; entry = addr;
while (1) { while (1) {
if (!num_entries) if (!num_entries)
...@@ -470,8 +448,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, ...@@ -470,8 +448,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
ret = 1; ret = 1;
out: out:
kfree(checksums);
kfree(disk_crcs);
return ret; return ret;
free_cache: free_cache:
__btrfs_remove_free_space_cache(ctl); __btrfs_remove_free_space_cache(ctl);
...@@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_key key; struct btrfs_key key;
u64 start, end, len; u64 start, end, len;
u64 bytes = 0; u64 bytes = 0;
u32 *crc, *checksums; u32 crc = ~(u32)0;
unsigned long first_page_offset;
int index = 0, num_pages = 0; int index = 0, num_pages = 0;
int entries = 0; int entries = 0;
int bitmaps = 0; int bitmaps = 0;
...@@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT; PAGE_CACHE_SHIFT;
/* Since the first page has all of our checksums and our generation we
* need to calculate the offset into the page that we can start writing
* our entries.
*/
first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
filemap_write_and_wait(inode->i_mapping); filemap_write_and_wait(inode->i_mapping);
btrfs_wait_ordered_range(inode, inode->i_size & btrfs_wait_ordered_range(inode, inode->i_size &
~(root->sectorsize - 1), (u64)-1); ~(root->sectorsize - 1), (u64)-1);
/* make sure we don't overflow that first page */
if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
/* this is really the same as running out of space, where we also return 0 */
printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
ret = 0;
goto out_update;
}
/* We need a checksum per page. */
crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
if (!crc)
return -1;
pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
if (!pages) { if (!pages)
kfree(crc);
return -1; return -1;
}
/* Get the cluster for this block_group if it exists */ /* Get the cluster for this block_group if it exists */
if (block_group && !list_empty(&block_group->cluster_list)) if (block_group && !list_empty(&block_group->cluster_list))
...@@ -640,7 +594,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -640,7 +594,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* know and don't freak out. * know and don't freak out.
*/ */
while (index < num_pages) { while (index < num_pages) {
page = grab_cache_page(inode->i_mapping, index); page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page) { if (!page) {
int i; int i;
...@@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
unlock_page(pages[i]); unlock_page(pages[i]);
page_cache_release(pages[i]); page_cache_release(pages[i]);
} }
goto out_free; goto out;
} }
pages[index] = page; pages[index] = page;
index++; index++;
...@@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
/* Write out the extent entries */ /* Write out the extent entries */
do { do {
struct btrfs_free_space_entry *entry; struct btrfs_free_space_entry *entry;
void *addr; void *addr, *orig;
unsigned long offset = 0; unsigned long offset = 0;
unsigned long start_offset = 0;
next_page = false; next_page = false;
if (index == 0) {
start_offset = first_page_offset;
offset = start_offset;
}
if (index >= num_pages) { if (index >= num_pages) {
out_of_space = true; out_of_space = true;
break; break;
...@@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
page = pages[index]; page = pages[index];
addr = kmap(page); orig = addr = kmap(page);
entry = addr + start_offset; if (index == 0) {
u64 *gen;
memset(addr, 0, PAGE_CACHE_SIZE); /*
* We're going to put in a bogus crc for this page to
* make sure that old kernels who aren't aware of this
* format will be sure to discard the cache.
*/
addr += sizeof(u64);
offset += sizeof(u64);
gen = addr;
*gen = trans->transid;
addr += sizeof(u64);
offset += sizeof(u64);
}
entry = addr;
memset(addr, 0, PAGE_CACHE_SIZE - offset);
while (node && !next_page) { while (node && !next_page) {
struct btrfs_free_space *e; struct btrfs_free_space *e;
...@@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
next_page = true; next_page = true;
entry++; entry++;
} }
*crc = ~(u32)0;
*crc = btrfs_csum_data(root, addr + start_offset, *crc,
PAGE_CACHE_SIZE - start_offset);
kunmap(page);
btrfs_csum_final(*crc, (char *)crc); /* Generate bogus crc value */
crc++; if (index == 0) {
u32 *tmp;
crc = btrfs_csum_data(root, orig + sizeof(u64), crc,
PAGE_CACHE_SIZE - sizeof(u64));
btrfs_csum_final(crc, (char *)&crc);
crc++;
tmp = orig;
*tmp = crc;
}
kunmap(page);
bytes += PAGE_CACHE_SIZE; bytes += PAGE_CACHE_SIZE;
...@@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
addr = kmap(page); addr = kmap(page);
memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
*crc = ~(u32)0;
*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
kunmap(page); kunmap(page);
btrfs_csum_final(*crc, (char *)crc);
crc++;
bytes += PAGE_CACHE_SIZE; bytes += PAGE_CACHE_SIZE;
list_del_init(&entry->list); list_del_init(&entry->list);
...@@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
i_size_read(inode) - 1, &cached_state, i_size_read(inode) - 1, &cached_state,
GFP_NOFS); GFP_NOFS);
ret = 0; ret = 0;
goto out_free; goto out;
} }
/* Zero out the rest of the pages just to make sure */ /* Zero out the rest of the pages just to make sure */
...@@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
index++; index++;
} }
/* Write the checksums and trans id to the first page */
{
void *addr;
u64 *gen;
page = pages[0];
addr = kmap(page);
memcpy(addr, checksums, sizeof(u32) * num_pages);
gen = addr + (sizeof(u32) * num_pages);
*gen = trans->transid;
kunmap(page);
}
ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
bytes, &cached_state); bytes, &cached_state);
btrfs_drop_pages(pages, num_pages); btrfs_drop_pages(pages, num_pages);
...@@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
if (ret) { if (ret) {
ret = 0; ret = 0;
goto out_free; goto out;
} }
BTRFS_I(inode)->generation = trans->transid; BTRFS_I(inode)->generation = trans->transid;
...@@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
goto out_free; goto out;
} }
leaf = path->nodes[0]; leaf = path->nodes[0];
if (ret > 0) { if (ret > 0) {
...@@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
EXTENT_DO_ACCOUNTING, 0, 0, NULL, EXTENT_DO_ACCOUNTING, 0, 0, NULL,
GFP_NOFS); GFP_NOFS);
btrfs_release_path(path); btrfs_release_path(path);
goto out_free; goto out;
} }
} }
header = btrfs_item_ptr(leaf, path->slots[0], header = btrfs_item_ptr(leaf, path->slots[0],
...@@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ...@@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
ret = 1; ret = 1;
out_free: out:
kfree(checksums);
kfree(pages); kfree(pages);
out_update:
if (ret != 1) { if (ret != 1) {
invalidate_inode_pages2_range(inode->i_mapping, 0, index); invalidate_inode_pages2_range(inode->i_mapping, 0, index);
BTRFS_I(inode)->generation = 0; BTRFS_I(inode)->generation = 0;
......
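Both the load and write-out hunks above agree on a new layout for page 0 of the free space cache: a u64-sized slot holding a deliberately bogus crc (so pre-change kernels fail their per-page checksum and discard the cache), then the transaction generation, then the packed free space entries. A small validation sketch of that layout (function name illustrative only):

        /* Sketch: check page 0 of the rewritten cache format. */
        static int check_cache_page0(struct inode *inode, struct page *page)
        {
                void *addr = kmap(page);
                u64 *gen = addr + sizeof(u64);  /* skip the bogus crc slot */

                if (*gen != BTRFS_I(inode)->generation) {
                        kunmap(page);
                        return -1;      /* stale: rebuild from the extent tree */
                }
                kunmap(page);
                /* free space entries start at addr + 2 * sizeof(u64) */
                return 0;
        }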
...@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, ...@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
return alloc_hint; return alloc_hint;
} }
static inline bool is_free_space_inode(struct btrfs_root *root,
struct inode *inode)
{
if (root == root->fs_info->tree_root ||
BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}
/* /*
* when extent_io.c finds a delayed allocation range in the file, * when extent_io.c finds a delayed allocation range in the file,
* the call backs end up in this code. The basic idea is to * the call backs end up in this code. The basic idea is to
...@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode, ...@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0; int ret = 0;
BUG_ON(is_free_space_inode(root, inode)); BUG_ON(btrfs_is_free_space_inode(root, inode));
trans = btrfs_join_transaction(root); trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans)); BUG_ON(IS_ERR(trans));
trans->block_rsv = &root->fs_info->delalloc_block_rsv; trans->block_rsv = &root->fs_info->delalloc_block_rsv;
...@@ -1072,7 +1063,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, ...@@ -1072,7 +1063,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
path = btrfs_alloc_path(); path = btrfs_alloc_path();
BUG_ON(!path); BUG_ON(!path);
nolock = is_free_space_inode(root, inode); nolock = btrfs_is_free_space_inode(root, inode);
if (nolock) if (nolock)
trans = btrfs_join_transaction_nolock(root); trans = btrfs_join_transaction_nolock(root);
...@@ -1298,7 +1289,9 @@ static int btrfs_split_extent_hook(struct inode *inode, ...@@ -1298,7 +1289,9 @@ static int btrfs_split_extent_hook(struct inode *inode,
if (!(orig->state & EXTENT_DELALLOC)) if (!(orig->state & EXTENT_DELALLOC))
return 0; return 0;
atomic_inc(&BTRFS_I(inode)->outstanding_extents); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
return 0; return 0;
} }
...@@ -1316,7 +1309,9 @@ static int btrfs_merge_extent_hook(struct inode *inode, ...@@ -1316,7 +1309,9 @@ static int btrfs_merge_extent_hook(struct inode *inode,
if (!(other->state & EXTENT_DELALLOC)) if (!(other->state & EXTENT_DELALLOC))
return 0; return 0;
atomic_dec(&BTRFS_I(inode)->outstanding_extents); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
return 0; return 0;
} }
...@@ -1337,12 +1332,15 @@ static int btrfs_set_bit_hook(struct inode *inode, ...@@ -1337,12 +1332,15 @@ static int btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start; u64 len = state->end + 1 - state->start;
bool do_list = !is_free_space_inode(root, inode); bool do_list = !btrfs_is_free_space_inode(root, inode);
if (*bits & EXTENT_FIRST_DELALLOC) if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC; *bits &= ~EXTENT_FIRST_DELALLOC;
else } else {
atomic_inc(&BTRFS_I(inode)->outstanding_extents); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
spin_lock(&root->fs_info->delalloc_lock); spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len; BTRFS_I(inode)->delalloc_bytes += len;
...@@ -1370,12 +1368,15 @@ static int btrfs_clear_bit_hook(struct inode *inode, ...@@ -1370,12 +1368,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start; u64 len = state->end + 1 - state->start;
bool do_list = !is_free_space_inode(root, inode); bool do_list = !btrfs_is_free_space_inode(root, inode);
if (*bits & EXTENT_FIRST_DELALLOC) if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC; *bits &= ~EXTENT_FIRST_DELALLOC;
else if (!(*bits & EXTENT_DO_ACCOUNTING)) } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
atomic_dec(&BTRFS_I(inode)->outstanding_extents); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
}
if (*bits & EXTENT_DO_ACCOUNTING) if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len); btrfs_delalloc_release_metadata(inode, len);
...@@ -1477,7 +1478,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ...@@ -1477,7 +1478,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
if (is_free_space_inode(root, inode)) if (btrfs_is_free_space_inode(root, inode))
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
else else
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
...@@ -1726,7 +1727,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ...@@ -1726,7 +1727,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
return 0; return 0;
BUG_ON(!ordered_extent); BUG_ON(!ordered_extent);
nolock = is_free_space_inode(root, inode); nolock = btrfs_is_free_space_inode(root, inode);
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list)); BUG_ON(!list_empty(&ordered_extent->list));
...@@ -2531,13 +2532,6 @@ static void btrfs_read_locked_inode(struct inode *inode) ...@@ -2531,13 +2532,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
inode_item = btrfs_item_ptr(leaf, path->slots[0], inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item); struct btrfs_inode_item);
if (!leaf->map_token)
map_private_extent_buffer(leaf, (unsigned long)inode_item,
sizeof(struct btrfs_inode_item),
&leaf->map_token, &leaf->kaddr,
&leaf->map_start, &leaf->map_len,
KM_USER1);
inode->i_mode = btrfs_inode_mode(leaf, inode_item); inode->i_mode = btrfs_inode_mode(leaf, inode_item);
inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
inode->i_uid = btrfs_inode_uid(leaf, inode_item); inode->i_uid = btrfs_inode_uid(leaf, inode_item);
...@@ -2575,11 +2569,6 @@ static void btrfs_read_locked_inode(struct inode *inode) ...@@ -2575,11 +2569,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
if (!maybe_acls) if (!maybe_acls)
cache_no_acl(inode); cache_no_acl(inode);
if (leaf->map_token) {
unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
leaf->map_token = NULL;
}
btrfs_free_path(path); btrfs_free_path(path);
switch (inode->i_mode & S_IFMT) { switch (inode->i_mode & S_IFMT) {
...@@ -2624,13 +2613,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, ...@@ -2624,13 +2613,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *item, struct btrfs_inode_item *item,
struct inode *inode) struct inode *inode)
{ {
if (!leaf->map_token)
map_private_extent_buffer(leaf, (unsigned long)item,
sizeof(struct btrfs_inode_item),
&leaf->map_token, &leaf->kaddr,
&leaf->map_start, &leaf->map_len,
KM_USER1);
btrfs_set_inode_uid(leaf, item, inode->i_uid); btrfs_set_inode_uid(leaf, item, inode->i_uid);
btrfs_set_inode_gid(leaf, item, inode->i_gid); btrfs_set_inode_gid(leaf, item, inode->i_gid);
btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
...@@ -2659,11 +2641,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, ...@@ -2659,11 +2641,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
btrfs_set_inode_block_group(leaf, item, 0); btrfs_set_inode_block_group(leaf, item, 0);
if (leaf->map_token) {
unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
leaf->map_token = NULL;
}
} }
/* /*
...@@ -2684,7 +2661,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, ...@@ -2684,7 +2661,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* The data relocation inode should also be directly updated * The data relocation inode should also be directly updated
* without delay * without delay
*/ */
if (!is_free_space_inode(root, inode) if (!btrfs_is_free_space_inode(root, inode)
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_delayed_update_inode(trans, root, inode); ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret) if (!ret)
...@@ -3398,7 +3375,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) ...@@ -3398,7 +3375,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
ret = -ENOMEM; ret = -ENOMEM;
again: again:
page = grab_cache_page(mapping, index); page = find_or_create_page(mapping, index, GFP_NOFS);
if (!page) { if (!page) {
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto out; goto out;
...@@ -3634,7 +3611,7 @@ void btrfs_evict_inode(struct inode *inode) ...@@ -3634,7 +3611,7 @@ void btrfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
is_free_space_inode(root, inode))) btrfs_is_free_space_inode(root, inode)))
goto no_delete; goto no_delete;
if (is_bad_inode(inode)) { if (is_bad_inode(inode)) {
...@@ -4277,7 +4254,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -4277,7 +4254,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (BTRFS_I(inode)->dummy_inode) if (BTRFS_I(inode)->dummy_inode)
return 0; return 0;
if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
nolock = true; nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) { if (wbc->sync_mode == WB_SYNC_ALL) {
...@@ -6735,8 +6712,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ...@@ -6735,8 +6712,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->index_cnt = (u64)-1; ei->index_cnt = (u64)-1;
ei->last_unlink_trans = 0; ei->last_unlink_trans = 0;
atomic_set(&ei->outstanding_extents, 0); spin_lock_init(&ei->lock);
atomic_set(&ei->reserved_extents, 0); ei->outstanding_extents = 0;
ei->reserved_extents = 0;
ei->ordered_data_close = 0; ei->ordered_data_close = 0;
ei->orphan_meta_reserved = 0; ei->orphan_meta_reserved = 0;
...@@ -6774,8 +6752,8 @@ void btrfs_destroy_inode(struct inode *inode) ...@@ -6774,8 +6752,8 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages); WARN_ON(inode->i_data.nrpages);
WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); WARN_ON(BTRFS_I(inode)->outstanding_extents);
WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); WARN_ON(BTRFS_I(inode)->reserved_extents);
/* /*
* This can happen where we create an inode, but somebody else also * This can happen where we create an inode, but somebody else also
...@@ -6830,7 +6808,7 @@ int btrfs_drop_inode(struct inode *inode) ...@@ -6830,7 +6808,7 @@ int btrfs_drop_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0 && if (btrfs_root_refs(&root->root_item) == 0 &&
!is_free_space_inode(root, inode)) !btrfs_is_free_space_inode(root, inode))
return 1; return 1;
else else
return generic_drop_inode(inode); return generic_drop_inode(inode);
......
...@@ -867,8 +867,8 @@ static int cluster_pages_for_defrag(struct inode *inode, ...@@ -867,8 +867,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
/* step one, lock all the pages */ /* step one, lock all the pages */
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
struct page *page; struct page *page;
page = grab_cache_page(inode->i_mapping, page = find_or_create_page(inode->i_mapping,
start_index + i); start_index + i, GFP_NOFS);
if (!page) if (!page)
break; break;
...@@ -938,7 +938,9 @@ static int cluster_pages_for_defrag(struct inode *inode, ...@@ -938,7 +938,9 @@ static int cluster_pages_for_defrag(struct inode *inode,
GFP_NOFS); GFP_NOFS);
if (i_done != num_pages) { if (i_done != num_pages) {
atomic_inc(&BTRFS_I(inode)->outstanding_extents); spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode, btrfs_delalloc_release_space(inode,
(num_pages - i_done) << PAGE_CACHE_SHIFT); (num_pages - i_done) << PAGE_CACHE_SHIFT);
} }
......
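The grab_cache_page() to find_or_create_page() substitution repeated in file.c, inode.c, ioctl.c and relocation.c above is about allocation flags rather than behaviour: grab_cache_page() is just find_or_create_page(mapping, index, mapping_gfp_mask(mapping)), and those mappings' masks allow __GFP_FS, so a page allocation under memory pressure could recurse back into btrfs. Spelling out the call lets these callers pass GFP_NOFS, as in this sketch (inode and index stand for the surrounding context):

        /* equivalent to grab_cache_page(), minus the risk of re-entering the fs */
        page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
        if (!page)
                return -ENOMEM;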
...@@ -24,185 +24,197 @@ ...@@ -24,185 +24,197 @@
#include "extent_io.h" #include "extent_io.h"
#include "locking.h" #include "locking.h"
static inline void spin_nested(struct extent_buffer *eb) void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
{
spin_lock(&eb->lock);
}
/* /*
* Setting a lock to blocking will drop the spinlock and set the * if we currently have a spinning reader or writer lock
* flag that forces other procs who want the lock to wait. After * (indicated by the rw flag) this will bump the count
* this you can safely schedule with the lock held. * of blocking holders and drop the spinlock.
*/ */
void btrfs_set_lock_blocking(struct extent_buffer *eb) void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
{ {
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { if (rw == BTRFS_WRITE_LOCK) {
set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); if (atomic_read(&eb->blocking_writers) == 0) {
spin_unlock(&eb->lock); WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
btrfs_assert_tree_locked(eb);
atomic_inc(&eb->blocking_writers);
write_unlock(&eb->lock);
}
} else if (rw == BTRFS_READ_LOCK) {
btrfs_assert_tree_read_locked(eb);
atomic_inc(&eb->blocking_readers);
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
read_unlock(&eb->lock);
} }
/* exit with the spin lock released and the bit set */ return;
} }
/* /*
* clearing the blocking flag will take the spinlock again. * if we currently have a blocking lock, take the spinlock
* After this you can't safely schedule * and drop our blocking count
*/ */
void btrfs_clear_lock_blocking(struct extent_buffer *eb) void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{ {
if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
spin_nested(eb); BUG_ON(atomic_read(&eb->blocking_writers) != 1);
clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); write_lock(&eb->lock);
smp_mb__after_clear_bit(); WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
if (atomic_dec_and_test(&eb->blocking_writers))
wake_up(&eb->write_lock_wq);
} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
if (atomic_dec_and_test(&eb->blocking_readers))
wake_up(&eb->read_lock_wq);
} }
/* exit with the spin lock held */ return;
} }
/* /*
* unfortunately, many of the places that currently set a lock to blocking * take a spinning read lock. This will wait for any blocking
* don't end up blocking for very long, and often they don't block * writers
* at all. For a dbench 50 run, if we don't spin on the blocking bit
* at all, the context switch rate can jump up to 400,000/sec or more.
*
* So, we're still stuck with this crummy spin on the blocking bit,
* at least until the most common causes of the short blocks
* can be dealt with.
*/ */
static int btrfs_spin_on_block(struct extent_buffer *eb) void btrfs_tree_read_lock(struct extent_buffer *eb)
{ {
int i; again:
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
for (i = 0; i < 512; i++) { read_lock(&eb->lock);
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) if (atomic_read(&eb->blocking_writers)) {
return 1; read_unlock(&eb->lock);
if (need_resched()) wait_event(eb->write_lock_wq,
break; atomic_read(&eb->blocking_writers) == 0);
cpu_relax(); goto again;
} }
return 0; atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
} }
/* /*
* This is somewhat different from trylock. It will take the * returns 1 if we get the read lock and 0 if we don't
* spinlock but if it finds the lock is set to blocking, it will * this won't wait for blocking writers
* return without the lock held.
*
* returns 1 if it was able to take the lock and zero otherwise
*
* After this call, scheduling is not safe without first calling
* btrfs_set_lock_blocking()
*/ */
int btrfs_try_spin_lock(struct extent_buffer *eb) int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{ {
int i; if (atomic_read(&eb->blocking_writers))
return 0;
if (btrfs_spin_on_block(eb)) { read_lock(&eb->lock);
spin_nested(eb); if (atomic_read(&eb->blocking_writers)) {
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) read_unlock(&eb->lock);
return 1; return 0;
spin_unlock(&eb->lock);
} }
/* spin for a bit on the BLOCKING flag */ atomic_inc(&eb->read_locks);
for (i = 0; i < 2; i++) { atomic_inc(&eb->spinning_readers);
cpu_relax(); return 1;
if (!btrfs_spin_on_block(eb))
break;
spin_nested(eb);
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
return 1;
spin_unlock(&eb->lock);
}
return 0;
} }
/* /*
* the autoremove wake function will return 0 if it tried to wake up * returns 1 if we get the read lock and 0 if we don't
* a process that was already awake, which means that process won't * this won't wait for blocking writers or readers
* count as an exclusive wakeup. The waitq code will continue waking
* procs until it finds one that was actually sleeping.
*
* For btrfs, this isn't quite what we want. We want a single proc
* to be notified that the lock is ready for taking. If that proc
* already happen to be awake, great, it will loop around and try for
* the lock.
*
* So, btrfs_wake_function always returns 1, even when the proc that we
* tried to wake up was already awake.
*/ */
static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, int btrfs_try_tree_write_lock(struct extent_buffer *eb)
int sync, void *key)
{ {
autoremove_wake_function(wait, mode, sync, key); if (atomic_read(&eb->blocking_writers) ||
atomic_read(&eb->blocking_readers))
return 0;
write_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers) ||
atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
return 0;
}
atomic_inc(&eb->write_locks);
atomic_inc(&eb->spinning_writers);
return 1; return 1;
} }
/* /*
* returns with the extent buffer spinlocked. * drop a spinning read lock
* */
* This will spin and/or wait as required to take the lock, and then void btrfs_tree_read_unlock(struct extent_buffer *eb)
* return with the spinlock held. {
* btrfs_assert_tree_read_locked(eb);
* After this call, scheduling is not safe without first calling WARN_ON(atomic_read(&eb->spinning_readers) == 0);
* btrfs_set_lock_blocking() atomic_dec(&eb->spinning_readers);
atomic_dec(&eb->read_locks);
read_unlock(&eb->lock);
}
/*
* drop a blocking read lock
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
if (atomic_dec_and_test(&eb->blocking_readers))
wake_up(&eb->read_lock_wq);
atomic_dec(&eb->read_locks);
}
/*
* take a spinning write lock. This will wait for both
* blocking readers or writers
*/ */
int btrfs_tree_lock(struct extent_buffer *eb) int btrfs_tree_lock(struct extent_buffer *eb)
{ {
DEFINE_WAIT(wait); again:
wait.func = btrfs_wake_function; wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
if (!btrfs_spin_on_block(eb)) write_lock(&eb->lock);
goto sleep; if (atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
while(1) { wait_event(eb->read_lock_wq,
spin_nested(eb); atomic_read(&eb->blocking_readers) == 0);
goto again;
/* nobody is blocking, exit with the spinlock held */
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
return 0;
/*
* we have the spinlock, but the real owner is blocking.
* wait for them
*/
spin_unlock(&eb->lock);
/*
* spin for a bit, and if the blocking flag goes away,
* loop around
*/
cpu_relax();
if (btrfs_spin_on_block(eb))
continue;
sleep:
prepare_to_wait_exclusive(&eb->lock_wq, &wait,
TASK_UNINTERRUPTIBLE);
if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
schedule();
finish_wait(&eb->lock_wq, &wait);
} }
if (atomic_read(&eb->blocking_writers)) {
write_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
atomic_inc(&eb->write_locks);
return 0; return 0;
} }
/*
* drop a spinning or a blocking write lock.
*/
int btrfs_tree_unlock(struct extent_buffer *eb) int btrfs_tree_unlock(struct extent_buffer *eb)
{ {
/* int blockers = atomic_read(&eb->blocking_writers);
* if we were a blocking owner, we don't have the spinlock held
* just clear the bit and look for waiters BUG_ON(blockers > 1);
*/
if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) btrfs_assert_tree_locked(eb);
smp_mb__after_clear_bit(); atomic_dec(&eb->write_locks);
else
spin_unlock(&eb->lock); if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
if (waitqueue_active(&eb->lock_wq)) atomic_dec(&eb->blocking_writers);
wake_up(&eb->lock_wq); smp_wmb();
wake_up(&eb->write_lock_wq);
} else {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
write_unlock(&eb->lock);
}
return 0; return 0;
} }
void btrfs_assert_tree_locked(struct extent_buffer *eb) void btrfs_assert_tree_locked(struct extent_buffer *eb)
{ {
if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) BUG_ON(!atomic_read(&eb->write_locks));
assert_spin_locked(&eb->lock); }
void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->read_locks));
} }
...@@ -19,11 +19,43 @@ ...@@ -19,11 +19,43 @@
#ifndef __BTRFS_LOCKING_ #ifndef __BTRFS_LOCKING_
#define __BTRFS_LOCKING_ #define __BTRFS_LOCKING_
#define BTRFS_WRITE_LOCK 1
#define BTRFS_READ_LOCK 2
#define BTRFS_WRITE_LOCK_BLOCKING 3
#define BTRFS_READ_LOCK_BLOCKING 4
int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_lock(struct extent_buffer *eb);
int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb);
int btrfs_try_spin_lock(struct extent_buffer *eb); int btrfs_try_spin_lock(struct extent_buffer *eb);
void btrfs_set_lock_blocking(struct extent_buffer *eb); void btrfs_tree_read_lock(struct extent_buffer *eb);
void btrfs_clear_lock_blocking(struct extent_buffer *eb); void btrfs_tree_read_unlock(struct extent_buffer *eb);
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_assert_tree_locked(struct extent_buffer *eb); void btrfs_assert_tree_locked(struct extent_buffer *eb);
int btrfs_try_tree_read_lock(struct extent_buffer *eb);
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
{
if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
btrfs_tree_unlock(eb);
else if (rw == BTRFS_READ_LOCK_BLOCKING)
btrfs_tree_read_unlock_blocking(eb);
else if (rw == BTRFS_READ_LOCK)
btrfs_tree_read_unlock(eb);
else
BUG();
}
static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
{
btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
}
static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
{
btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
}
#endif #endif
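The rewritten locking API keeps separate spinning and blocking states for readers and writers, and btrfs_tree_unlock_rw() picks the right unlock based on which state the caller left the buffer in. A sketch of the intended calling convention (may_sleep is a placeholder for whatever condition makes the caller schedule):

        int lock_type = BTRFS_READ_LOCK;

        btrfs_tree_read_lock(eb);               /* spinning read lock */

        if (may_sleep) {
                /* about to block: convert to a blocking reader so other
                 * lockers are not stuck spinning on eb->lock while we sleep */
                btrfs_set_lock_blocking_rw(eb, lock_type);
                lock_type = BTRFS_READ_LOCK_BLOCKING;
        }

        /* ... read the buffer, possibly scheduling ... */

        btrfs_tree_unlock_rw(eb, lock_type);    /* handles either state */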
...@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode, ...@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
page_cache_sync_readahead(inode->i_mapping, page_cache_sync_readahead(inode->i_mapping,
ra, NULL, index, ra, NULL, index,
last_index + 1 - index); last_index + 1 - index);
page = grab_cache_page(inode->i_mapping, index); page = find_or_create_page(inode->i_mapping, index,
GFP_NOFS);
if (!page) { if (!page) {
btrfs_delalloc_release_metadata(inode, btrfs_delalloc_release_metadata(inode,
PAGE_CACHE_SIZE); PAGE_CACHE_SIZE);
......
...@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ ...@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \
unsigned long part_offset = (unsigned long)s; \ unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \ unsigned long offset = part_offset + offsetof(type, member); \
type *p; \ type *p; \
/* ugly, but we want the fast path here */ \ int err; \
if (eb->map_token && offset >= eb->map_start && \ char *kaddr; \
offset + sizeof(((type *)0)->member) <= eb->map_start + \ unsigned long map_start; \
eb->map_len) { \ unsigned long map_len; \
p = (type *)(eb->kaddr + part_offset - eb->map_start); \ u##bits res; \
return le##bits##_to_cpu(p->member); \ err = map_private_extent_buffer(eb, offset, \
} \ sizeof(((type *)0)->member), \
{ \ &kaddr, &map_start, &map_len); \
int err; \ if (err) { \
char *map_token; \ __le##bits leres; \
char *kaddr; \ read_eb_member(eb, s, type, member, &leres); \
int unmap_on_exit = (eb->map_token == NULL); \ return le##bits##_to_cpu(leres); \
unsigned long map_start; \ } \
unsigned long map_len; \ p = (type *)(kaddr + part_offset - map_start); \
u##bits res; \ res = le##bits##_to_cpu(p->member); \
err = map_extent_buffer(eb, offset, \ return res; \
sizeof(((type *)0)->member), \
&map_token, &kaddr, \
&map_start, &map_len, KM_USER1); \
if (err) { \
__le##bits leres; \
read_eb_member(eb, s, type, member, &leres); \
return le##bits##_to_cpu(leres); \
} \
p = (type *)(kaddr + part_offset - map_start); \
res = le##bits##_to_cpu(p->member); \
if (unmap_on_exit) \
unmap_extent_buffer(eb, map_token, KM_USER1); \
return res; \
} \
} \ } \
void btrfs_set_##name(struct extent_buffer *eb, \ void btrfs_set_##name(struct extent_buffer *eb, \
type *s, u##bits val) \ type *s, u##bits val) \
...@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb, \ ...@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb, \
unsigned long part_offset = (unsigned long)s; \ unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \ unsigned long offset = part_offset + offsetof(type, member); \
type *p; \ type *p; \
/* ugly, but we want the fast path here */ \ int err; \
if (eb->map_token && offset >= eb->map_start && \ char *kaddr; \
offset + sizeof(((type *)0)->member) <= eb->map_start + \ unsigned long map_start; \
eb->map_len) { \ unsigned long map_len; \
p = (type *)(eb->kaddr + part_offset - eb->map_start); \ err = map_private_extent_buffer(eb, offset, \
p->member = cpu_to_le##bits(val); \ sizeof(((type *)0)->member), \
return; \ &kaddr, &map_start, &map_len); \
} \ if (err) { \
{ \ __le##bits val2; \
int err; \ val2 = cpu_to_le##bits(val); \
char *map_token; \ write_eb_member(eb, s, type, member, &val2); \
char *kaddr; \ return; \
int unmap_on_exit = (eb->map_token == NULL); \ } \
unsigned long map_start; \ p = (type *)(kaddr + part_offset - map_start); \
unsigned long map_len; \ p->member = cpu_to_le##bits(val); \
err = map_extent_buffer(eb, offset, \
sizeof(((type *)0)->member), \
&map_token, &kaddr, \
&map_start, &map_len, KM_USER1); \
if (err) { \
__le##bits val2; \
val2 = cpu_to_le##bits(val); \
write_eb_member(eb, s, type, member, &val2); \
return; \
} \
p = (type *)(kaddr + part_offset - map_start); \
p->member = cpu_to_le##bits(val); \
if (unmap_on_exit) \
unmap_extent_buffer(eb, map_token, KM_USER1); \
} \
} }
#include "ctree.h" #include "ctree.h"
...@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb, ...@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr) struct btrfs_disk_key *disk_key, int nr)
{ {
unsigned long ptr = btrfs_node_key_ptr_offset(nr); unsigned long ptr = btrfs_node_key_ptr_offset(nr);
if (eb->map_token && ptr >= eb->map_start &&
ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
sizeof(*disk_key));
return;
} else if (eb->map_token) {
unmap_extent_buffer(eb, eb->map_token, KM_USER1);
eb->map_token = NULL;
}
read_eb_member(eb, (struct btrfs_key_ptr *)ptr, read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
struct btrfs_key_ptr, key, disk_key); struct btrfs_key_ptr, key, disk_key);
} }
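The struct-funcs.c hunks above drop the cached map_token fast path (and with it the KM_USER1 atomic kmap) from the generated accessors; every read and write now goes through map_private_extent_buffer() and falls back to read_eb_member()/write_eb_member() when the field straddles a page boundary. Below is a hand-expanded sketch of the new read path for a single u64 field; the struct and member are stand-ins chosen only for illustration, while the helper signatures are taken from the diff.

```c
/*
 * Hand-expanded illustration (not part of the patch) of the read path
 * the accessor macro above now generates.  "struct btrfs_disk_key" and
 * its "offset" member are only stand-ins; map_private_extent_buffer()
 * and read_eb_member() are used with the signatures visible in the diff.
 */
static u64 example_get_offset(struct extent_buffer *eb,
			      struct btrfs_disk_key *s)
{
	unsigned long part_offset = (unsigned long)s;
	unsigned long offset = part_offset +
			       offsetof(struct btrfs_disk_key, offset);
	unsigned long map_start;
	unsigned long map_len;
	char *kaddr;
	struct btrfs_disk_key *p;
	int err;

	/* try to map the page(s) backing this field directly */
	err = map_private_extent_buffer(eb, offset, sizeof(p->offset),
					&kaddr, &map_start, &map_len);
	if (err) {
		/* field straddles a page: bounce through a stack copy */
		__le64 leres;

		read_eb_member(eb, s, struct btrfs_disk_key, offset, &leres);
		return le64_to_cpu(leres);
	}
	p = (struct btrfs_disk_key *)(kaddr + part_offset - map_start);
	return le64_to_cpu(p->offset);
}
```

The setter side is symmetric: when the mapping fails it converts the value on the stack and stores it with write_eb_member() instead of writing through the mapped address.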
...@@ -260,7 +260,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ...@@ -260,7 +260,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
{ {
struct btrfs_trans_handle *h; struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans; struct btrfs_transaction *cur_trans;
int retries = 0; u64 num_bytes = 0;
int ret; int ret;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
...@@ -274,6 +274,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ...@@ -274,6 +274,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
h->block_rsv = NULL; h->block_rsv = NULL;
goto got_it; goto got_it;
} }
/*
* Do the reservation before we join the transaction so we can do all
* the appropriate flushing if need be.
*/
if (num_items > 0 && root != root->fs_info->chunk_root) {
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(NULL, root,
&root->fs_info->trans_block_rsv,
num_bytes);
if (ret)
return ERR_PTR(ret);
}
again: again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h) if (!h)
...@@ -310,24 +323,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, ...@@ -310,24 +323,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
goto again; goto again;
} }
if (num_items > 0) { if (num_bytes) {
ret = btrfs_trans_reserve_metadata(h, root, num_items); h->block_rsv = &root->fs_info->trans_block_rsv;
if (ret == -EAGAIN && !retries) { h->bytes_reserved = num_bytes;
retries++;
btrfs_commit_transaction(h, root);
goto again;
} else if (ret == -EAGAIN) {
/*
* We have already retried and got EAGAIN, so really we
* don't have space, so set ret to -ENOSPC.
*/
ret = -ENOSPC;
}
if (ret < 0) {
btrfs_end_transaction(h, root);
return ERR_PTR(ret);
}
} }
got_it: got_it:
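In start_transaction() the reservation now happens before the handle joins the running transaction: num_items is converted to bytes with btrfs_calc_trans_metadata_size() and charged to fs_info->trans_block_rsv through btrfs_block_rsv_add(), and the handle only records bytes_reserved, replacing the old commit-and-retry handling of -EAGAIN. A caller-side sketch of the resulting flow, using a hypothetical helper that is not part of the patch:

```c
/*
 * Caller-side sketch (hypothetical helper, not from the patch): the
 * metadata reservation for num_items is taken before the handle joins
 * the running transaction, so any flushing needed to satisfy it runs
 * outside the transaction and -ENOSPC comes straight back.
 */
static int example_update_two_items(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;

	/* reserve metadata space for two tree items up front */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans))
		return PTR_ERR(trans);	/* no EAGAIN retry loop anymore */

	/* ... modifications that dirty at most two items ... */

	return btrfs_end_transaction(trans, root);
}
```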
...@@ -499,10 +497,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, ...@@ -499,10 +497,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
} }
if (lock && cur_trans->blocked && !cur_trans->in_commit) { if (lock && cur_trans->blocked && !cur_trans->in_commit) {
if (throttle) if (throttle) {
/*
* We may race with somebody else here so end up having
* to call end_transaction on ourselves again, so inc
* our use_count.
*/
trans->use_count++;
return btrfs_commit_transaction(trans, root); return btrfs_commit_transaction(trans, root);
else } else {
wake_up_process(info->transaction_kthread); wake_up_process(info->transaction_kthread);
}
} }
WARN_ON(cur_trans != info->running_transaction); WARN_ON(cur_trans != info->running_transaction);
......
...@@ -1730,8 +1730,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, ...@@ -1730,8 +1730,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
btrfs_read_buffer(next, ptr_gen); btrfs_read_buffer(next, ptr_gen);
btrfs_tree_lock(next); btrfs_tree_lock(next);
clean_tree_block(trans, root, next);
btrfs_set_lock_blocking(next); btrfs_set_lock_blocking(next);
clean_tree_block(trans, root, next);
btrfs_wait_tree_block_writeback(next); btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next); btrfs_tree_unlock(next);
...@@ -1796,8 +1796,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, ...@@ -1796,8 +1796,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[*level]; next = path->nodes[*level];
btrfs_tree_lock(next); btrfs_tree_lock(next);
clean_tree_block(trans, root, next);
btrfs_set_lock_blocking(next); btrfs_set_lock_blocking(next);
clean_tree_block(trans, root, next);
btrfs_wait_tree_block_writeback(next); btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next); btrfs_tree_unlock(next);
...@@ -1864,8 +1864,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, ...@@ -1864,8 +1864,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[orig_level]; next = path->nodes[orig_level];
btrfs_tree_lock(next); btrfs_tree_lock(next);
clean_tree_block(trans, log, next);
btrfs_set_lock_blocking(next); btrfs_set_lock_blocking(next);
clean_tree_block(trans, log, next);
btrfs_wait_tree_block_writeback(next); btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next); btrfs_tree_unlock(next);
......
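The three tree-log.c hunks apply the same one-line reordering: btrfs_set_lock_blocking() is now called before clean_tree_block() rather than after it. Below is the pattern the hunks converge on, pulled into a hypothetical helper for illustration; the ordering is what the diff establishes, the rationale in the comment is an assumption.

```c
/*
 * Illustrative helper (not from the patch); "next" is a locked tree
 * block being torn down from the log tree.
 */
static void example_release_log_block(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct extent_buffer *next)
{
	btrfs_tree_lock(next);
	/* convert to the blocking lock before any potentially sleeping work */
	btrfs_set_lock_blocking(next);
	clean_tree_block(trans, root, next);
	btrfs_wait_tree_block_writeback(next);
	btrfs_tree_unlock(next);
}
```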
...@@ -3595,7 +3595,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) ...@@ -3595,7 +3595,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
if (!sb) if (!sb)
return -ENOMEM; return -ENOMEM;
btrfs_set_buffer_uptodate(sb); btrfs_set_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(sb, 0); btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
array_size = btrfs_super_sys_array_size(super_copy); array_size = btrfs_super_sys_array_size(super_copy);
......
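btrfs_set_buffer_lockdep_class() takes an extra objectid argument in this series, so btrfs_read_sys_array() now passes root->root_key.objectid along with the buffer and level, apparently keying the lockdep class per owning root as well as per level. A minimal usage sketch of the new signature, wrapped in a hypothetical helper:

```c
/*
 * Usage sketch of the new three-argument signature (hypothetical
 * wrapper, not from the patch): callers identify the owning root by
 * objectid in addition to the buffer and its level.
 */
static void example_tag_buffer(struct btrfs_root *root,
			       struct extent_buffer *buf, int level)
{
	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
}
```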
...@@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans, ...@@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
if (!path) if (!path)
return -ENOMEM; return -ENOMEM;
/* first lets see if we already have this xattr */ if (flags & XATTR_REPLACE) {
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
strlen(name), -1); name_len, -1);
if (IS_ERR(di)) { if (IS_ERR(di)) {
ret = PTR_ERR(di); ret = PTR_ERR(di);
goto out; goto out;
} } else if (!di) {
ret = -ENODATA;
/* ok we already have this xattr, lets remove it */
if (di) {
/* if we want create only exit */
if (flags & XATTR_CREATE) {
ret = -EEXIST;
goto out; goto out;
} }
ret = btrfs_delete_one_dir_name(trans, root, path, di); ret = btrfs_delete_one_dir_name(trans, root, path, di);
BUG_ON(ret); if (ret)
goto out;
btrfs_release_path(path); btrfs_release_path(path);
}
/* if we don't have a value then we are removing the xattr */ again:
if (!value) ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
name, name_len, value, size);
if (ret == -EEXIST) {
if (flags & XATTR_CREATE)
goto out; goto out;
} else { /*
* We can't use the path we already have since we won't have the
* proper locking for a delete, so release the path and
* re-lookup to delete the thing.
*/
btrfs_release_path(path); btrfs_release_path(path);
di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
name, name_len, -1);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
} else if (!di) {
/* Shouldn't happen but just in case... */
btrfs_release_path(path);
goto again;
}
if (flags & XATTR_REPLACE) { ret = btrfs_delete_one_dir_name(trans, root, path, di);
/* we couldn't find the attr to replace */ if (ret)
ret = -ENODATA;
goto out; goto out;
/*
* We have a value to set, so go back and try to insert it now.
*/
if (value) {
btrfs_release_path(path);
goto again;
} }
} }
/* ok we have to create a completely new xattr */
ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
name, name_len, value, size);
BUG_ON(ret);
out: out:
btrfs_free_path(path); btrfs_free_path(path);
return ret; return ret;
......
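The rewritten do_setxattr() above inverts the old lookup-then-insert logic: for XATTR_REPLACE it looks up and deletes the existing item first (returning -ENODATA if there is none), then tries the insert; an -EEXIST from the insert is either returned as-is for XATTR_CREATE or resolved by releasing the path, re-looking the item up, deleting it and retrying, and the BUG_ON() error handling is gone. Seen from userspace through plain setxattr(2), the flag semantics the new code preserves look like this (illustrative snippet, not btrfs-specific):

```c
/*
 * Userspace view of the preserved semantics: XATTR_CREATE still fails
 * with EEXIST when the attribute exists, XATTR_REPLACE still fails
 * with ENODATA when it does not.
 */
#include <sys/xattr.h>
#include <errno.h>

static int example_set_xattr(const char *path)
{
	if (setxattr(path, "user.example", "v", 1, XATTR_CREATE) == 0)
		return 0;
	if (errno != EEXIST)
		return -1;
	/* attribute already exists: replace its value instead */
	return setxattr(path, "user.example", "v2", 2, XATTR_REPLACE);
}
```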