提交 d3f71ae7 编写于 作者: L Linus Torvalds

Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "Dave had a small collection of fixes to the new free space tree code,
  one of which was keeping our sysfs files more up to date with feature
  bits as different things get enabled (lzo, raid5/6, etc).

  I should have kept the sysfs stuff for rc3, since we always manage to
  trip over something.  This time it was GFP_KERNEL from somewhere that
  is NOFS only.  Instead of rebasing it out I've put a revert in, and
  we'll fix it properly for rc3.

  Otherwise, Filipe fixed a btrfs DIO race and Qu Wenruo fixed up a
  use-after-free in our tracepoints that Dave Jones reported"

* 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Revert "btrfs: synchronize incompat feature bits with sysfs files"
  btrfs: don't use GFP_HIGHMEM for free-space-tree bitmap kzalloc
  btrfs: sysfs: check initialization state before updating features
  Revert "btrfs: clear PF_NOFREEZE in cleaner_kthread()"
  btrfs: async-thread: Fix a use-after-free error for trace
  Btrfs: fix race between fsync and lockless direct IO writes
  btrfs: add free space tree to the cow-only list
  btrfs: add free space tree to lockdep classes
  btrfs: tweak free space tree bitmap allocation
  btrfs: tests: switch to GFP_KERNEL
  btrfs: synchronize incompat feature bits with sysfs files
  btrfs: sysfs: introduce helper for syncing bits with sysfs files
  btrfs: sysfs: add free-space-tree bit attribute
  btrfs: sysfs: fix typo in compat_ro attribute definition
......@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
list_add_tail(&work->ordered_list, &wq->ordered_list);
spin_unlock_irqrestore(&wq->list_lock, flags);
}
queue_work(wq->normal_wq, &work->normal_work);
trace_btrfs_work_queued(work);
queue_work(wq->normal_wq, &work->normal_work);
}
void btrfs_queue_work(struct btrfs_workqueue *wq,
......
......@@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
{ .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
{ .id = 0, .name_stem = "tree" },
};
......@@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg)
int again;
struct btrfs_trans_handle *trans;
set_freezable();
do {
again = 0;
......
......@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
static unsigned long *alloc_bitmap(u32 bitmap_size)
{
void *mem;
/*
* The allocation size varies, observed numbers were < 4K up to 16K.
* Using vmalloc unconditionally would be too heavy, we'll try
* contiguous allocations first.
*/
if (bitmap_size <= PAGE_SIZE)
return kzalloc(bitmap_size, GFP_NOFS);
mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
if (mem)
return mem;
return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL);
}
......@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
ret = 0;
out:
vfree(bitmap);
kvfree(bitmap);
if (ret)
btrfs_abort_transaction(trans, root, ret);
return ret;
......@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
ret = 0;
out:
vfree(bitmap);
kvfree(bitmap);
if (ret)
btrfs_abort_transaction(trans, root, ret);
return ret;
......
......@@ -7116,21 +7116,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
if (ret)
return ERR_PTR(ret);
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
ins.offset, ins.offset, ins.offset, 0);
if (IS_ERR(em)) {
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
return em;
}
/*
* Create the ordered extent before the extent map. This is to avoid
* races with the fast fsync path that would lead to it logging file
* extent items that point to disk extents that were not yet written to.
* The fast fsync path collects ordered extents into a local list and
* then collects all the new extent maps, so we must create the ordered
* extent first and make sure the fast fsync path collects any new
* ordered extents after collecting new extent maps as well.
* The fsync path simply can not rely on inode_dio_wait() because it
* causes deadlock with AIO.
*/
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0);
if (ret) {
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
free_extent_map(em);
return ERR_PTR(ret);
}
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
ins.offset, ins.offset, ins.offset, 0);
if (IS_ERR(em)) {
struct btrfs_ordered_extent *oe;
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
oe = btrfs_lookup_ordered_extent(inode, start);
ASSERT(oe);
if (WARN_ON(!oe))
return em;
set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
btrfs_remove_ordered_extent(inode, oe);
/* Once for our lookup and once for the ordered extents tree. */
btrfs_put_ordered_extent(oe);
btrfs_put_ordered_extent(oe);
}
return em;
}
......
......@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
root_objectid == BTRFS_TREE_LOG_OBJECTID ||
root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
root_objectid == BTRFS_UUID_TREE_OBJECTID ||
root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
return 1;
return 0;
}
......
......@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref),
......@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(raid56),
BTRFS_FEAT_ATTR_PTR(skinny_metadata),
BTRFS_FEAT_ATTR_PTR(no_holes),
BTRFS_FEAT_ATTR_PTR(free_space_tree),
NULL
};
......@@ -780,6 +782,39 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
return error;
}
/*
* Change per-fs features in /sys/fs/btrfs/UUID/features to match current
* values in superblock. Call after any changes to incompat/compat_ro flags
*/
void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
u64 bit, enum btrfs_feature_set set)
{
struct btrfs_fs_devices *fs_devs;
struct kobject *fsid_kobj;
u64 features;
int ret;
if (!fs_info)
return;
features = get_features(fs_info, set);
ASSERT(bit & supported_feature_masks[set]);
fs_devs = fs_info->fs_devices;
fsid_kobj = &fs_devs->fsid_kobj;
if (!fsid_kobj->state_initialized)
return;
/*
* FIXME: this is too heavy to update just one value, ideally we'd like
* to use sysfs_update_group but some refactoring is needed first.
*/
sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
}
static int btrfs_init_debugfs(void)
{
#ifdef CONFIG_DEBUG_FS
......
......@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
......@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
struct kobject *parent);
int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
u64 bit, enum btrfs_feature_set set);
#endif /* _BTRFS_SYSFS_H_ */
......@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
{
struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
GFP_NOFS);
GFP_KERNEL);
if (!fs_info)
return fs_info;
fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
GFP_NOFS);
GFP_KERNEL);
if (!fs_info->fs_devices) {
kfree(fs_info);
return NULL;
}
fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
GFP_NOFS);
GFP_KERNEL);
if (!fs_info->super_copy) {
kfree(fs_info->fs_devices);
kfree(fs_info);
......@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
{
struct btrfs_block_group_cache *cache;
cache = kzalloc(sizeof(*cache), GFP_NOFS);
cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (!cache)
return NULL;
cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
GFP_NOFS);
GFP_KERNEL);
if (!cache->free_space_ctl) {
kfree(cache);
return NULL;
......
......@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
* test.
*/
for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) {
test_msg("Failed to allocate test page\n");
ret = -ENOMEM;
......@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
* |--- delalloc ---|
* |--- search ---|
*/
set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
start = 0;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
......@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
test_msg("Couldn't find the locked page\n");
goto out_bits;
}
set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
......@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
*
* We are re-using our test_start from above since it works out well.
*/
set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
......@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
}
ret = 0;
out_bits:
clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
out:
if (locked_page)
page_cache_release(locked_page);
......@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
test_msg("Running extent buffer bitmap tests\n");
bitmap = kmalloc(len, GFP_NOFS);
bitmap = kmalloc(len, GFP_KERNEL);
if (!bitmap) {
test_msg("Couldn't allocate test bitmap\n");
return -ENOMEM;
......
......@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
(BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
NULL, GFP_NOFS);
NULL, GFP_KERNEL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
......@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
BTRFS_MAX_EXTENT_SIZE+8191,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS);
NULL, GFP_KERNEL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
......@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS);
NULL, GFP_KERNEL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
......@@ -1096,7 +1096,7 @@ static int test_extent_accounting(void)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
NULL, GFP_NOFS);
NULL, GFP_KERNEL);
iput(inode);
btrfs_free_dummy_root(root);
return ret;
......
......@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_path *path,
struct list_head *logged_list,
struct btrfs_log_ctx *ctx)
struct btrfs_log_ctx *ctx,
const u64 start,
const u64 end)
{
struct extent_map *em, *n;
struct list_head extents;
......@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
}
list_sort(NULL, &extents, extent_cmp);
/*
* Collect any new ordered extents within the range. This is to
* prevent logging file extent items without waiting for the disk
* location they point to being written. We do this only to deal
* with races against concurrent lockless direct IO writes.
*/
btrfs_get_logged_extents(inode, logged_list, start, end);
process:
while (!list_empty(&extents)) {
em = list_entry(extents.next, struct extent_map, list);
......@@ -4701,7 +4709,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
goto out_unlock;
}
ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
&logged_list, ctx);
&logged_list, ctx, start, end);
if (ret) {
err = ret;
goto out_unlock;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册