提交 890e730d 编写于 作者: L Li Lingfeng

dm thin: Fix ABBA deadlock by resetting dm_bufio_client

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I79ZEK
CVE: NA

--------------------------------

As described in commit d0dcee7d ("dm thin: Fix ABBA deadlock between
shrink_slab and dm_pool_abort_metadata"), ABBA deadlock will be triggered
since shrinker_rwsem need to be held when operations failed on dm pool
metadata.

We have noticed the following three problem scenarios:
1) Described by commit d0dcee7d ("dm thin: Fix ABBA deadlock between
shrink_slab and dm_pool_abort_metadata")

2) shrinker_rwsem and throttle->lock
          P1(drop cache)                        P2(kworker)
drop_caches_sysctl_handler
 drop_slab
  shrink_slab
   down_read(&shrinker_rwsem)  - LOCK A
   do_shrink_slab
    super_cache_scan
     prune_icache_sb
      dispose_list
       evict
        ext4_evict_inode
         ext4_clear_inode
          ext4_discard_preallocations
           ext4_mb_load_buddy_gfp
            ext4_mb_init_cache
             ext4_wait_block_bitmap
              __ext4_error
               ext4_handle_error
                ext4_commit_super
                 ...
                 dm_submit_bio
                                     do_worker
                                      throttle_work_update
                                       down_write(&t->lock) -- LOCK B
                                      process_deferred_bios
                                       commit
                                        metadata_operation_failed
                                         dm_pool_abort_metadata
                                          dm_block_manager_create
                                           dm_bufio_client_create
                                            register_shrinker
                                             down_write(&shrinker_rwsem)
                                             -- LOCK A
                 thin_map
                  thin_bio_map
                   thin_defer_bio_with_throttle
                    throttle_lock
                     down_read(&t->lock)  - LOCK B

3) shrinker_rwsem and wait_on_buffer
          P1(drop cache)                            P2(kworker)
drop_caches_sysctl_handler
 drop_slab
  shrink_slab
   down_read(&shrinker_rwsem)  - LOCK A
   do_shrink_slab
   ...
    ext4_wait_block_bitmap
     __ext4_error
      ext4_handle_error
       jbd2_journal_abort
        jbd2_journal_update_sb_errno
         jbd2_write_superblock
          submit_bh
           // LOCK B
           // RELEASE B
                             do_worker
                              throttle_work_update
                               down_write(&t->lock) - LOCK B
                              process_deferred_bios
                               process_bio
                               commit
                                metadata_operation_failed
                                 dm_pool_abort_metadata
                                  dm_block_manager_create
                                   dm_bufio_client_create
                                    register_shrinker
                                     register_shrinker_prepared
                                      down_write(&shrinker_rwsem)  - LOCK A
                               bio_endio
      wait_on_buffer
       __wait_on_buffer

Fix these by resetting dm_bufio_client without holding shrinker_rwsem.
Signed-off-by: NLi Lingfeng <lilingfeng3@huawei.com>
上级 6bc20a2c
......@@ -1883,6 +1883,13 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
}
EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
void dm_bufio_client_reset(struct dm_bufio_client *c)
{
drop_buffers(c);
flush_work(&c->shrink_work);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_reset);
void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start)
{
c->start = start;
......
......@@ -574,6 +574,8 @@ static int __format_metadata(struct dm_pool_metadata *pmd)
r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION,
&pmd->tm, &pmd->metadata_sm);
if (r < 0) {
pmd->tm = NULL;
pmd->metadata_sm = NULL;
DMERR("tm_create_with_sm failed");
return r;
}
......@@ -582,6 +584,7 @@ static int __format_metadata(struct dm_pool_metadata *pmd)
if (IS_ERR(pmd->data_sm)) {
DMERR("sm_disk_create failed");
r = PTR_ERR(pmd->data_sm);
pmd->data_sm = NULL;
goto bad_cleanup_tm;
}
......@@ -612,11 +615,15 @@ static int __format_metadata(struct dm_pool_metadata *pmd)
bad_cleanup_nb_tm:
dm_tm_destroy(pmd->nb_tm);
pmd->nb_tm = NULL;
bad_cleanup_data_sm:
dm_sm_destroy(pmd->data_sm);
pmd->data_sm = NULL;
bad_cleanup_tm:
dm_tm_destroy(pmd->tm);
pmd->tm = NULL;
dm_sm_destroy(pmd->metadata_sm);
pmd->metadata_sm = NULL;
return r;
}
......@@ -682,6 +689,8 @@ static int __open_metadata(struct dm_pool_metadata *pmd)
sizeof(disk_super->metadata_space_map_root),
&pmd->tm, &pmd->metadata_sm);
if (r < 0) {
pmd->tm = NULL;
pmd->metadata_sm = NULL;
DMERR("tm_open_with_sm failed");
goto bad_unlock_sblock;
}
......@@ -691,6 +700,7 @@ static int __open_metadata(struct dm_pool_metadata *pmd)
if (IS_ERR(pmd->data_sm)) {
DMERR("sm_disk_open failed");
r = PTR_ERR(pmd->data_sm);
pmd->data_sm = NULL;
goto bad_cleanup_tm;
}
......@@ -717,9 +727,12 @@ static int __open_metadata(struct dm_pool_metadata *pmd)
bad_cleanup_data_sm:
dm_sm_destroy(pmd->data_sm);
pmd->data_sm = NULL;
bad_cleanup_tm:
dm_tm_destroy(pmd->tm);
pmd->tm = NULL;
dm_sm_destroy(pmd->metadata_sm);
pmd->metadata_sm = NULL;
bad_unlock_sblock:
dm_bm_unlock(sblock);
......@@ -766,9 +779,13 @@ static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd,
bool destroy_bm)
{
dm_sm_destroy(pmd->data_sm);
pmd->data_sm = NULL;
dm_sm_destroy(pmd->metadata_sm);
pmd->metadata_sm = NULL;
dm_tm_destroy(pmd->nb_tm);
pmd->nb_tm = NULL;
dm_tm_destroy(pmd->tm);
pmd->tm = NULL;
if (destroy_bm)
dm_block_manager_destroy(pmd->bm);
}
......@@ -976,8 +993,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
__func__, r);
}
pmd_write_unlock(pmd);
if (!pmd->fail_io)
__destroy_persistent_data_objects(pmd, true);
__destroy_persistent_data_objects(pmd, true);
kfree(pmd);
return 0;
......@@ -1884,53 +1900,30 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd)
int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
{
int r = -EINVAL;
struct dm_block_manager *old_bm = NULL, *new_bm = NULL;
/* fail_io is double-checked with pmd->root_lock held below */
if (unlikely(pmd->fail_io))
return r;
/*
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs pmd root_lock).
* - must take shrinker_rwsem without holding pmd->root_lock
*/
new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
THIN_MAX_CONCURRENT_LOCKS);
pmd_write_lock(pmd);
if (pmd->fail_io) {
pmd_write_unlock(pmd);
goto out;
return r;
}
__set_abort_with_changes_flags(pmd);
/* destroy data_sm/metadata_sm/nb_tm/tm */
__destroy_persistent_data_objects(pmd, false);
old_bm = pmd->bm;
if (IS_ERR(new_bm)) {
DMERR("could not create block manager during abort");
pmd->bm = NULL;
r = PTR_ERR(new_bm);
goto out_unlock;
}
pmd->bm = new_bm;
/* reset bm */
dm_block_manager_reset(pmd->bm);
/* rebuild data_sm/metadata_sm/nb_tm/tm */
r = __open_or_format_metadata(pmd, false);
if (r) {
pmd->bm = NULL;
goto out_unlock;
}
new_bm = NULL;
out_unlock:
if (r)
pmd->fail_io = true;
pmd_write_unlock(pmd);
dm_block_manager_destroy(old_bm);
out:
if (new_bm && !IS_ERR(new_bm))
dm_block_manager_destroy(new_bm);
pmd_write_unlock(pmd);
return r;
}
......
......@@ -414,6 +414,12 @@ void dm_block_manager_destroy(struct dm_block_manager *bm)
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);
void dm_block_manager_reset(struct dm_block_manager *bm)
{
dm_bufio_client_reset(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_block_manager_reset);
unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
return dm_bufio_get_block_size(bm->bufio);
......
......@@ -35,6 +35,7 @@ struct dm_block_manager *dm_block_manager_create(
struct block_device *bdev, unsigned block_size,
unsigned max_held_per_thread);
void dm_block_manager_destroy(struct dm_block_manager *bm);
void dm_block_manager_reset(struct dm_block_manager *bm);
unsigned dm_bm_block_size(struct dm_block_manager *bm);
dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm);
......
......@@ -76,7 +76,8 @@ struct dm_space_map {
static inline void dm_sm_destroy(struct dm_space_map *sm)
{
sm->destroy(sm);
if (sm)
sm->destroy(sm);
}
static inline int dm_sm_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
......
......@@ -197,6 +197,9 @@ EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone);
void dm_tm_destroy(struct dm_transaction_manager *tm)
{
if (!tm)
return;
if (!tm->is_clone)
wipe_shadow_table(tm);
......
......@@ -31,6 +31,8 @@ dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
*/
void dm_bufio_client_destroy(struct dm_bufio_client *c);
void dm_bufio_client_reset(struct dm_bufio_client *c);
/*
* Set the sector range.
* When this function is called, there must be no I/O in progress on the bufio
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册