提交 fb4c3a27 编写于 作者: G Gao Xiang 提交者: Xie XiuQi

staging: erofs: fix race when the managed cache is enabled

commit 51232df5 upstream.

When the managed cache is enabled, the last reference count
of a workgroup must be used for its workstation.

Otherwise, it could lead to incorrect (un)freezes in
the reclaim path, and it would be harmful.

A typical race is as follows:

Thread 1 (In the reclaim path)  Thread 2
workgroup_freeze(grp, 1)                                refcnt = 1
...
workgroup_unfreeze(grp, 1)                              refcnt = 1
                                workgroup_get(grp)      refcnt = 2 (x)
workgroup_put(grp)                                      refcnt = 1 (x)
                                ...unexpected behaviors

* grp is detached but still used, which violates cache-managed
  freeze constraint.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 66cf9622
......@@ -260,6 +260,7 @@ static inline bool erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt)
}
/*
 * Raw reference-count helpers: each expands to a single atomic
 * increment / decrement of (grp)->refcount and performs no other check.
 */
#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount)
/* Drop one reference on grp; the int result is the post-decrement count. */
extern int erofs_workgroup_put(struct erofs_workgroup *grp);
......
......@@ -87,12 +87,21 @@ int erofs_register_workgroup(struct super_block *sb,
grp = (void *)((unsigned long)grp |
1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);
err = radix_tree_insert(&sbi->workstn_tree,
grp->index, grp);
/*
* Bump up reference count before making this workgroup
* visible to other users in order to avoid potential UAF
* without serialized by erofs_workstn_lock.
*/
__erofs_workgroup_get(grp);
if (!err) {
__erofs_workgroup_get(grp);
}
err = radix_tree_insert(&sbi->workstn_tree,
grp->index, grp);
if (unlikely(err))
/*
* it's safe to decrease since the workgroup isn't visible
* and refcount >= 2 (cannot be freezed).
*/
__erofs_workgroup_put(grp);
erofs_workstn_unlock(sbi);
radix_tree_preload_end();
......@@ -101,19 +110,99 @@ int erofs_register_workgroup(struct super_block *sb,
extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
/*
 * __erofs_workgroup_free - release a workgroup
 * @grp: workgroup to release
 *
 * Decrements the global shrink counter and then hands the workgroup
 * over to erofs_workgroup_free_rcu().
 */
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	/* drop the shrink accounting first, then free the object itself */
	atomic_long_dec(&erofs_global_shrink_cnt);

	erofs_workgroup_free_rcu(grp);
}
/*
 * erofs_workgroup_put - drop one reference on a workgroup
 * @grp: workgroup whose refcount is decremented
 *
 * When the count falls to 1 the global shrink counter is incremented;
 * when it falls to 0 the workgroup is released through
 * __erofs_workgroup_free(), which decrements the global shrink counter
 * again and calls erofs_workgroup_free_rcu().
 *
 * Return: the reference count observed right after the decrement.
 */
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1) {
		atomic_long_inc(&erofs_global_shrink_cnt);
	} else if (!count) {
		/*
		 * Single zero-count branch: the helper performs exactly the
		 * shrink-counter decrement plus RCU free that used to be
		 * open-coded here, so no duplicate branch is needed.
		 */
		__erofs_workgroup_free(grp);
	}
	return count;
}
#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* for cache-managed case, customized reclaim paths exist */
/*
 * erofs_workgroup_unfreeze_final - unfreeze a workgroup and release it
 * @grp: workgroup to unfreeze and free
 *
 * Calls erofs_workgroup_unfreeze() with 0 as its second argument
 * (presumably the refcount value to restore -- confirm against
 * erofs_workgroup_unfreeze()) and then frees the workgroup via
 * __erofs_workgroup_free().
 */
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);

	__erofs_workgroup_free(grp);
}
/*
 * erofs_try_to_release_workgroup - try to reclaim one workgroup
 *                                  (managed-cache variant)
 * @sbi:     superblock info owning the workstation radix tree
 * @grp:     workgroup to release
 * @cleanup: not consulted by this variant
 *
 * Return: true if the workgroup was removed from sbi->workstn_tree and
 * freed; false if it could not be frozen or if
 * erofs_try_to_free_all_cached_pages() reported failure (in which case
 * the workgroup is unfrozen again and left in place).
 */
bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
				    struct erofs_workgroup *grp,
				    bool cleanup)
{
	void *removed;

	/*
	 * With the managed cache enabled a workgroup's refcount may be
	 * < 0 (frozen), so a positive refcount cannot be assumed here;
	 * simply give up when the freeze attempt fails.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Every cached page has to be unlinked before the workgroup
	 * leaves the radix tree.  Otherwise cached pages of an orphaned
	 * old workgroup could still be linked once a new workgroup
	 * becomes available.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp))
		goto out_unfreeze;

	/*
	 * The deletion cannot fail once the workgroup is frozen; the
	 * DBG_BUGON is only there to catch unexpected races early.
	 */
	removed = radix_tree_delete(&sbi->workstn_tree, grp->index);
	DBG_BUGON((void *)((unsigned long)removed &
			   ~RADIX_TREE_EXCEPTIONAL_ENTRY) != grp);

	/*
	 * With the managed cache enabled, the last refcount stands for
	 * the related workstation, so finish with the final unfreeze.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;

out_unfreeze:
	erofs_workgroup_unfreeze(grp, 1);
	return false;
}
#else
/* for nocache case, no customized reclaim path at all */
/*
 * erofs_try_to_release_workgroup - try to reclaim one workgroup
 *                                  (variant without managed cache)
 * @sbi:     superblock info owning the workstation radix tree
 * @grp:     workgroup to release
 * @cleanup: when true, the refcount is asserted (DBG_BUGON) to be 1
 *
 * Return: false if more than one reference is outstanding; otherwise
 * the workgroup is deleted from sbi->workstn_tree, one reference is
 * dropped with erofs_workgroup_put(), and true is returned.
 */
bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
				    struct erofs_workgroup *grp,
				    bool cleanup)
{
	void *removed;
	int refs = atomic_read(&grp->refcount);

	DBG_BUGON(refs <= 0);
	DBG_BUGON(cleanup && refs != 1);

	/* more than one reference outstanding: leave the workgroup alone */
	if (refs > 1)
		return false;

	removed = radix_tree_delete(&sbi->workstn_tree, grp->index);
	DBG_BUGON((void *)((unsigned long)removed &
			   ~RADIX_TREE_EXCEPTIONAL_ENTRY) != grp);

	/* (rarely) the workgroup could be grabbed again while freeing */
	erofs_workgroup_put(grp);
	return true;
}
#endif
unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
unsigned long nr_shrink,
bool cleanup)
......@@ -130,44 +219,16 @@ unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
batch, first_index, PAGEVEC_SIZE);
for (i = 0; i < found; ++i) {
int cnt;
struct erofs_workgroup *grp = (void *)
((unsigned long)batch[i] &
~RADIX_TREE_EXCEPTIONAL_ENTRY);
first_index = grp->index + 1;
cnt = atomic_read(&grp->refcount);
BUG_ON(cnt <= 0);
if (cleanup)
BUG_ON(cnt != 1);
#ifndef EROFS_FS_HAS_MANAGED_CACHE
else if (cnt > 1)
#else
if (!erofs_workgroup_try_to_freeze(grp, 1))
#endif
/* try to shrink each valid workgroup */
if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
continue;
if (radix_tree_delete(&sbi->workstn_tree,
grp->index) != grp) {
#ifdef EROFS_FS_HAS_MANAGED_CACHE
skip:
erofs_workgroup_unfreeze(grp, 1);
#endif
continue;
}
#ifdef EROFS_FS_HAS_MANAGED_CACHE
if (erofs_try_to_free_all_cached_pages(sbi, grp))
goto skip;
erofs_workgroup_unfreeze(grp, 1);
#endif
/* (rarely) grabbed again when freeing */
erofs_workgroup_put(grp);
++freed;
if (unlikely(!--nr_shrink))
break;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册