提交 d3b577e2 编写于 作者: K Kirill A. Shutemov 提交者: Yang Yingliang

mm/filemap.c: fix a data race in filemap_fault()

mainline inclusion
from mainline-v5.9-rc1
commit e630bfac
category: bugfix
bugzilla: 41542
CVE: NA

-----------------------------------------------

struct file_ra_state ra.mmap_miss could be accessed concurrently during
page faults as noticed by KCSAN,

 BUG: KCSAN: data-race in filemap_fault / filemap_map_pages

 write to 0xffff9b1700a2c1b4 of 4 bytes by task 3292 on cpu 30:
  filemap_fault+0x920/0xfc0
  do_sync_mmap_readahead at mm/filemap.c:2384
  (inlined by) filemap_fault at mm/filemap.c:2486
  __xfs_filemap_fault+0x112/0x3e0 [xfs]
  xfs_filemap_fault+0x74/0x90 [xfs]
  __do_fault+0x9e/0x220
  do_fault+0x4a0/0x920
  __handle_mm_fault+0xc69/0xd00
  handle_mm_fault+0xfc/0x2f0
  do_page_fault+0x263/0x6f9
  page_fault+0x34/0x40

 read to 0xffff9b1700a2c1b4 of 4 bytes by task 3313 on cpu 32:
  filemap_map_pages+0xc2e/0xd80
  filemap_map_pages at mm/filemap.c:2625
  do_fault+0x3da/0x920
  __handle_mm_fault+0xc69/0xd00
  handle_mm_fault+0xfc/0x2f0
  do_page_fault+0x263/0x6f9
  page_fault+0x34/0x40

 Reported by Kernel Concurrency Sanitizer on:
 CPU: 32 PID: 3313 Comm: systemd-udevd Tainted: G        W    L 5.5.0-next-20200210+ #1
 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019

ra.mmap_miss is used to contribute to the readahead decisions; a data race
could be undesirable.  Both the read and the write are only under the
non-exclusive mmap_sem, so two concurrent writers could even underflow the
counter.  Fix the underflow by writing to a local variable before committing
a final store to ra.mmap_miss, given that a small inaccuracy of the counter
should be acceptable.
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Qian Cai <cai@lca.pw>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Marco Elver <elver@google.com>
Link: http://lkml.kernel.org/r/20200211030134.1847-1-cai@lca.pw
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Conflicts:
	mm/filemap.c
[Peng Liu: cherry-pick from e630bfac]
Signed-off-by: Peng Liu <liupeng256@huawei.com>
Reviewed-by: tong tiangen <tongtiangen@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 08b8578f
...@@ -2530,6 +2530,7 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma, ...@@ -2530,6 +2530,7 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
pgoff_t offset) pgoff_t offset)
{ {
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
unsigned int mmap_miss;
/* If we don't want any read-ahead, don't bother */ /* If we don't want any read-ahead, don't bother */
if (vma->vm_flags & VM_RAND_READ) if (vma->vm_flags & VM_RAND_READ)
...@@ -2544,14 +2545,15 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma, ...@@ -2544,14 +2545,15 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
} }
/* Avoid banging the cache line if not needed */ /* Avoid banging the cache line if not needed */
if (ra->mmap_miss < MMAP_LOTSAMISS * 10) mmap_miss = READ_ONCE(ra->mmap_miss);
ra->mmap_miss++; if (mmap_miss < MMAP_LOTSAMISS * 10)
WRITE_ONCE(ra->mmap_miss, ++mmap_miss);
/* /*
* Do we miss much more than hit in this file? If so, * Do we miss much more than hit in this file? If so,
* stop bothering with read-ahead. It will only hurt. * stop bothering with read-ahead. It will only hurt.
*/ */
if (ra->mmap_miss > MMAP_LOTSAMISS) if (mmap_miss > MMAP_LOTSAMISS)
return; return;
/* /*
...@@ -2574,12 +2576,14 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, ...@@ -2574,12 +2576,14 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma,
pgoff_t offset) pgoff_t offset)
{ {
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
unsigned int mmap_miss;
/* If we don't want any read-ahead, don't bother */ /* If we don't want any read-ahead, don't bother */
if (vma->vm_flags & VM_RAND_READ) if (vma->vm_flags & VM_RAND_READ)
return; return;
if (ra->mmap_miss > 0) mmap_miss = READ_ONCE(ra->mmap_miss);
ra->mmap_miss--; if (mmap_miss)
WRITE_ONCE(ra->mmap_miss, --mmap_miss);
if (PageReadahead(page)) if (PageReadahead(page))
page_cache_async_readahead(mapping, ra, file, page_cache_async_readahead(mapping, ra, file,
page, offset, ra->ra_pages); page, offset, ra->ra_pages);
...@@ -2739,6 +2743,7 @@ void filemap_map_pages(struct vm_fault *vmf, ...@@ -2739,6 +2743,7 @@ void filemap_map_pages(struct vm_fault *vmf,
pgoff_t last_pgoff = start_pgoff; pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx; unsigned long max_idx;
struct page *head, *page; struct page *head, *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
rcu_read_lock(); rcu_read_lock();
radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start_pgoff) { radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start_pgoff) {
...@@ -2786,8 +2791,8 @@ void filemap_map_pages(struct vm_fault *vmf, ...@@ -2786,8 +2791,8 @@ void filemap_map_pages(struct vm_fault *vmf,
if (page->index >= max_idx) if (page->index >= max_idx)
goto unlock; goto unlock;
if (file->f_ra.mmap_miss > 0) if (mmap_miss > 0)
file->f_ra.mmap_miss--; mmap_miss--;
vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT; vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT;
if (vmf->pte) if (vmf->pte)
...@@ -2809,6 +2814,7 @@ void filemap_map_pages(struct vm_fault *vmf, ...@@ -2809,6 +2814,7 @@ void filemap_map_pages(struct vm_fault *vmf,
break; break;
} }
rcu_read_unlock(); rcu_read_unlock();
WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
} }
EXPORT_SYMBOL(filemap_map_pages); EXPORT_SYMBOL(filemap_map_pages);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册