提交 56c29979 编写于 作者: L Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "10 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: move pcp and lru-pcp draining into single wq
  mailmap: update Yakir Yang email address
  mm, swap_cgroup: reschedule when neeed in swap_cgroup_swapoff()
  dax: fix radix tree insertion race
  mm, thp: fix setting of defer+madvise thp defrag mode
  ptrace: fix PTRACE_LISTEN race corrupting task->state
  vmlinux.lds: add missing VMLINUX_SYMBOL macros
  mm/page_alloc.c: fix print order in show_free_areas()
  userfaultfd: report actual registered features in fdinfo
  mm: fix page_vma_mapped_walk() for ksm pages
...@@ -171,6 +171,7 @@ Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com> ...@@ -171,6 +171,7 @@ Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com> Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com> Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
Takashi YOSHII <takashi.yoshii.zj@renesas.com> Takashi YOSHII <takashi.yoshii.zj@renesas.com>
Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
Yusuke Goda <goda.yusuke@renesas.com> Yusuke Goda <goda.yusuke@renesas.com>
Gustavo Padovan <gustavo@las.ic.unicamp.br> Gustavo Padovan <gustavo@las.ic.unicamp.br>
Gustavo Padovan <padovan@profusion.mobi> Gustavo Padovan <padovan@profusion.mobi>
...@@ -373,6 +373,22 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index, ...@@ -373,6 +373,22 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
} }
spin_lock_irq(&mapping->tree_lock); spin_lock_irq(&mapping->tree_lock);
if (!entry) {
/*
* We needed to drop the page_tree lock while calling
* radix_tree_preload() and we didn't have an entry to
* lock. See if another thread inserted an entry at
* our index during this time.
*/
entry = __radix_tree_lookup(&mapping->page_tree, index,
NULL, &slot);
if (entry) {
radix_tree_preload_end();
spin_unlock_irq(&mapping->tree_lock);
goto restart;
}
}
if (pmd_downgrade) { if (pmd_downgrade) {
radix_tree_delete(&mapping->page_tree, index); radix_tree_delete(&mapping->page_tree, index);
mapping->nrexceptional--; mapping->nrexceptional--;
...@@ -388,19 +404,12 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index, ...@@ -388,19 +404,12 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
if (err) { if (err) {
spin_unlock_irq(&mapping->tree_lock); spin_unlock_irq(&mapping->tree_lock);
/* /*
* Someone already created the entry? This is a * Our insertion of a DAX entry failed, most likely
* normal failure when inserting PMDs in a range * because we were inserting a PMD entry and it
* that already contains PTEs. In that case we want * collided with a PTE sized entry at a different
* to return -EEXIST immediately. * index in the PMD range. We haven't inserted
*/ * anything into the radix tree and have no waiters to
if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD)) * wake.
goto restart;
/*
* Our insertion of a DAX PMD entry failed, most
* likely because it collided with a PTE sized entry
* at a different index in the PMD range. We haven't
* inserted anything into the radix tree and have no
* waiters to wake.
*/ */
return ERR_PTR(err); return ERR_PTR(err);
} }
......
...@@ -1756,7 +1756,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) ...@@ -1756,7 +1756,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
* protocols: aa:... bb:... * protocols: aa:... bb:...
*/ */
seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n", seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n",
pending, total, UFFD_API, UFFD_API_FEATURES, pending, total, UFFD_API, ctx->features,
UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS); UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS);
} }
#endif #endif
......
...@@ -261,9 +261,9 @@ ...@@ -261,9 +261,9 @@
*/ */
#ifndef RO_AFTER_INIT_DATA #ifndef RO_AFTER_INIT_DATA
#define RO_AFTER_INIT_DATA \ #define RO_AFTER_INIT_DATA \
__start_ro_after_init = .; \ VMLINUX_SYMBOL(__start_ro_after_init) = .; \
*(.data..ro_after_init) \ *(.data..ro_after_init) \
__end_ro_after_init = .; VMLINUX_SYMBOL(__end_ro_after_init) = .;
#endif #endif
/* /*
......
...@@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task) ...@@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
WARN_ON(!task->ptrace || task->parent != current); WARN_ON(!task->ptrace || task->parent != current);
/*
* PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
* Recheck state under the lock to close this race.
*/
spin_lock_irq(&task->sighand->siglock); spin_lock_irq(&task->sighand->siglock);
if (__fatal_signal_pending(task)) if (task->state == __TASK_TRACED) {
wake_up_state(task, __TASK_TRACED); if (__fatal_signal_pending(task))
else wake_up_state(task, __TASK_TRACED);
task->state = TASK_TRACED; else
task->state = TASK_TRACED;
}
spin_unlock_irq(&task->sighand->siglock); spin_unlock_irq(&task->sighand->siglock);
} }
......
...@@ -240,18 +240,18 @@ static ssize_t defrag_store(struct kobject *kobj, ...@@ -240,18 +240,18 @@ static ssize_t defrag_store(struct kobject *kobj,
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
} else if (!memcmp("defer", buf,
min(sizeof("defer")-1, count))) {
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
} else if (!memcmp("defer+madvise", buf, } else if (!memcmp("defer+madvise", buf,
min(sizeof("defer+madvise")-1, count))) { min(sizeof("defer+madvise")-1, count))) {
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
} else if (!memcmp("defer", buf,
min(sizeof("defer")-1, count))) {
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
} else if (!memcmp("madvise", buf, } else if (!memcmp("madvise", buf,
min(sizeof("madvise")-1, count))) { min(sizeof("madvise")-1, count))) {
clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
......
...@@ -481,6 +481,13 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, ...@@ -481,6 +481,13 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
enum ttu_flags; enum ttu_flags;
struct tlbflush_unmap_batch; struct tlbflush_unmap_batch;
/*
* only for MM internal work items which do not depend on
* any allocations or locks which might depend on allocations
*/
extern struct workqueue_struct *mm_percpu_wq;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
void try_to_unmap_flush(void); void try_to_unmap_flush(void);
void try_to_unmap_flush_dirty(void); void try_to_unmap_flush_dirty(void);
......
...@@ -2373,6 +2373,13 @@ void drain_all_pages(struct zone *zone) ...@@ -2373,6 +2373,13 @@ void drain_all_pages(struct zone *zone)
*/ */
static cpumask_t cpus_with_pcps; static cpumask_t cpus_with_pcps;
/*
* Make sure nobody triggers this path before mm_percpu_wq is fully
* initialized.
*/
if (WARN_ON_ONCE(!mm_percpu_wq))
return;
/* Workqueues cannot recurse */ /* Workqueues cannot recurse */
if (current->flags & PF_WQ_WORKER) if (current->flags & PF_WQ_WORKER)
return; return;
...@@ -2422,7 +2429,7 @@ void drain_all_pages(struct zone *zone) ...@@ -2422,7 +2429,7 @@ void drain_all_pages(struct zone *zone)
for_each_cpu(cpu, &cpus_with_pcps) { for_each_cpu(cpu, &cpus_with_pcps) {
struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu); struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
INIT_WORK(work, drain_local_pages_wq); INIT_WORK(work, drain_local_pages_wq);
schedule_work_on(cpu, work); queue_work_on(cpu, mm_percpu_wq, work);
} }
for_each_cpu(cpu, &cpus_with_pcps) for_each_cpu(cpu, &cpus_with_pcps)
flush_work(per_cpu_ptr(&pcpu_drain, cpu)); flush_work(per_cpu_ptr(&pcpu_drain, cpu));
...@@ -4519,13 +4526,13 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) ...@@ -4519,13 +4526,13 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(node_page_state(pgdat, NR_FILE_MAPPED)), K(node_page_state(pgdat, NR_FILE_MAPPED)),
K(node_page_state(pgdat, NR_FILE_DIRTY)), K(node_page_state(pgdat, NR_FILE_DIRTY)),
K(node_page_state(pgdat, NR_WRITEBACK)), K(node_page_state(pgdat, NR_WRITEBACK)),
K(node_page_state(pgdat, NR_SHMEM)),
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR), K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
* HPAGE_PMD_NR), * HPAGE_PMD_NR),
K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR), K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
#endif #endif
K(node_page_state(pgdat, NR_SHMEM)),
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
K(node_page_state(pgdat, NR_UNSTABLE_NFS)), K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
node_page_state(pgdat, NR_PAGES_SCANNED), node_page_state(pgdat, NR_PAGES_SCANNED),
......
...@@ -111,12 +111,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) ...@@ -111,12 +111,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (pvmw->pmd && !pvmw->pte) if (pvmw->pmd && !pvmw->pte)
return not_found(pvmw); return not_found(pvmw);
/* Only for THP, seek to next pte entry makes sense */ if (pvmw->pte)
if (pvmw->pte) {
if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
return not_found(pvmw);
goto next_pte; goto next_pte;
}
if (unlikely(PageHuge(pvmw->page))) { if (unlikely(PageHuge(pvmw->page))) {
/* when pud is not present, pte will be NULL */ /* when pud is not present, pte will be NULL */
...@@ -165,9 +161,14 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) ...@@ -165,9 +161,14 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
while (1) { while (1) {
if (check_pte(pvmw)) if (check_pte(pvmw))
return true; return true;
next_pte: do { next_pte:
/* Seek to next pte only makes sense for THP */
if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
return not_found(pvmw);
do {
pvmw->address += PAGE_SIZE; pvmw->address += PAGE_SIZE;
if (pvmw->address >= if (pvmw->address >= pvmw->vma->vm_end ||
pvmw->address >=
__vma_address(pvmw->page, pvmw->vma) + __vma_address(pvmw->page, pvmw->vma) +
hpage_nr_pages(pvmw->page) * PAGE_SIZE) hpage_nr_pages(pvmw->page) * PAGE_SIZE)
return not_found(pvmw); return not_found(pvmw);
......
...@@ -670,30 +670,19 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy) ...@@ -670,30 +670,19 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
/*
* lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
* workqueue, aiding in getting memory freed.
*/
static struct workqueue_struct *lru_add_drain_wq;
static int __init lru_init(void)
{
lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
if (WARN(!lru_add_drain_wq,
"Failed to create workqueue lru_add_drain_wq"))
return -ENOMEM;
return 0;
}
early_initcall(lru_init);
void lru_add_drain_all(void) void lru_add_drain_all(void)
{ {
static DEFINE_MUTEX(lock); static DEFINE_MUTEX(lock);
static struct cpumask has_work; static struct cpumask has_work;
int cpu; int cpu;
/*
* Make sure nobody triggers this path before mm_percpu_wq is fully
* initialized.
*/
if (WARN_ON(!mm_percpu_wq))
return;
mutex_lock(&lock); mutex_lock(&lock);
get_online_cpus(); get_online_cpus();
cpumask_clear(&has_work); cpumask_clear(&has_work);
...@@ -707,7 +696,7 @@ void lru_add_drain_all(void) ...@@ -707,7 +696,7 @@ void lru_add_drain_all(void)
pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
need_activate_page_drain(cpu)) { need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu); INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, lru_add_drain_wq, work); queue_work_on(cpu, mm_percpu_wq, work);
cpumask_set_cpu(cpu, &has_work); cpumask_set_cpu(cpu, &has_work);
} }
} }
......
...@@ -201,6 +201,8 @@ void swap_cgroup_swapoff(int type) ...@@ -201,6 +201,8 @@ void swap_cgroup_swapoff(int type)
struct page *page = map[i]; struct page *page = map[i];
if (page) if (page)
__free_page(page); __free_page(page);
if (!(i % SWAP_CLUSTER_MAX))
cond_resched();
} }
vfree(map); vfree(map);
} }
......
...@@ -1552,7 +1552,6 @@ static const struct file_operations proc_vmstat_file_operations = { ...@@ -1552,7 +1552,6 @@ static const struct file_operations proc_vmstat_file_operations = {
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static struct workqueue_struct *vmstat_wq;
static DEFINE_PER_CPU(struct delayed_work, vmstat_work); static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ; int sysctl_stat_interval __read_mostly = HZ;
...@@ -1623,7 +1622,7 @@ static void vmstat_update(struct work_struct *w) ...@@ -1623,7 +1622,7 @@ static void vmstat_update(struct work_struct *w)
* to occur in the future. Keep on running the * to occur in the future. Keep on running the
* update worker thread. * update worker thread.
*/ */
queue_delayed_work_on(smp_processor_id(), vmstat_wq, queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
this_cpu_ptr(&vmstat_work), this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval)); round_jiffies_relative(sysctl_stat_interval));
} }
...@@ -1702,7 +1701,7 @@ static void vmstat_shepherd(struct work_struct *w) ...@@ -1702,7 +1701,7 @@ static void vmstat_shepherd(struct work_struct *w)
struct delayed_work *dw = &per_cpu(vmstat_work, cpu); struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
if (!delayed_work_pending(dw) && need_update(cpu)) if (!delayed_work_pending(dw) && need_update(cpu))
queue_delayed_work_on(cpu, vmstat_wq, dw, 0); queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
} }
put_online_cpus(); put_online_cpus();
...@@ -1718,7 +1717,6 @@ static void __init start_shepherd_timer(void) ...@@ -1718,7 +1717,6 @@ static void __init start_shepherd_timer(void)
INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu), INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
vmstat_update); vmstat_update);
vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
schedule_delayed_work(&shepherd, schedule_delayed_work(&shepherd,
round_jiffies_relative(sysctl_stat_interval)); round_jiffies_relative(sysctl_stat_interval));
} }
...@@ -1764,11 +1762,16 @@ static int vmstat_cpu_dead(unsigned int cpu) ...@@ -1764,11 +1762,16 @@ static int vmstat_cpu_dead(unsigned int cpu)
#endif #endif
struct workqueue_struct *mm_percpu_wq;
void __init init_mm_internals(void) void __init init_mm_internals(void)
{ {
#ifdef CONFIG_SMP int ret __maybe_unused;
int ret;
mm_percpu_wq = alloc_workqueue("mm_percpu_wq",
WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
#ifdef CONFIG_SMP
ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead", ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
NULL, vmstat_cpu_dead); NULL, vmstat_cpu_dead);
if (ret < 0) if (ret < 0)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册