“0b708c87f66a15190fb43661c2320fd48c4dc6c8”上不存在“README.md”
提交 bca1d4de 编写于 作者: Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc mm fixes from Andrew Morton:
 "15 patches.

  VM subsystems affected by this patch series: userfaultfd, kfence,
  highmem, pagealloc, memblock, pagecache, secretmem, pagemap, and
  hugetlbfs"

* akpm:
  hugetlbfs: fix mount mode command line processing
  mm: fix the deadlock in finish_fault()
  mm: mmap_lock: fix disabling preemption directly
  mm/secretmem: wire up ->set_page_dirty
  writeback, cgroup: do not reparent dax inodes
  writeback, cgroup: remove wb from offline list before releasing refcnt
  memblock: make for_each_mem_range() traverse MEMBLOCK_HOTPLUG regions
  mm: page_alloc: fix page_poison=1 / INIT_ON_ALLOC_DEFAULT_ON interaction
  mm: use kmap_local_page in memzero_page
  mm: call flush_dcache_page() in memcpy_to_page() and memzero_page()
  kfence: skip all GFP_ZONEMASK allocations
  kfence: move the size check to the beginning of __kfence_alloc()
  kfence: defer kfence_test_init to ensure that kunit debugfs is created
  selftest: use mmap instead of posix_memalign to allocate memory
  userfaultfd: do not untag user pointers
...@@ -45,8 +45,9 @@ how the user addresses are used by the kernel: ...@@ -45,8 +45,9 @@ how the user addresses are used by the kernel:
1. User addresses not accessed by the kernel but used for address space 1. User addresses not accessed by the kernel but used for address space
management (e.g. ``mprotect()``, ``madvise()``). The use of valid management (e.g. ``mprotect()``, ``madvise()``). The use of valid
tagged pointers in this context is allowed with the exception of tagged pointers in this context is allowed with these exceptions:
``brk()``, ``mmap()`` and the ``new_address`` argument to
- ``brk()``, ``mmap()`` and the ``new_address`` argument to
``mremap()`` as these have the potential to alias with existing ``mremap()`` as these have the potential to alias with existing
user addresses. user addresses.
...@@ -54,6 +55,15 @@ how the user addresses are used by the kernel: ...@@ -54,6 +55,15 @@ how the user addresses are used by the kernel:
incorrectly accept valid tagged pointers for the ``brk()``, incorrectly accept valid tagged pointers for the ``brk()``,
``mmap()`` and ``mremap()`` system calls. ``mmap()`` and ``mremap()`` system calls.
- The ``range.start``, ``start`` and ``dst`` arguments to the
``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from
``userfaultfd()``, as fault addresses subsequently obtained by reading
the file descriptor will be untagged, which may otherwise confuse
tag-unaware programs.
NOTE: This behaviour changed in v5.14 and so some earlier kernels may
incorrectly accept valid tagged pointers for this system call.
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
relaxation is disabled by default and the application thread needs to relaxation is disabled by default and the application thread needs to
explicitly enable it via ``prctl()`` as follows: explicitly enable it via ``prctl()`` as follows:
......
...@@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode, ...@@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode,
*/ */
smp_mb(); smp_mb();
if (IS_DAX(inode))
return false;
/* while holding I_WB_SWITCH, no one else can update the association */ /* while holding I_WB_SWITCH, no one else can update the association */
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (!(inode->i_sb->s_flags & SB_ACTIVE) || if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
......
...@@ -77,7 +77,7 @@ enum hugetlb_param { ...@@ -77,7 +77,7 @@ enum hugetlb_param {
static const struct fs_parameter_spec hugetlb_fs_parameters[] = { static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
fsparam_u32 ("gid", Opt_gid), fsparam_u32 ("gid", Opt_gid),
fsparam_string("min_size", Opt_min_size), fsparam_string("min_size", Opt_min_size),
fsparam_u32 ("mode", Opt_mode), fsparam_u32oct("mode", Opt_mode),
fsparam_string("nr_inodes", Opt_nr_inodes), fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("pagesize", Opt_pagesize), fsparam_string("pagesize", Opt_pagesize),
fsparam_string("size", Opt_size), fsparam_string("size", Opt_size),
......
...@@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, ...@@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
} }
static __always_inline int validate_range(struct mm_struct *mm, static __always_inline int validate_range(struct mm_struct *mm,
__u64 *start, __u64 len) __u64 start, __u64 len)
{ {
__u64 task_size = mm->task_size; __u64 task_size = mm->task_size;
*start = untagged_addr(*start); if (start & ~PAGE_MASK)
if (*start & ~PAGE_MASK)
return -EINVAL; return -EINVAL;
if (len & ~PAGE_MASK) if (len & ~PAGE_MASK)
return -EINVAL; return -EINVAL;
if (!len) if (!len)
return -EINVAL; return -EINVAL;
if (*start < mmap_min_addr) if (start < mmap_min_addr)
return -EINVAL; return -EINVAL;
if (*start >= task_size) if (start >= task_size)
return -EINVAL; return -EINVAL;
if (len > task_size - *start) if (len > task_size - start)
return -EINVAL; return -EINVAL;
return 0; return 0;
} }
...@@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ...@@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vm_flags |= VM_UFFD_MINOR; vm_flags |= VM_UFFD_MINOR;
} }
ret = validate_range(mm, &uffdio_register.range.start, ret = validate_range(mm, uffdio_register.range.start,
uffdio_register.range.len); uffdio_register.range.len);
if (ret) if (ret)
goto out; goto out;
...@@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, ...@@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
goto out; goto out;
ret = validate_range(mm, &uffdio_unregister.start, ret = validate_range(mm, uffdio_unregister.start,
uffdio_unregister.len); uffdio_unregister.len);
if (ret) if (ret)
goto out; goto out;
...@@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx, ...@@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
goto out; goto out;
ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len); ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
if (ret) if (ret)
goto out; goto out;
...@@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, ...@@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
sizeof(uffdio_copy)-sizeof(__s64))) sizeof(uffdio_copy)-sizeof(__s64)))
goto out; goto out;
ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len); ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
if (ret) if (ret)
goto out; goto out;
/* /*
...@@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, ...@@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
sizeof(uffdio_zeropage)-sizeof(__s64))) sizeof(uffdio_zeropage)-sizeof(__s64)))
goto out; goto out;
ret = validate_range(ctx->mm, &uffdio_zeropage.range.start, ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
uffdio_zeropage.range.len); uffdio_zeropage.range.len);
if (ret) if (ret)
goto out; goto out;
...@@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, ...@@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
sizeof(struct uffdio_writeprotect))) sizeof(struct uffdio_writeprotect)))
return -EFAULT; return -EFAULT;
ret = validate_range(ctx->mm, &uffdio_wp.range.start, ret = validate_range(ctx->mm, uffdio_wp.range.start,
uffdio_wp.range.len); uffdio_wp.range.len);
if (ret) if (ret)
return ret; return ret;
...@@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) ...@@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
sizeof(uffdio_continue) - (sizeof(__s64)))) sizeof(uffdio_continue) - (sizeof(__s64))))
goto out; goto out;
ret = validate_range(ctx->mm, &uffdio_continue.range.start, ret = validate_range(ctx->mm, uffdio_continue.range.start,
uffdio_continue.range.len); uffdio_continue.range.len);
if (ret) if (ret)
goto out; goto out;
......
...@@ -318,14 +318,16 @@ static inline void memcpy_to_page(struct page *page, size_t offset, ...@@ -318,14 +318,16 @@ static inline void memcpy_to_page(struct page *page, size_t offset,
VM_BUG_ON(offset + len > PAGE_SIZE); VM_BUG_ON(offset + len > PAGE_SIZE);
memcpy(to + offset, from, len); memcpy(to + offset, from, len);
flush_dcache_page(page);
kunmap_local(to); kunmap_local(to);
} }
static inline void memzero_page(struct page *page, size_t offset, size_t len) static inline void memzero_page(struct page *page, size_t offset, size_t len)
{ {
char *addr = kmap_atomic(page); char *addr = kmap_local_page(page);
memset(addr + offset, 0, len); memset(addr + offset, 0, len);
kunmap_atomic(addr); flush_dcache_page(page);
kunmap_local(addr);
} }
#endif /* _LINUX_HIGHMEM_H */ #endif /* _LINUX_HIGHMEM_H */
...@@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, ...@@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
*/ */
#define for_each_mem_range(i, p_start, p_end) \ #define for_each_mem_range(i, p_start, p_end) \
__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \
MEMBLOCK_NONE, p_start, p_end, NULL) MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
/** /**
* for_each_mem_range_rev - reverse iterate through memblock areas from * for_each_mem_range_rev - reverse iterate through memblock areas from
...@@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, ...@@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
*/ */
#define for_each_mem_range_rev(i, p_start, p_end) \ #define for_each_mem_range_rev(i, p_start, p_end) \
__for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
MEMBLOCK_NONE, p_start, p_end, NULL) MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
/** /**
* for_each_reserved_mem_range - iterate over all reserved memblock areas * for_each_reserved_mem_range - iterate over all reserved memblock areas
......
...@@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work) ...@@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work)
blkcg_unpin_online(blkcg); blkcg_unpin_online(blkcg);
fprop_local_destroy_percpu(&wb->memcg_completions); fprop_local_destroy_percpu(&wb->memcg_completions);
percpu_ref_exit(&wb->refcnt);
spin_lock_irq(&cgwb_lock); spin_lock_irq(&cgwb_lock);
list_del(&wb->offline_node); list_del(&wb->offline_node);
spin_unlock_irq(&cgwb_lock); spin_unlock_irq(&cgwb_lock);
percpu_ref_exit(&wb->refcnt);
wb_exit(wb); wb_exit(wb);
WARN_ON_ONCE(!list_empty(&wb->b_attached)); WARN_ON_ONCE(!list_empty(&wb->b_attached));
kfree_rcu(wb, rcu); kfree_rcu(wb, rcu);
......
...@@ -733,6 +733,22 @@ void kfence_shutdown_cache(struct kmem_cache *s) ...@@ -733,6 +733,22 @@ void kfence_shutdown_cache(struct kmem_cache *s)
void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
{ {
/*
* Perform size check before switching kfence_allocation_gate, so that
* we don't disable KFENCE without making an allocation.
*/
if (size > PAGE_SIZE)
return NULL;
/*
* Skip allocations from non-default zones, including DMA. We cannot
* guarantee that pages in the KFENCE pool will have the requested
* properties (e.g. reside in DMAable memory).
*/
if ((flags & GFP_ZONEMASK) ||
(s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32)))
return NULL;
/* /*
* allocation_gate only needs to become non-zero, so it doesn't make * allocation_gate only needs to become non-zero, so it doesn't make
* sense to continue writing to it and pay the associated contention * sense to continue writing to it and pay the associated contention
...@@ -757,9 +773,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) ...@@ -757,9 +773,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
if (!READ_ONCE(kfence_enabled)) if (!READ_ONCE(kfence_enabled))
return NULL; return NULL;
if (size > PAGE_SIZE)
return NULL;
return kfence_guarded_alloc(s, size, flags); return kfence_guarded_alloc(s, size, flags);
} }
......
...@@ -852,7 +852,7 @@ static void kfence_test_exit(void) ...@@ -852,7 +852,7 @@ static void kfence_test_exit(void)
tracepoint_synchronize_unregister(); tracepoint_synchronize_unregister();
} }
late_initcall(kfence_test_init); late_initcall_sync(kfence_test_init);
module_exit(kfence_test_exit); module_exit(kfence_test_exit);
MODULE_LICENSE("GPL v2"); MODULE_LICENSE("GPL v2");
......
...@@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type, ...@@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type,
return true; return true;
/* skip hotpluggable memory regions if needed */ /* skip hotpluggable memory regions if needed */
if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
!(flags & MEMBLOCK_HOTPLUG))
return true; return true;
/* if we want mirror memory skip non-mirror memory regions */ /* if we want mirror memory skip non-mirror memory regions */
......
...@@ -4026,9 +4026,18 @@ vm_fault_t finish_fault(struct vm_fault *vmf) ...@@ -4026,9 +4026,18 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
return ret; return ret;
} }
if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) if (vmf->prealloc_pte) {
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
if (likely(pmd_none(*vmf->pmd))) {
mm_inc_nr_ptes(vma->vm_mm);
pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
vmf->prealloc_pte = NULL;
}
spin_unlock(vmf->ptl);
} else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
return VM_FAULT_OOM; return VM_FAULT_OOM;
} }
}
/* See comment in handle_pte_fault() */ /* See comment in handle_pte_fault() */
if (pmd_devmap_trans_unstable(vmf->pmd)) if (pmd_devmap_trans_unstable(vmf->pmd))
......
...@@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void) ...@@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void)
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \ #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \
do { \ do { \
const char *memcg_path; \ const char *memcg_path; \
preempt_disable(); \ local_lock(&memcg_paths.lock); \
memcg_path = get_mm_memcg_path(mm); \ memcg_path = get_mm_memcg_path(mm); \
trace_mmap_lock_##type(mm, \ trace_mmap_lock_##type(mm, \
memcg_path != NULL ? memcg_path : "", \ memcg_path != NULL ? memcg_path : "", \
##__VA_ARGS__); \ ##__VA_ARGS__); \
if (likely(memcg_path != NULL)) \ if (likely(memcg_path != NULL)) \
put_memcg_path_buf(); \ put_memcg_path_buf(); \
preempt_enable(); \ local_unlock(&memcg_paths.lock); \
} while (0) } while (0)
#else /* !CONFIG_MEMCG */ #else /* !CONFIG_MEMCG */
......
...@@ -840,20 +840,23 @@ void init_mem_debugging_and_hardening(void) ...@@ -840,20 +840,23 @@ void init_mem_debugging_and_hardening(void)
} }
#endif #endif
if (_init_on_alloc_enabled_early) { if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
if (page_poisoning_requested) page_poisoning_requested) {
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
"will take precedence over init_on_alloc\n"); "will take precedence over init_on_alloc and init_on_free\n");
else _init_on_alloc_enabled_early = false;
static_branch_enable(&init_on_alloc); _init_on_free_enabled_early = false;
} }
if (_init_on_free_enabled_early) {
if (page_poisoning_requested) if (_init_on_alloc_enabled_early)
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " static_branch_enable(&init_on_alloc);
"will take precedence over init_on_free\n");
else else
static_branch_disable(&init_on_alloc);
if (_init_on_free_enabled_early)
static_branch_enable(&init_on_free); static_branch_enable(&init_on_free);
} else
static_branch_disable(&init_on_free);
#ifdef CONFIG_DEBUG_PAGEALLOC #ifdef CONFIG_DEBUG_PAGEALLOC
if (!debug_pagealloc_enabled()) if (!debug_pagealloc_enabled())
......
...@@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page) ...@@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page)
} }
const struct address_space_operations secretmem_aops = { const struct address_space_operations secretmem_aops = {
.set_page_dirty = __set_page_dirty_no_writeback,
.freepage = secretmem_freepage, .freepage = secretmem_freepage,
.migratepage = secretmem_migratepage, .migratepage = secretmem_migratepage,
.isolate_page = secretmem_isolate_page, .isolate_page = secretmem_isolate_page,
......
...@@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area) ...@@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area)
static void anon_allocate_area(void **alloc_area) static void anon_allocate_area(void **alloc_area)
{ {
if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
err("posix_memalign() failed"); MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (*alloc_area == MAP_FAILED)
err("mmap of anonymous memory failed");
} }
static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手引导
客服
返回顶部