Commit 7dc4c73d authored by Jingxian He, committed by Zheng Zengkai

mm: add pin memory method for checkpoint and restore

hulk inclusion
category: feature
bugzilla: 48159
CVE: N/A

------------------------------

We can use the checkpoint and restore in userspace (criu) method to dump
and restore tasks when updating the kernel.
Currently, criu needs to dump all the memory data of tasks to files.
When the memory size is large (more than 1 GB), dumping the data
takes a long time (more than one minute).

By pinning the memory data of tasks and collecting the corresponding
physical page mapping info at checkpoint, we can remap the physical
pages into the restored tasks after upgrading the kernel. This pin
memory method can restore the task data within one second.

The pin memory area info is saved in a reserved memblock region,
which remains usable across the kernel update.

The pin memory driver provides the following ioctl commands for criu
(see the usage sketch after this list):
1) SET_PIN_MEM_AREA:
Set a pin memory area, which can be remapped into the restored task.
2) CLEAR_PIN_MEM_AREA:
Clear the pin memory area info, allowing the user to reset the pinned data.
3) REMAP_PIN_MEM_AREA:
Remap the pages of the pin memory into the restored task.
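
For illustration, a minimal userspace sketch of how criu might drive these
ioctls; the device path and ioctl definitions mirror the driver added below,
while the pid and address range are placeholders and error handling is
trimmed:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/ioctl.h>
#include <sys/ioctl.h>

#define PIN_MEM_MAGIC 0x59
struct _pin_mem_area {
	unsigned long virt_start;
	unsigned long virt_end;
};
struct pin_mem_area_set {
	unsigned int pid;
	unsigned int area_num;
	struct _pin_mem_area mem_area[16];
};
#define SET_PIN_MEM_AREA   _IOW(PIN_MEM_MAGIC, 1, struct pin_mem_area_set)
#define REMAP_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, 3, int)

int main(void)
{
	struct pin_mem_area_set pmas = { .pid = 1234, .area_num = 1 };
	int pid = 1234;
	int fd = open("/dev/pinmem", O_RDWR);

	if (fd < 0)
		return 1;
	pmas.mem_area[0].virt_start = 0x400000;	/* placeholder range */
	pmas.mem_area[0].virt_end = 0x600000;
	/* checkpoint side: pin the task's pages */
	if (ioctl(fd, SET_PIN_MEM_AREA, &pmas) < 0)
		perror("SET_PIN_MEM_AREA");
	/* restore side, after kexec into the new kernel: remap the pages */
	if (ioctl(fd, REMAP_PIN_MEM_AREA, &pid) < 0)
		perror("REMAP_PIN_MEM_AREA");
	close(fd);
	return 0;
}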
Signed-off-by: Jingxian He <hejingxian@huawei.com>
Reviewed-by: Chen Wandun <chenwandun@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 94dc364f
@@ -30,6 +30,9 @@
#include <linux/psci.h>
#include <linux/sched/task.h>
#include <linux/mm.h>
#ifdef CONFIG_PIN_MEMORY
#include <linux/pin_mem.h>
#endif
#include <asm/acpi.h>
#include <asm/fixmap.h>
@@ -271,12 +274,19 @@ static void __init request_standard_resources(void)
crashk_res.end <= res->end)
request_resource(res, &crashk_res);
#endif
#ifdef CONFIG_QUICK_KEXEC
if (quick_kexec_res.end &&
quick_kexec_res.start >= res->start &&
quick_kexec_res.end <= res->end)
request_resource(res, &quick_kexec_res);
#endif
#ifdef CONFIG_PIN_MEMORY
if (pin_memory_resource.end && pin_memory_resource.start >= res->start &&
pin_memory_resource.end <= res->end)
request_resource(res, &pin_memory_resource);
#endif
}
#ifdef CONFIG_ARM64_PMEM_RESERVE
......
@@ -30,6 +30,9 @@
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
#include <linux/acpi_iort.h>
#ifdef CONFIG_PIN_MEMORY
#include <linux/pin_mem.h>
#endif
#include <asm/boot.h>
#include <asm/fixmap.h>
@@ -54,6 +57,52 @@ s64 memstart_addr __ro_after_init = -1;
EXPORT_SYMBOL(memstart_addr);
phys_addr_t start_at, mem_size;
#ifdef CONFIG_PIN_MEMORY
struct resource pin_memory_resource = {
.name = "Pin memory",
.start = 0,
.end = 0,
.flags = IORESOURCE_MEM,
.desc = IORES_DESC_RESERVED
};
static void __init reserve_pin_memory_res(void)
{
unsigned long long mem_start, mem_len;
int ret;
ret = parse_pin_memory(boot_command_line, memblock_phys_mem_size(),
&mem_len, &mem_start);
if (ret || !mem_len)
return;
mem_len = PAGE_ALIGN(mem_len);
if (!memblock_is_region_memory(mem_start, mem_len)) {
pr_warn("cannot reserve for pin memory: region is not memory!\n");
return;
}
if (memblock_is_region_reserved(mem_start, mem_len)) {
pr_warn("cannot reserve for pin memory: region overlaps reserved memory!\n");
return;
}
if (!IS_ALIGNED(mem_start, SZ_2M)) {
pr_warn("cannot reserve for pin memory: base address is not 2MB aligned\n");
return;
}
memblock_reserve(mem_start, mem_len);
pin_memory_resource.start = mem_start;
pin_memory_resource.end = mem_start + mem_len - 1;
}
#else
static void __init reserve_pin_memory_res(void)
{
}
#endif /* CONFIG_PIN_MEMORY */
/*
* If the corresponding config options are enabled, we create both ZONE_DMA
* and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory
@@ -616,6 +665,8 @@ void __init bootmem_init(void)
reserve_pmem();
#endif
reserve_pin_memory_res();
memblock_dump_all();
}
@@ -705,6 +756,12 @@ void __init mem_init(void)
/* this will put all unused low memory onto the freelists */
memblock_free_all();
#ifdef CONFIG_PIN_MEMORY
/* preallocate the pages for pin memory */
init_reserve_page_map((unsigned long)pin_memory_resource.start,
(unsigned long)(pin_memory_resource.end - pin_memory_resource.start + 1));
#endif
mem_init_print_info(NULL);
/*
......
@@ -471,6 +471,13 @@ config ADI
and SSM (Silicon Secured Memory). Intended consumers of this
driver include crash and makedumpfile.
config PIN_MEMORY_DEV
tristate "/dev/pinmem character device"
depends on PIN_MEMORY
default m
help
Provide the /dev/pinmem character device used by criu to pin
task memory at checkpoint and remap it into restored tasks.
endmenu
config RANDOM_TRUST_CPU
......
@@ -47,3 +47,4 @@ obj-$(CONFIG_PS3_FLASH) += ps3flash.o
obj-$(CONFIG_XILLYBUS) += xillybus/
obj-$(CONFIG_POWERNV_OP_PANEL) += powernv-op-panel.o
obj-$(CONFIG_ADI) += adi.o
obj-$(CONFIG_PIN_MEMORY_DEV) += pin_memory.o
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
* Description: Euler pin memory driver
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/mm_types.h>
#include <linux/processor.h>
#include <uapi/asm-generic/ioctl.h>
#include <uapi/asm-generic/mman-common.h>
#include <uapi/asm/setup.h>
#include <linux/pin_mem.h>
#include <linux/sched/mm.h>
#define MAX_PIN_MEM_AREA_NUM 16
struct _pin_mem_area {
unsigned long virt_start;
unsigned long virt_end;
};
struct pin_mem_area_set {
unsigned int pid;
unsigned int area_num;
struct _pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM];
};
#define PIN_MEM_MAGIC 0x59
#define _SET_PIN_MEM_AREA 1
#define _CLEAR_PIN_MEM_AREA 2
#define _REMAP_PIN_MEM_AREA 3
#define _FINISH_PIN_MEM_DUMP 4
#define _INIT_PAGEMAP_READ 5
#define _PIN_MEM_IOC_MAX_NR 5
#define SET_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _SET_PIN_MEM_AREA, struct pin_mem_area_set)
#define CLEAR_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _CLEAR_PIN_MEM_AREA, int)
#define REMAP_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _REMAP_PIN_MEM_AREA, int)
#define FINISH_PIN_MEM_DUMP _IOW(PIN_MEM_MAGIC, _FINISH_PIN_MEM_DUMP, int)
#define INIT_PAGEMAP_READ _IOW(PIN_MEM_MAGIC, _INIT_PAGEMAP_READ, int)
static int set_pin_mem(struct pin_mem_area_set *pmas)
{
int i;
int ret = 0;
struct _pin_mem_area *pma;
struct mm_struct *mm;
struct task_struct *task;
struct pid *pid_s;
pid_s = find_get_pid(pmas->pid);
if (!pid_s) {
pr_warn("Get pid struct fail:%d.\n", pmas->pid);
return -EFAULT;
}
rcu_read_lock();
task = pid_task(pid_s, PIDTYPE_PID);
if (!task) {
pr_warn("Get task struct fail:%d.\n", pmas->pid);
goto fail;
}
mm = get_task_mm(task);
/* kernel threads and exiting tasks have no mm */
if (!mm)
goto fail;
for (i = 0; i < pmas->area_num; i++) {
pma = &(pmas->mem_area[i]);
ret = pin_mem_area(task, mm, pma->virt_start, pma->virt_end);
if (ret) {
mmput(mm);
goto fail;
}
}
mmput(mm);
rcu_read_unlock();
put_pid(pid_s);
return ret;
fail:
rcu_read_unlock();
put_pid(pid_s);
return -EFAULT;
}
static int set_pin_mem_area(unsigned long arg)
{
struct pin_mem_area_set pmas;
void __user *buf = (void __user *)arg;
if (copy_from_user(&pmas, buf, sizeof(pmas)))
return -EINVAL;
if (pmas.area_num > MAX_PIN_MEM_AREA_NUM) {
pr_warn("Input area_num is too large.\n");
return -EINVAL;
}
return set_pin_mem(&pmas);
}
static int pin_mem_remap(unsigned long arg)
{
int pid;
struct task_struct *task;
struct mm_struct *mm;
vm_fault_t ret;
void __user *buf = (void __user *)arg;
struct pid *pid_s;
if (copy_from_user(&pid, buf, sizeof(int)))
return -EINVAL;
pid_s = find_get_pid(pid);
if (!pid_s) {
pr_warn("Get pid struct fail:%d.\n", pid);
return -EINVAL;
}
rcu_read_lock();
task = pid_task(pid_s, PIDTYPE_PID);
if (!task) {
pr_warn("Get task struct fail:%d.\n", pid);
goto fault;
}
mm = get_task_mm(task);
/* bail out if the target task has no mm */
if (!mm)
goto fault;
ret = do_mem_remap(pid, mm);
if (ret) {
pr_warn("Handle pin memory remap fail.\n");
mmput(mm);
goto fault;
}
mmput(mm);
rcu_read_unlock();
put_pid(pid_s);
return 0;
fault:
rcu_read_unlock();
put_pid(pid_s);
return -EFAULT;
}
static long pin_memory_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long ret = 0;
if (_IOC_TYPE(cmd) != PIN_MEM_MAGIC)
return -EINVAL;
if (_IOC_NR(cmd) > _PIN_MEM_IOC_MAX_NR)
return -EINVAL;
switch (cmd) {
case SET_PIN_MEM_AREA:
ret = set_pin_mem_area(arg);
break;
case CLEAR_PIN_MEM_AREA:
clear_pin_memory_record();
break;
case REMAP_PIN_MEM_AREA:
ret = pin_mem_remap(arg);
break;
case FINISH_PIN_MEM_DUMP:
ret = finish_pin_mem_dump();
break;
case INIT_PAGEMAP_READ:
ret = init_pagemap_read();
break;
default:
return -EINVAL;
}
return ret;
}
static const struct file_operations pin_memory_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = pin_memory_ioctl,
.compat_ioctl = pin_memory_ioctl,
};
static struct miscdevice pin_memory_miscdev = {
.minor = MISC_DYNAMIC_MINOR,
.name = "pinmem",
.fops = &pin_memory_fops,
};
static int pin_memory_init(void)
{
int err = misc_register(&pin_memory_miscdev);
if (!err)
pr_info("pin_memory init\n");
else
pr_warn("pin_memory init failed!\n");
return err;
}
static void pin_memory_exit(void)
{
misc_deregister(&pin_memory_miscdev);
pr_info("pin_memory ko exists!\n");
}
module_init(pin_memory_init);
module_exit(pin_memory_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Euler");
MODULE_DESCRIPTION("pin memory");
@@ -1665,6 +1665,144 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
return ret;
}
#ifdef CONFIG_PIN_MEMORY
static int get_pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct vm_area_struct *vma = walk->vma;
struct pagemapread *pm = walk->private;
spinlock_t *ptl;
pte_t *pte, *orig_pte;
int err = 0;
pagemap_entry_t pme;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
struct page *page = NULL;
if (pmd_present(pmd)) {
page = pmd_page(pmd);
flags |= PM_PRESENT;
frame = pmd_pfn(pmd) +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
}
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
else if (is_swap_pmd(pmd)) {
swp_entry_t entry = pmd_to_swp_entry(pmd);
unsigned long offset;
offset = swp_offset(entry) +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
frame = swp_type(entry) |
(offset << MAX_SWAPFILES_SHIFT);
flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
VM_BUG_ON(!is_pmd_migration_entry(pmd));
page = migration_entry_to_page(entry);
}
#endif
pme = make_pme(frame, flags);
err = add_to_pagemap(addr, &pme, pm);
spin_unlock(ptl);
return err;
}
if (pmd_trans_unstable(pmdp))
return 0;
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
for (; addr < end; pte++, addr += PAGE_SIZE) {
pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
}
pte_unmap_unlock(orig_pte, ptl);
return err;
}
static const struct mm_walk_ops pin_pagemap_ops = {
.pmd_entry = get_pagemap_pmd_range,
.pte_hole = pagemap_pte_hole,
.hugetlb_entry = pagemap_hugetlb_range,
};
void *create_pagemap_walk(void)
{
struct pagemapread *pm;
struct mm_walk *pagemap_walk;
pagemap_walk = kzalloc(sizeof(struct mm_walk), GFP_KERNEL);
if (!pagemap_walk)
return NULL;
pm = kmalloc(sizeof(struct pagemapread), GFP_KERNEL);
if (!pm)
goto out_free_walk;
pm->show_pfn = true;
pm->len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT) + 1;
pm->buffer = kmalloc_array(pm->len, PM_ENTRY_BYTES, GFP_KERNEL);
if (!pm->buffer)
goto out_free;
pagemap_walk->ops = &pin_pagemap_ops;
pagemap_walk->private = pm;
return (void *)pagemap_walk;
out_free:
kfree(pm);
out_free_walk:
kfree(pagemap_walk);
return NULL;
}
void free_pagemap_walk(void *mem_walk)
{
struct pagemapread *pm;
struct mm_walk *pagemap_walk = (struct mm_walk *)mem_walk;
if (!pagemap_walk)
return;
if (pagemap_walk->private) {
pm = (struct pagemapread *)pagemap_walk->private;
kfree(pm->buffer);
kfree(pm);
pagemap_walk->private = NULL;
}
kfree(pagemap_walk);
}
int pagemap_get(struct mm_struct *mm, void *mem_walk,
unsigned long start_vaddr, unsigned long end_vaddr,
unsigned long *pte_entry, unsigned int *count)
{
int i, ret;
struct pagemapread *pm;
unsigned long end;
struct mm_walk *pagemap_walk = (struct mm_walk *)mem_walk;
if (!pte_entry || !mm || !pagemap_walk)
return -EFAULT;
pm = (struct pagemapread *)pagemap_walk->private;
pagemap_walk->mm = mm;
pm->pos = 0;
end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
if (end > end_vaddr)
end = end_vaddr;
ret = walk_page_range(mm, start_vaddr, end, pagemap_walk->ops, pm);
*count = pm->pos;
for (i = 0; i < pm->pos; i++)
pte_entry[i] = pm->buffer[i].pme;
return ret;
}
#endif
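/*
 * For illustration only: a plausible way the (collapsed) pin-memory code
 * consumes the helpers above; the buffer size and address range are
 * placeholders.
 *
 *	void *walk = create_pagemap_walk();
 *	unsigned long entries[512];
 *	unsigned int count;
 *
 *	if (walk && !pagemap_get(mm, walk, start, end, entries, &count))
 *		scan the count entries, IS_PTE_PRESENT() picks mapped pages;
 *	free_pagemap_walk(walk);
 */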
static int pagemap_open(struct inode *inode, struct file *file)
{
struct mm_struct *mm;
......
@@ -87,4 +87,9 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
#ifdef CONFIG_PIN_MEMORY
int __init parse_pin_memory(char *cmdline, unsigned long long system_ram,
unsigned long long *pin_size, unsigned long long *pin_base);
#endif
#endif /* LINUX_CRASH_CORE_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved.
* Provide the pin memory method for checkpoint and restore tasks.
*/
#ifndef _LINUX_PIN_MEMORY_H
#define _LINUX_PIN_MEMORY_H
#ifdef CONFIG_PIN_MEMORY
#include <linux/errno.h>
#include <linux/mm_types.h>
#include <linux/err.h>
#ifdef CONFIG_ARM64
#include <linux/ioport.h>
#endif
#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy)
#define COLLECT_PAGES_FINISH 0
#define COLLECT_PAGES_NEED_CONTINUE 1
#define COLLECT_PAGES_FAIL -1
#define COMPOUND_PAD_MASK 0xffffffff
#define COMPOUND_PAD_START 0x88
#define COMPOUND_PAD_DELTA 0x40
#define LIST_POISON4 0xdead000000000400
#define PAGE_FLAGS_CHECK_RESERVED (1UL << PG_reserved)
#define SHA256_DIGEST_SIZE 32
#define next_pme(pme) ((unsigned long *)((pme) + 1) + (pme)->nr_pages)
#define PIN_MEM_DUMP_MAGIC 0xfeab000000001acd
#define PM_PFRAME_BITS 55
#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
#define PM_PRESENT BIT_ULL(63)
#define PM_SWAP BIT_ULL(62)
#define IS_PTE_PRESENT(entry) (((entry) & PM_PFRAME_MASK) && ((entry) & PM_PRESENT))
#define NEXT_PIN_ADDR(next, end_addr) (((next) + HPAGE_PMD_SIZE) > (end_addr) ? \
(end_addr) : ((next) + HPAGE_PMD_SIZE))
struct page_map_entry {
unsigned long virt_addr;
unsigned int nr_pages;
unsigned int is_huge_page;
unsigned long redirect_start;
unsigned long phy_addr_array[0];
};
struct page_map_info {
int pid;
int pid_reserved;
unsigned int entry_num;
int disable_free_page;
struct page_map_entry *pme;
};
struct pin_mem_dump_info {
char sha_digest[SHA256_DIGEST_SIZE];
unsigned long magic;
unsigned int pin_pid_num;
struct page_map_info pmi_array[0];
};
struct redirect_info {
unsigned int redirect_pages;
unsigned int redirect_index[0];
};
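/*
 * Inferred dump layout in the reserved region: one pin_mem_dump_info
 * header whose pmi_array[] holds a page_map_info per pinned pid; each
 * page_map_info points at a run of variable-length page_map_entry
 * records, where an entry is followed by nr_pages physical addresses
 * in phy_addr_array[] and next_pme() steps over both.
 */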
extern struct page_map_info *get_page_map_info_by_pid(int pid);
extern struct page_map_info *create_page_map_info_by_pid(int pid);
extern vm_fault_t do_mem_remap(int pid, struct mm_struct *mm);
extern vm_fault_t do_anon_page_remap(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, struct page *page);
extern void clear_pin_memory_record(void);
extern int pin_mem_area(struct task_struct *task, struct mm_struct *mm,
unsigned long start_addr, unsigned long end_addr);
extern vm_fault_t do_anon_huge_page_remap(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, struct page *page);
extern int finish_pin_mem_dump(void);
extern void *create_pagemap_walk(void);
extern void free_pagemap_walk(void *mem_walk);
extern int pagemap_get(struct mm_struct *mm, void *mem_walk,
unsigned long start_vaddr, unsigned long end_vaddr,
unsigned long *pte_entry, unsigned int *count);
extern int init_pagemap_read(void);
/* reserve space for pin memory */
#ifdef CONFIG_ARM64
extern struct resource pin_memory_resource;
#endif
extern void init_reserve_page_map(unsigned long map_addr, unsigned long map_size);
#endif /* CONFIG_PIN_MEMORY */
#endif /* _LINUX_PIN_MEMORY_H */
@@ -462,6 +462,17 @@ void __init reserve_crashkernel(void)
}
#endif /* CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL */
#ifdef CONFIG_PIN_MEMORY
int __init parse_pin_memory(char *cmdline,
unsigned long long system_ram,
unsigned long long *pin_size,
unsigned long long *pin_base)
{
return __parse_crashkernel(cmdline, system_ram, pin_size, pin_base,
"pinmemory=", NULL);
}
#endif
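/*
 * Since this reuses the crashkernel parser, the boot parameter
 * presumably takes the same size[@offset] form, e.g. (hypothetical
 * values): pinmemory=512M@0x200000000
 */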
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len)
{
......
@@ -898,4 +898,12 @@ config ARCH_HAS_HUGEPD
config MAPPING_DIRTY_HELPERS
bool
config PIN_MEMORY
bool "Support for pin memory"
depends on MMU && ARM64
help
Say y here to enable the pin memory feature for checkpoint
and restore: the memory data of tasks is pinned and the
corresponding physical page mapping info is collected at
checkpoint, then the physical pages are remapped into the
restored tasks at restore.
endmenu
@@ -122,3 +122,4 @@ obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
obj-$(CONFIG_PIN_MEMORY) += pin_mem.o
@@ -3015,3 +3015,66 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
update_mmu_cache_pmd(vma, address, pvmw->pmd);
}
#endif
#ifdef CONFIG_PIN_MEMORY
vm_fault_t do_anon_huge_page_remap(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, struct page *page)
{
gfp_t gfp;
pgtable_t pgtable;
spinlock_t *ptl;
pmd_t entry;
vm_fault_t ret = 0;
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
return VM_FAULT_OOM;
gfp = alloc_hugepage_direct_gfpmask(vma);
prep_transhuge_page(page);
if (mem_cgroup_charge(page, vma->vm_mm, gfp)) {
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
count_vm_event(THP_FAULT_FALLBACK_CHARGE);
return VM_FAULT_FALLBACK;
}
cgroup_throttle_swaprate(page, gfp);
pgtable = pte_alloc_one(vma->vm_mm);
if (unlikely(!pgtable)) {
ret = VM_FAULT_OOM;
goto release;
}
__SetPageUptodate(page);
ptl = pmd_lock(vma->vm_mm, pmd);
if (unlikely(!pmd_none(*pmd))) {
goto unlock_release;
} else {
ret = check_stable_address_space(vma->vm_mm);
if (ret)
goto unlock_release;
entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, address, true);
lru_cache_add_inactive_or_unevictable(page, vma);
pgtable_trans_huge_deposit(vma->vm_mm, pmd, pgtable);
set_pmd_at(vma->vm_mm, address, pmd, entry);
add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
mm_inc_nr_ptes(vma->vm_mm);
spin_unlock(ptl);
count_vm_event(THP_FAULT_ALLOC);
count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
}
return 0;
unlock_release:
spin_unlock(ptl);
release:
if (pgtable)
pte_free(vma->vm_mm, pgtable);
put_page(page);
return ret;
}
#endif
@@ -5318,3 +5318,68 @@ void ptlock_free(struct page *page)
kmem_cache_free(page_ptl_cachep, page->ptl);
}
#endif
#ifdef CONFIG_PIN_MEMORY
vm_fault_t do_anon_page_remap(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, struct page *page)
{
pte_t entry;
spinlock_t *ptl;
pte_t *pte;
vm_fault_t ret = 0;
if (pte_alloc(vma->vm_mm, pmd))
return VM_FAULT_OOM;
/* See the comment in pte_alloc_one_map() */
if (unlikely(pmd_trans_unstable(pmd)))
return 0;
/* Allocate our own private page. */
if (unlikely(anon_vma_prepare(vma)))
goto oom;
if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
goto oom_free_page;
/*
* The memory barrier inside __SetPageUptodate makes sure that
* preceding stores to the page contents become visible before
* the set_pte_at() write.
*/
__SetPageUptodate(page);
entry = mk_pte(page, vma->vm_page_prot);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
pte = pte_offset_map_lock(vma->vm_mm, pmd, address,
&ptl);
if (!pte_none(*pte)) {
ret = VM_FAULT_FALLBACK;
goto release;
}
ret = check_stable_address_space(vma->vm_mm);
if (ret)
goto release;
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address, false);
lru_cache_add_inactive_or_unevictable(page, vma);
set_pte_at(vma->vm_mm, address, pte, entry);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
unlock:
pte_unmap_unlock(pte, ptl);
return ret;
release:
put_page(page);
goto unlock;
oom_free_page:
put_page(page);
oom:
return VM_FAULT_OOM;
}
#endif
This diff is collapsed.