From 094eaabb3fe8922793cf7d704023f43784319f15 Mon Sep 17 00:00:00 2001
From: Peng Wu
Date: Wed, 9 Feb 2022 15:36:23 +0800
Subject: [PATCH] proc: Count reliable memory usage of reliable tasks

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA

----------------------------------------------

Count the reliable memory allocated by reliable user tasks.

The accounting policy for reliable memory usage follows the RSS
statistics: wherever a per-mm counter is updated, the reliable page
counter must be updated as well. A page that page_reliable() identifies
as reliable updates the per-mm reliable counter through
reliable_page_counter().

Updating the reliable page counter should be considered wherever one of
the following is added:

- add_mm_counter
- dec_mm_counter
- inc_mm_counter_fast
- dec_mm_counter_fast
- rss[mm_counter(page)]

Signed-off-by: Peng Wu
Signed-off-by: Ma Wupeng
Reviewed-by: Kefeng Wang
Signed-off-by: Yang Yingliang
---
 Documentation/filesystems/proc.txt |  2 ++
 fs/proc/task_mmu.c                 |  1 +
 include/linux/mem_reliable.h       | 17 +++++++++++++++++
 kernel/events/uprobes.c            |  2 ++
 mm/huge_memory.c                   |  8 ++++++++
 mm/khugepaged.c                    |  1 +
 mm/ksm.c                           |  1 +
 mm/mem_reliable.c                  | 15 ++++++++++++++-
 mm/memory.c                        | 16 ++++++++++++++++
 mm/migrate.c                       |  1 +
 mm/rmap.c                          |  5 +++++
 mm/shmem.c                         |  1 +
 mm/swapfile.c                      |  1 +
 mm/userfaultfd.c                   |  1 +
 14 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 690db5b3eb53..1ef781f33b37 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -181,6 +181,7 @@ read the file /proc/PID/status:
   VmPTE:        20 kb
   VmSwap:        0 kB
   HugetlbPages:  0 kB
+  Reliable:   1608 kB
   CoreDumping:   0
   Threads:       1
   SigQ:   0/28578
@@ -254,6 +255,7 @@ Table 1-2: Contents of the status files (as of 4.8)
  VmSwap                      amount of swap used by anonymous private data
                              (shmem swap usage is not included)
  HugetlbPages                size of hugetlb memory portions
+ Reliable                    size of reliable memory used
  CoreDumping                 process's memory is currently being dumped
                              (killing the process may lead to a corrupted core)
  Threads                     number of threads
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 495044e1990b..78ce353d0dfa 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -77,6 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
     SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
     seq_puts(m, " kB\n");
     hugetlb_report_usage(m, mm);
+    reliable_report_usage(m, mm);
 }
 #undef SEQ_PUT_DEC
 
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h
index 31be68fac330..a18a843c7b52 100644
--- a/include/linux/mem_reliable.h
+++ b/include/linux/mem_reliable.h
@@ -14,11 +14,14 @@
 extern struct static_key_false mem_reliable;
 
 extern bool reliable_enabled;
+extern atomic_long_t reliable_user_used_nr_page;
 
 extern void add_reliable_mem_size(long sz);
 extern void mem_reliable_init(bool has_unmirrored_mem,
                               unsigned long *zone_movable_pfn);
 extern void reliable_report_meminfo(struct seq_file *m);
+extern bool page_reliable(struct page *page);
+extern void reliable_report_usage(struct seq_file *m, struct mm_struct *mm);
 
 static inline bool mem_reliable_is_enabled(void)
 {
@@ -47,6 +50,15 @@ static inline bool skip_none_movable_zone(gfp_t gfp, struct zoneref *z)
 
     return false;
 }
+
+static inline void reliable_page_counter(struct page *page,
+                                         struct mm_struct *mm, int val)
+{
+    if (page_reliable(page)) {
+        atomic_long_add(val, &mm->reliable_nr_page);
+        atomic_long_add(val, &reliable_user_used_nr_page);
+    }
+}
 #else
 #define reliable_enabled 0
 
@@ -60,6 +72,11 @@ static inline bool skip_none_movable_zone(gfp_t gfp, struct zoneref *z)
 
     return false;
 }
 static inline void reliable_report_meminfo(struct seq_file *m) {}
+static inline bool page_reliable(struct page *page) { return false; }
+static inline void reliable_page_counter(struct page *page,
+                                         struct mm_struct *mm, int val) {}
+static inline void reliable_report_usage(struct seq_file *m,
+                                         struct mm_struct *mm) {}
 
 #endif
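
The pairing rule from the commit message -- every mm counter update is
mirrored by a reliable_page_counter() call, and only pages below
ZONE_MOVABLE are counted -- can be modelled outside the kernel. The
stand-alone user-space sketch below is illustrative only and is not part
of the patch; the simplified zone enum and the plain long counters are
assumptions standing in for the kernel's zone handling and atomic_long_t.

/*
 * Minimal user-space model of the accounting rule: every RSS-style update
 * is paired with a reliable counter update, and only pages from zones
 * below ZONE_MOVABLE are counted. Names mirror the kernel helpers but
 * everything here is local illustration code.
 */
#include <stdbool.h>
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_NORMAL, ZONE_MOVABLE };  /* simplified */

struct page { enum zone_type zone; };
struct mm   { long rss_pages; long reliable_nr_page; };

static long reliable_user_used_nr_page;      /* system-wide counter */
static bool mem_reliable_enabled = true;     /* static-key stand-in */

static bool page_reliable(const struct page *page)
{
    /* reliable memory is served from zones below ZONE_MOVABLE */
    return mem_reliable_enabled && page->zone < ZONE_MOVABLE;
}

static void reliable_page_counter(const struct page *page, struct mm *mm, int val)
{
    if (page_reliable(page)) {
        mm->reliable_nr_page += val;
        reliable_user_used_nr_page += val;
    }
}

/* map/unmap paths always touch both counters together */
static void map_page(struct mm *mm, const struct page *page)
{
    mm->rss_pages++;                     /* add_mm_counter(..., 1) */
    reliable_page_counter(page, mm, 1);
}

static void unmap_page(struct mm *mm, const struct page *page)
{
    mm->rss_pages--;                     /* dec_mm_counter(...) */
    reliable_page_counter(page, mm, -1);
}

int main(void)
{
    struct mm mm = { 0, 0 };
    struct page normal = { ZONE_NORMAL }, movable = { ZONE_MOVABLE };

    map_page(&mm, &normal);   /* counted as reliable */
    map_page(&mm, &movable);  /* RSS only */
    unmap_page(&mm, &movable);

    printf("rss=%ld reliable=%ld global=%ld\n",
           mm.rss_pages, mm.reliable_nr_page, reliable_user_used_nr_page);
    return 0;
}

Built with any C compiler, it prints rss=1 reliable=1 global=1: the
movable page contributed to RSS but not to the reliable counters.
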
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c173e4131df8..de64e2983082 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -191,7 +191,9 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
     if (!PageAnon(old_page)) {
         dec_mm_counter(mm, mm_counter_file(old_page));
+        reliable_page_counter(old_page, mm, -1);
         inc_mm_counter(mm, MM_ANONPAGES);
+        reliable_page_counter(new_page, mm, 1);
     }
 
     flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 31f1c580ba9c..f8319265c1cf 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -673,6 +673,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
         pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
         set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
         add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+        reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
         mm_inc_nr_ptes(vma->vm_mm);
         spin_unlock(vmf->ptl);
         count_vm_event(THP_FAULT_ALLOC);
@@ -1080,6 +1081,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
     get_page(src_page);
     page_dup_rmap(src_page, true);
     add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+    reliable_page_counter(src_page, dst_mm, HPAGE_PMD_NR);
     mm_inc_nr_ptes(dst_mm);
     pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
 
@@ -1468,6 +1470,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
         update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
         if (!page) {
             add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+            reliable_page_counter(new_page, vma->vm_mm,
+                                  HPAGE_PMD_NR);
         } else {
             VM_BUG_ON_PAGE(!PageHead(page), page);
             page_remove_rmap(page, true);
@@ -1850,10 +1854,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
         if (PageAnon(page)) {
             zap_deposited_table(tlb->mm, pmd);
             add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+            reliable_page_counter(page, tlb->mm, -HPAGE_PMD_NR);
         } else {
             if (arch_needs_pgtable_deposit())
                 zap_deposited_table(tlb->mm, pmd);
             add_mm_counter(tlb->mm, mm_counter_file(page), -HPAGE_PMD_NR);
+            reliable_page_counter(page, tlb->mm, -HPAGE_PMD_NR);
         }
 
         spin_unlock(ptl);
@@ -2209,6 +2215,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                 put_page(page);
         }
         add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
+        reliable_page_counter(page, mm, -HPAGE_PMD_NR);
         return;
     }
 
@@ -3170,6 +3177,7 @@ vm_fault_t do_anon_huge_page_remap(struct vm_area_struct *vma, unsigned long add
     pgtable_trans_huge_deposit(vma->vm_mm, pmd, pgtable);
     set_pmd_at(vma->vm_mm, address, pmd, entry);
     add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+    reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
     mm_inc_nr_ptes(vma->vm_mm);
     spin_unlock(ptl);
     count_vm_event(THP_FAULT_ALLOC);
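
The THP paths above (for example __do_huge_pmd_anonymous_page() and
zap_huge_pmd()) adjust the counter by HPAGE_PMD_NR base pages at a time
rather than by 1. The quick stand-alone check below shows what that
amounts to, assuming the common 4 KiB base page / 2 MiB PMD-sized huge
page geometry; the local macros only stand in for the kernel's and the
numbers are illustrative, not taken from the patch.

/* Stand-alone illustration of HPAGE_PMD_NR-sized counter steps. */
#include <stdio.h>

#define PAGE_SHIFT      12                       /* 4 KiB base page  */
#define HPAGE_PMD_SHIFT 21                       /* 2 MiB huge page  */
#define HPAGE_PMD_NR    (1UL << (HPAGE_PMD_SHIFT - PAGE_SHIFT))

int main(void)
{
    long reliable_nr_page = 0;

    reliable_nr_page += HPAGE_PMD_NR;            /* THP fault maps 512 pages */
    printf("after THP map:   %ld pages (%ld kB)\n",
           reliable_nr_page, reliable_nr_page << (PAGE_SHIFT - 10));

    reliable_nr_page -= HPAGE_PMD_NR;            /* zap unmaps them again */
    printf("after THP unmap: %ld pages (%ld kB)\n",
           reliable_nr_page, reliable_nr_page << (PAGE_SHIFT - 10));
    return 0;
}
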
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 55f171ed2d08..5ac248632752 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -648,6 +648,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
         if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
             clear_user_highpage(page, address);
             add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1);
+            reliable_page_counter(page, vma->vm_mm, 1);
             if (is_zero_pfn(pte_pfn(pteval))) {
                 /*
                  * ptl mostly unnecessary.
diff --git a/mm/ksm.c b/mm/ksm.c
index 9749729a5381..b656fa77f92f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1184,6 +1184,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
          * when tearing down the mm.
          */
         dec_mm_counter(mm, MM_ANONPAGES);
+        reliable_page_counter(page, mm, -1);
     }
 
     flush_cache_page(vma, addr, pte_pfn(*ptep));
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c
index c03c77090cf5..d6aec0863892 100644
--- a/mm/mem_reliable.c
+++ b/mm/mem_reliable.c
@@ -12,14 +12,19 @@
 DEFINE_STATIC_KEY_FALSE(mem_reliable);
 
 bool reliable_enabled;
-
 static atomic_long_t total_reliable_mem;
+atomic_long_t reliable_user_used_nr_page;
 
 void add_reliable_mem_size(long sz)
 {
     atomic_long_add(sz, &total_reliable_mem);
 }
 
+bool page_reliable(struct page *page)
+{
+    return mem_reliable_is_enabled() && page_zonenum(page) < ZONE_MOVABLE;
+}
+
 static int reliable_mem_notifier(struct notifier_block *nb,
                                  unsigned long action, void *arg)
 {
@@ -105,3 +110,11 @@ void reliable_report_meminfo(struct seq_file *m)
                    used_reliable_mem_sz() >> 10);
     }
 }
+
+void reliable_report_usage(struct seq_file *m, struct mm_struct *mm)
+{
+    if (mem_reliable_is_enabled()) {
+        seq_printf(m, "Reliable:\t%8lu kB\n",
+                   atomic_long_read(&mm->reliable_nr_page) << (PAGE_SHIFT - 10));
+    }
+}
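
reliable_report_usage() above reports the per-mm counter in kB while the
counter itself is kept in base pages, so the value is scaled by the page
size before printing. The same arithmetic in a stand-alone user-space
sketch, using sysconf(3) in place of PAGE_SHIFT; the counter value 402 is
only an example chosen to reproduce the 1608 kB sample in proc.txt.

/* Page-count to kB conversion as used when reporting "Reliable:". */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    long page_kb = sysconf(_SC_PAGESIZE) / 1024;  /* usually 4 */
    long reliable_nr_page = 402;                  /* example counter value */

    /* 402 pages * 4 kB/page = 1608 kB, matching the proc.txt sample */
    printf("Reliable:\t%8lu kB\n",
           (unsigned long)(reliable_nr_page * page_kb));
    return 0;
}
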
diff --git a/mm/memory.c b/mm/memory.c
index 054e62292902..d4853970a7c1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -740,6 +740,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
             rss[mm_counter(page)]++;
+            reliable_page_counter(page, dst_mm, 1);
 
             if (is_write_migration_entry(entry) &&
                     is_cow_mapping(vm_flags)) {
                 /*
@@ -766,6 +767,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
          */
         get_page(page);
         rss[mm_counter(page)]++;
+        reliable_page_counter(page, dst_mm, 1);
         page_dup_rmap(page, false);
 
         /*
@@ -807,6 +809,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         get_page(page);
         page_dup_rmap(page, false);
         rss[mm_counter(page)]++;
+        reliable_page_counter(page, dst_mm, 1);
     } else if (pte_devmap(pte)) {
         page = pte_page(pte);
 
@@ -819,6 +822,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
             get_page(page);
             page_dup_rmap(page, false);
             rss[mm_counter(page)]++;
+            reliable_page_counter(page, dst_mm, 1);
         }
     }
 
@@ -1102,6 +1106,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                     mark_page_accessed(page);
             }
             rss[mm_counter(page)]--;
+            reliable_page_counter(page, mm, -1);
             page_remove_rmap(page, false);
             if (unlikely(page_mapcount(page) < 0))
                 print_bad_pte(vma, addr, ptent, page);
@@ -1130,6 +1135,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 
             pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
             rss[mm_counter(page)]--;
+            reliable_page_counter(page, mm, -1);
             page_remove_rmap(page, false);
             put_page(page);
             continue;
@@ -1147,6 +1153,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 
             page = migration_entry_to_page(entry);
             rss[mm_counter(page)]--;
+            reliable_page_counter(page, mm, -1);
         }
         if (unlikely(!free_swap_and_cache(entry)))
             print_bad_pte(vma, addr, ptent, NULL);
@@ -1490,6 +1497,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
     /* Ok, finally just insert the thing.. */
     get_page(page);
     inc_mm_counter_fast(mm, mm_counter_file(page));
+    reliable_page_counter(page, mm, 1);
     page_add_file_rmap(page, false);
     set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -2489,10 +2497,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
         if (!PageAnon(old_page)) {
             dec_mm_counter_fast(mm,
                     mm_counter_file(old_page));
+            reliable_page_counter(old_page, mm, -1);
             inc_mm_counter_fast(mm, MM_ANONPAGES);
+            reliable_page_counter(new_page, mm, 1);
         }
     } else {
         inc_mm_counter_fast(mm, MM_ANONPAGES);
+        reliable_page_counter(new_page, mm, 1);
     }
     flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
     entry = mk_pte(new_page, vma->vm_page_prot);
@@ -3051,6 +3062,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
      */
 
     inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+    reliable_page_counter(page, vma->vm_mm, 1);
     dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
     pte = mk_pte(page, vma->vm_page_prot);
     if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
@@ -3216,6 +3228,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
     }
 
     inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+    reliable_page_counter(page, vma->vm_mm, 1);
     page_add_new_anon_rmap(page, vma, vmf->address, false);
     mem_cgroup_commit_charge(page, memcg, false, false);
     lru_cache_add_active_or_unevictable(page, vma);
@@ -3416,6 +3429,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
         entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
     add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
+    reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
     page_add_file_rmap(page, true);
     /*
      * deposit and withdraw with pmd lock held
@@ -3489,6 +3503,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
     if (write)
         entry = maybe_mkwrite(pte_mkdirty(entry), vma);
     /* copy-on-write page */
+    reliable_page_counter(page, vma->vm_mm, 1);
     if (write && !(vma->vm_flags & VM_SHARED)) {
         inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
         page_add_new_anon_rmap(page, vma, vmf->address, false);
@@ -4910,6 +4925,7 @@ vm_fault_t do_anon_page_remap(struct vm_area_struct *vma, unsigned long address,
     if (ret)
         goto release;
     inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+    reliable_page_counter(page, vma->vm_mm, 1);
     page_add_new_anon_rmap(page, vma, address, false);
     mem_cgroup_commit_charge(page, memcg, false, false);
     lru_cache_add_active_or_unevictable(page, vma);
diff --git a/mm/migrate.c b/mm/migrate.c
index 90aa493faa60..eb27e8e2bf21 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2714,6 +2714,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
     }
 
     inc_mm_counter(mm, MM_ANONPAGES);
+    reliable_page_counter(page, mm, 1);
     page_add_new_anon_rmap(page, vma, addr, false);
     mem_cgroup_commit_charge(page, memcg, false, false);
     if (!is_zone_device_page(page))
diff --git a/mm/rmap.c b/mm/rmap.c
index 7debdf0cc678..224fac084ad0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1548,6 +1548,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         vma_mmu_pagesize(vma));
             } else {
                 dec_mm_counter(mm, mm_counter(page));
+                reliable_page_counter(page, mm, -1);
                 set_pte_at(mm, address, pvmw.pte, pteval);
             }
 
@@ -1563,6 +1564,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
              * copied pages.
              */
             dec_mm_counter(mm, mm_counter(page));
+            reliable_page_counter(page, mm, -1);
             /* We have to invalidate as we cleared the pte */
             mmu_notifier_invalidate_range(mm, address,
                                           address + PAGE_SIZE);
@@ -1617,6 +1619,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 mmu_notifier_invalidate_range(mm, address,
                         address + PAGE_SIZE);
             dec_mm_counter(mm, MM_ANONPAGES);
+            reliable_page_counter(page, mm, -1);
             goto discard;
         }
 
@@ -1650,6 +1653,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 spin_unlock(&mmlist_lock);
             }
             dec_mm_counter(mm, MM_ANONPAGES);
+            reliable_page_counter(page, mm, -1);
             inc_mm_counter(mm, MM_SWAPENTS);
             swp_pte = swp_entry_to_pte(entry);
             if (pte_soft_dirty(pteval))
@@ -1670,6 +1674,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
              * See Documentation/vm/mmu_notifier.rst
              */
             dec_mm_counter(mm, mm_counter_file(page));
+            reliable_page_counter(page, mm, -1);
         }
 discard:
         /*
diff --git a/mm/shmem.c b/mm/shmem.c
index 8d32d49a4d7b..16bb7806a25e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2473,6 +2473,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
     spin_unlock_irq(&info->lock);
 
     inc_mm_counter(dst_mm, mm_counter_file(page));
+    reliable_page_counter(page, dst_mm, 1);
     page_add_file_rmap(page, false);
     set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4028994a51ae..2619729400d3 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1869,6 +1869,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 
     dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
     inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
+    reliable_page_counter(page, vma->vm_mm, 1);
     get_page(page);
     set_pte_at(vma->vm_mm, addr, pte,
                pte_mkold(mk_pte(page, vma->vm_page_prot)));
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 1c86abd41c6d..c26dd2040624 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -116,6 +116,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 #endif
 
     inc_mm_counter(dst_mm, MM_ANONPAGES);
+    reliable_page_counter(page, dst_mm, 1);
     page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
     mem_cgroup_commit_charge(page, memcg, false, false);
     lru_cache_add_active_or_unevictable(page, dst_vma);
-- 
GitLab
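
With the patch applied, the new field can be consumed from user space like
any other /proc/PID/status line. A minimal stand-alone reader follows; it
is illustrative only, and on kernels without this patch (or with memory
reliability disabled) the field is simply absent, which the program
reports.

/* Scan /proc/self/status for the "Reliable:" line added by this patch. */
#include <stdio.h>
#include <string.h>

int main(void)
{
    char line[256];
    FILE *fp = fopen("/proc/self/status", "r");

    if (!fp) {
        perror("fopen");
        return 1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, "Reliable:", 9) == 0) {
            fputs(line, stdout);   /* e.g. "Reliable:    1608 kB" */
            fclose(fp);
            return 0;
        }
    }

    fclose(fp);
    puts("Reliable: field not present (mem_reliable off or kernel not patched)");
    return 0;
}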