diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index a57d96cf46440ab8c5324f4bf1538ee4ccc1daf2..2fa2f7cd12876895454d3684b3fa0ca0c8cd617c 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -195,6 +195,7 @@ read the file /proc/PID/status::
   VmPTE:                      20 kb
   VmSwap:                      0 kB
   HugetlbPages:                0 kB
+  Reliable:                 1608 kB
   CoreDumping:                 0
   THP_enabled:                 1
   Threads:                     1
@@ -275,6 +276,7 @@ It's slow but very precise.
  VmSwap                      amount of swap used by anonymous private data
                              (shmem swap usage is not included)
  HugetlbPages                size of hugetlb memory portions
+ Reliable                    size of reliable memory used
  CoreDumping                 process's memory is currently being dumped
                              (killing the process may lead to a corrupted core)
  THP_enabled                 process is allowed to use THP (returns 0 when
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 391b967fcfbfdb69d2f4d8c3a48d6dff1742c353..15f989844389f3f6e8d5c25938c4440bb5c5299f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -77,6 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
 	seq_puts(m, " kB\n");
 	hugetlb_report_usage(m, mm);
+	reliable_report_usage(m, mm);
 }
 #undef SEQ_PUT_DEC
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h
index 5e14980d5793d65d4582a3abedcbb27aafc7b9a8..ddadf28037429a37473bced527391530e7886921 100644
--- a/include/linux/mem_reliable.h
+++ b/include/linux/mem_reliable.h
@@ -23,6 +23,7 @@ extern bool pagecache_use_reliable_mem;
 extern struct percpu_counter pagecache_reliable_pages;
 extern struct percpu_counter anon_reliable_pages;
 extern unsigned long task_reliable_limit __read_mostly;
+extern atomic_long_t reliable_user_used_nr_page;
 
 extern void mem_reliable_init(bool has_unmirrored_mem,
 			      unsigned long *zone_movable_pfn,
@@ -39,6 +40,8 @@ extern bool mem_reliable_counter_initialized(void);
 extern void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order,
 				       int preferred_nid, nodemask_t *nodemask);
 extern void reliable_show_mem_info(void);
+extern void reliable_report_usage(struct seq_file *m,
+				  struct mm_struct *mm);
 
 static inline bool mem_reliable_is_enabled(void)
 {
@@ -125,6 +128,13 @@ static inline bool reliable_allow_fb_enabled(void)
 {
 	return reliable_allow_fallback;
 }
+
+static inline void reliable_page_counter(struct page *page,
+					 struct mm_struct *mm, int val)
+{
+	if (page_reliable(page))
+		atomic_long_add(val, &mm->reliable_nr_page);
+}
 #else
 #define reliable_enabled 0
 #define pagecache_use_reliable_mem 0
@@ -164,6 +174,10 @@ static inline void mem_reliable_out_of_memory(gfp_t gfp_mask,
 					      nodemask_t *nodemask) {}
 static inline bool reliable_allow_fb_enabled(void) { return false; }
 static inline void reliable_show_mem_info(void) {}
+static inline void reliable_page_counter(struct page *page,
+					 struct mm_struct *mm, int val) {}
+static inline void reliable_report_usage(struct seq_file *m,
+					 struct mm_struct *mm) {}
 #endif
 
 #endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index e1bbb3b92921d8eb084d2844cd2ae85ebaf5e14f..ad6664fcc3b2415963c63d66bed42ad8914c0532 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -183,6 +183,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	if (new_page) {
 		get_page(new_page);
+		reliable_page_counter(new_page, mm, 1);
 		page_add_new_anon_rmap(new_page, vma, addr, false);
 		lru_cache_add_inactive_or_unevictable(new_page, vma);
 	} else
@@ -194,6 +195,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 		inc_mm_counter(mm, MM_ANONPAGES);
 	}
 
+	reliable_page_counter(old_page, mm, -1);
 	flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
 	ptep_clear_flush_notify(vma, addr, pvmw.pte);
 	if (new_page)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 79c855b5adada38b20008bd739f272259b8a25f3..fdd617e8197dd1ed820d47b3dae356dc3b916e5c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -652,6 +652,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
 		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+		reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
 		mm_inc_nr_ptes(vma->vm_mm);
 		spin_unlock(vmf->ptl);
 		count_vm_event(THP_FAULT_ALLOC);
@@ -1115,6 +1116,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	get_page(src_page);
 	page_dup_rmap(src_page, true);
 	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+	reliable_page_counter(src_page, dst_mm, HPAGE_PMD_NR);
 out_zero_page:
 	mm_inc_nr_ptes(dst_mm);
 	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
@@ -1696,6 +1698,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
 	if (pmd_present(orig_pmd)) {
 		page = pmd_page(orig_pmd);
+		reliable_page_counter(page, tlb->mm, -HPAGE_PMD_NR);
 		page_remove_rmap(page, true);
 		VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
 		VM_BUG_ON_PAGE(!PageHead(page), page);
@@ -2077,6 +2080,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			set_page_dirty(page);
 		if (!PageReferenced(page) && pmd_young(old_pmd))
 			SetPageReferenced(page);
+		reliable_page_counter(page, mm, -HPAGE_PMD_NR);
 		page_remove_rmap(page, true);
 		put_page(page);
 	}
@@ -2212,6 +2216,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 
 	if (freeze) {
 		for (i = 0; i < HPAGE_PMD_NR; i++) {
+			reliable_page_counter(page + i, mm, -1);
 			page_remove_rmap(page + i, false);
 			put_page(page + i);
 		}
@@ -3006,6 +3011,7 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
 	if (pmd_soft_dirty(pmdval))
 		pmdswp = pmd_swp_mksoft_dirty(pmdswp);
 	set_pmd_at(mm, address, pvmw->pmd, pmdswp);
+	reliable_page_counter(page, mm, -HPAGE_PMD_NR);
 	page_remove_rmap(page, true);
 	put_page(page);
 }
@@ -3033,6 +3039,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 		pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
 
 	flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
+	reliable_page_counter(new, mm, HPAGE_PMD_NR);
 	if (PageAnon(new))
 		page_add_anon_rmap(new, vma, mmun_start, true);
 	else
@@ -3089,6 +3096,7 @@ vm_fault_t do_anon_huge_page_remap(struct vm_area_struct *vma, unsigned long address,
 		pgtable_trans_huge_deposit(vma->vm_mm, pmd, pgtable);
 		set_pmd_at(vma->vm_mm, address, pmd, entry);
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+		reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
 		mm_inc_nr_ptes(vma->vm_mm);
 		spin_unlock(ptl);
 		count_vm_event(THP_FAULT_ALLOC);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index c1346c93358666f784a6f7b912313806586cb96b..aaef16aa8945cbb2243c11c8d42c1f437660b5ff 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -748,6 +748,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
 			clear_user_highpage(page, address);
 			add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1);
+			reliable_page_counter(page, vma->vm_mm, 1);
 			if (is_zero_pfn(pte_pfn(pteval))) {
 				/*
 				 * ptl mostly unnecessary.
@@ -776,6 +777,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 				 * superfluous.
 				 */
 				pte_clear(vma->vm_mm, address, _pte);
+				reliable_page_counter(src_page, vma->vm_mm, -1);
 				page_remove_rmap(src_page, false);
 				spin_unlock(ptl);
 				free_page_and_swap_cache(src_page);
@@ -1202,6 +1204,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 
 	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
+	reliable_page_counter(new_page, vma->vm_mm, HPAGE_PMD_NR);
 	page_add_new_anon_rmap(new_page, vma, address, true);
 	lru_cache_add_inactive_or_unevictable(new_page, vma);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
@@ -1509,6 +1512,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
 		if (pte_none(*pte))
 			continue;
 		page = vm_normal_page(vma, addr, *pte);
+		reliable_page_counter(page, mm, -1);
 		page_remove_rmap(page, false);
 	}
diff --git a/mm/ksm.c b/mm/ksm.c
index 582c02058baf94e30c511b1faf9d4130aa4de6fe..169c0da1a9db84e072b1db9191762e0b5e172563 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1155,6 +1155,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	 */
 	if (!is_zero_pfn(page_to_pfn(kpage))) {
 		get_page(kpage);
+		reliable_page_counter(kpage, mm, 1);
 		page_add_anon_rmap(kpage, vma, addr, false);
 		newpte = mk_pte(kpage, vma->vm_page_prot);
 	} else {
@@ -1179,6 +1180,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	ptep_clear_flush(vma, addr, ptep);
 	set_pte_at_notify(mm, addr, ptep, newpte);
 
+	reliable_page_counter(page, mm, -1);
 	page_remove_rmap(page, false);
 	if (!page_mapped(page))
 		try_to_free_swap(page);
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c
index 3f6ed837824f3f0d3646f5b6aa04d27dade9f537..4540cfc76489e9778e6b355b03a63c7497675f96 100644
--- a/mm/mem_reliable.c
+++ b/mm/mem_reliable.c
@@ -33,6 +33,7 @@ static unsigned long reliable_pagecache_max_bytes = ULONG_MAX;
 /* reliable user limit for user tasks with reliable flag */
 unsigned long task_reliable_limit = ULONG_MAX;
 long shmem_reliable_nr_page = ULONG_MAX >> PAGE_SHIFT;
+atomic_long_t reliable_user_used_nr_page;
 
 bool mem_reliable_counter_initialized(void)
 {
@@ -178,6 +179,7 @@ void reliable_report_meminfo(struct seq_file *m)
 
 	show_val_kb(m, "ReliableTotal:    ", total_reliable_pages());
 	show_val_kb(m, "ReliableUsed:     ", used_reliable_pages());
+	show_val_kb(m, "ReliableTaskUsed: ", task_reliable_used_pages());
 	show_val_kb(m, "ReliableBuddyMem: ", free_reliable_pages());
 
 	if (shmem_reliable_is_enabled()) {
@@ -514,3 +516,14 @@ static int __init setup_reliable_debug(char *str)
 	return 1;
 }
 __setup("reliable_debug", setup_reliable_debug);
+
+#define SEQ_PUT_DEC(str, val) \
+	seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
+void reliable_report_usage(struct seq_file *m, struct mm_struct *mm)
+{
+	if (!mem_reliable_is_enabled())
+		return;
+
+	SEQ_PUT_DEC("Reliable:\t", atomic_long_read(&mm->reliable_nr_page));
+	seq_puts(m, " kB\n");
+}
diff --git a/mm/memory.c b/mm/memory.c
index 3667ec456ace4842245fc32607e0e3c94fcd08a0..e5ad19b8eb60f4bc946376abd1d5ef2813261c0c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -834,6 +834,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 	*prealloc = NULL;
 	copy_user_highpage(new_page, page, addr, src_vma);
 	__SetPageUptodate(new_page);
+	reliable_page_counter(new_page, dst_vma->vm_mm, 1);
 	page_add_new_anon_rmap(new_page, dst_vma, addr, false);
 	lru_cache_add_inactive_or_unevictable(new_page, dst_vma);
 	rss[mm_counter(new_page)]++;
@@ -1273,6 +1274,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 					mark_page_accessed(page);
 			}
 			rss[mm_counter(page)]--;
+			reliable_page_counter(page, mm, -1);
 			page_remove_rmap(page, false);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
@@ -1300,6 +1302,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			}
 
 			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+			reliable_page_counter(page, mm, -1);
 			rss[mm_counter(page)]--;
 			page_remove_rmap(page, false);
 			put_page(page);
@@ -1664,6 +1667,7 @@ static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte,
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
 	inc_mm_counter_fast(mm, mm_counter_file(page));
+	reliable_page_counter(page, mm, 1);
 	page_add_file_rmap(page, false);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 	return 0;
@@ -2942,9 +2946,12 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 						mm_counter_file(old_page));
 				inc_mm_counter_fast(mm, MM_ANONPAGES);
 			}
+			reliable_page_counter(old_page, mm, -1);
 		} else {
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		}
+
+		reliable_page_counter(new_page, mm, 1);
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = pte_sw_mkyoung(entry);
@@ -3514,6 +3521,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	 */
 
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+	reliable_page_counter(page, vma->vm_mm, 1);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
@@ -3682,6 +3690,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	}
 
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+	reliable_page_counter(page, vma->vm_mm, 1);
 	page_add_new_anon_rmap(page, vma, vmf->address, false);
 	lru_cache_add_inactive_or_unevictable(page, vma);
 setpte:
@@ -3876,6 +3885,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
 	add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
+	reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
 	/*
 	 * deposit and withdraw with pmd lock held
@@ -3948,6 +3958,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	/* copy-on-write page */
+	reliable_page_counter(page, vma->vm_mm, 1);
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, vmf->address, false);
@@ -5428,6 +5439,7 @@ vm_fault_t do_anon_page_remap(struct vm_area_struct *vma, unsigned long address,
 	if (ret)
 		goto release;
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+	reliable_page_counter(page, vma->vm_mm, 1);
 	page_add_new_anon_rmap(page, vma, address, false);
 	lru_cache_add_inactive_or_unevictable(page, vma);
diff --git a/mm/migrate.c b/mm/migrate.c
index 6f358153843a7c026f50b7a780b925e9daf595e3..1f78410a10635a5effa89cc09c86c85e079f1b2b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -269,6 +269,7 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		{
 			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
 
+			reliable_page_counter(new, vma->vm_mm, 1);
 			if (PageAnon(new))
 				page_add_anon_rmap(new, vma, pvmw.address, false);
 			else
@@ -2205,6 +2206,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 * new page and page_add_new_anon_rmap guarantee the copy is
 	 * visible before the pagetable update.
 	 */
+	reliable_page_counter(new_page, vma->vm_mm, HPAGE_PMD_NR);
 	page_add_anon_rmap(new_page, vma, start, true);
 	/*
 	 * At this point the pmd is numa/protnone (i.e. non present) and the TLB
@@ -2222,6 +2224,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 	page_ref_unfreeze(page, 2);
 	mlock_migrate_page(new_page, page);
+	reliable_page_counter(page, vma->vm_mm, -HPAGE_PMD_NR);
 	page_remove_rmap(page, true);
 	set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
 
@@ -2466,6 +2469,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 			 * drop page refcount. Page won't be freed, as we took
 			 * a reference just above.
 			 */
+			reliable_page_counter(page, mm, -1);
 			page_remove_rmap(page, false);
 			put_page(page);
 
@@ -2958,6 +2962,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 		goto unlock_abort;
 
 	inc_mm_counter(mm, MM_ANONPAGES);
+	reliable_page_counter(page, mm, 1);
 	page_add_new_anon_rmap(page, vma, addr, false);
 	if (!is_zone_device_page(page))
 		lru_cache_add_inactive_or_unevictable(page, vma);
diff --git a/mm/mmap.c b/mm/mmap.c
index 515d668e130170ff5656c76ac7d162dc34a091e7..1859f39d2af8e9efda6bbb4da7f83a84773fc727 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1751,6 +1751,7 @@ do_user_swap(struct mm_struct *mm, unsigned long addr_start, unsigned long len,
 		set_pte(pte, swp_entry_to_pte(swp_entry(SWP_USERSWAP_ENTRY,
 						page_to_pfn(page))));
 		dec_mm_counter(mm, MM_ANONPAGES);
+		reliable_page_counter(page, mm, -1);
 		page_remove_rmap(page, false);
 		put_page(page);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 0dc39cf94345da8f16d51616b88b33ba60268d6b..9719d73bd5fc5f5c630604dfaee376bbb0e7d116 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1591,6 +1591,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 						   vma_mmu_pagesize(vma));
 			} else {
 				dec_mm_counter(mm, mm_counter(page));
+				reliable_page_counter(page, mm, -1);
 				set_pte_at(mm, address, pvmw.pte, pteval);
 			}
 
@@ -1606,6 +1607,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * copied pages.
 			 */
 			dec_mm_counter(mm, mm_counter(page));
+			reliable_page_counter(page, mm, -1);
 			/* We have to invalidate as we cleared the pte */
 			mmu_notifier_invalidate_range(mm, address,
 						      address + PAGE_SIZE);
@@ -1685,6 +1687,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				mmu_notifier_invalidate_range(mm, address,
 							address + PAGE_SIZE);
 				dec_mm_counter(mm, MM_ANONPAGES);
+				reliable_page_counter(page, mm, -1);
 				goto discard;
 			}
 
@@ -1718,6 +1721,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				spin_unlock(&mmlist_lock);
 			}
 			dec_mm_counter(mm, MM_ANONPAGES);
+			reliable_page_counter(page, mm, -1);
 			inc_mm_counter(mm, MM_SWAPENTS);
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
@@ -1740,6 +1744,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * See Documentation/vm/mmu_notifier.rst
 			 */
 			dec_mm_counter(mm, mm_counter_file(page));
+			reliable_page_counter(page, mm, -1);
 		}
 discard:
 		/*
diff --git a/mm/shmem.c b/mm/shmem.c
index fbddc7dfb72e13301af37294895c4f0c9ff5d05f..e85ac8c2150f47a3ece917699424bde6b940a16a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2467,6 +2467,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	spin_unlock_irq(&info->lock);
 
 	inc_mm_counter(dst_mm, mm_counter_file(page));
+	reliable_page_counter(page, dst_mm, 1);
 	page_add_file_rmap(page, false);
 	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index eaf483c7c83e7691297d12b818bfe20ffa1104e8..7faa30f460e40c6c68e4a5c32b441a17f5702056 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1935,6 +1935,8 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	get_page(page);
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
+
+	reliable_page_counter(page, vma->vm_mm, 1);
 	if (page == swapcache) {
 		page_add_anon_rmap(page, vma, addr, false);
 	} else { /* ksm created a completely new copy */
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 3849b28c09527f1174495b2529aa877b204616fc..15c46208a2accb1b097f9ff6412b2b63f7400c4a 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -150,6 +150,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 #endif
 
 	inc_mm_counter(dst_mm, MM_ANONPAGES);
+	reliable_page_counter(page, dst_mm, 1);
 	page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
 	lru_cache_add_inactive_or_unevictable(page, dst_vma);
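
---

Usage note (editor's sketch, not part of the patch): the hunks above pair every
rmap insertion/removal with a reliable_page_counter() update on the owning mm,
and reliable_report_usage() then exposes that per-task total as a "Reliable:"
line in /proc/<pid>/status. The small userspace program below reads that field
back; it assumes a kernel built with this patch and memory reliable enabled
(the line is absent otherwise, and -1 is returned), and the helper name
read_reliable_kb is hypothetical.

/*
 * Read the "Reliable:" field added to /proc/<pid>/status by this patch.
 * Build: cc -o reliable reliable.c
 */
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* Return the task's reliable memory usage in kB, or -1 if not reported. */
static long read_reliable_kb(pid_t pid)
{
	char path[64], line[256];
	long kb = -1;
	FILE *fp;

	snprintf(path, sizeof(path), "/proc/%d/status", (int)pid);
	fp = fopen(path, "r");
	if (!fp)
		return -1;

	while (fgets(line, sizeof(line), fp)) {
		/* Matches e.g. "Reliable:        1608 kB" */
		if (sscanf(line, "Reliable: %ld", &kb) == 1)
			break;
	}
	fclose(fp);
	return kb;
}

int main(void)
{
	printf("Reliable: %ld kB\n", read_reliable_kb(getpid()));
	return 0;
}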