diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 9b53d5827361fd647f3388212e648f502defc698..1c07547d3f81f28a19edb9de8752f8865b44478d 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -112,14 +112,22 @@ the per cgroup LRU. 2.2.1 Accounting details -All mapped pages (RSS) and unmapped user pages (Page Cache) are accounted. -RSS pages are accounted at the time of page_add_*_rmap() unless they've already -been accounted for earlier. A file page will be accounted for as Page Cache; -it's mapped into the page tables of a process, duplicate accounting is carefully -avoided. Page Cache pages are accounted at the time of add_to_page_cache(). -The corresponding routines that remove a page from the page tables or removes -a page from Page Cache is used to decrement the accounting counters of the -cgroup. +All mapped anon pages (RSS) and cache pages (Page Cache) are accounted. +(some pages which never be reclaimable and will not be on global LRU + are not accounted. we just accounts pages under usual vm management.) + +RSS pages are accounted at page_fault unless they've already been accounted +for earlier. A file page will be accounted for as Page Cache when it's +inserted into inode (radix-tree). While it's mapped into the page tables of +processes, duplicate accounting is carefully avoided. + +A RSS page is unaccounted when it's fully unmapped. A PageCache page is +unaccounted when it's removed from radix-tree. + +At page migration, accounting information is kept. + +Note: we just account pages-on-lru because our purpose is to control amount +of used pages. not-on-lru pages are tend to be out-of-control from vm view. 2.3 Shared Page Accounting diff --git a/mm/memory.c b/mm/memory.c index 54cf20ee0a83c6a3b6af2041f756a6d0aa3ae20a..3a6c4a6583256584303c4ac7c8813938abc49ed0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1323,18 +1323,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, pte_t *pte; spinlock_t *ptl; - retval = mem_cgroup_charge(page, mm, GFP_KERNEL); - if (retval) - goto out; - retval = -EINVAL; if (PageAnon(page)) - goto out_uncharge; + goto out; retval = -ENOMEM; flush_dcache_page(page); pte = get_locked_pte(mm, addr, &ptl); if (!pte) - goto out_uncharge; + goto out; retval = -EBUSY; if (!pte_none(*pte)) goto out_unlock; @@ -1350,8 +1346,6 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, return retval; out_unlock: pte_unmap_unlock(pte, ptl); -out_uncharge: - mem_cgroup_uncharge_page(page); out: return retval; } @@ -2463,6 +2457,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; pte_t entry; int anon = 0; + int charged = 0; struct page *dirty_page = NULL; struct vm_fault vmf; int ret; @@ -2503,6 +2498,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ret = VM_FAULT_OOM; goto out; } + if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { + ret = VM_FAULT_OOM; + page_cache_release(page); + goto out; + } + charged = 1; /* * Don't let another task, with possibly unlocked vma, * keep the mlocked page. @@ -2543,11 +2544,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, } - if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { - ret = VM_FAULT_OOM; - goto out; - } - page_table = pte_offset_map_lock(mm, pmd, address, &ptl); /* @@ -2585,7 +2581,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, address, entry); } else { - mem_cgroup_uncharge_page(page); + if (charged) + mem_cgroup_uncharge_page(page); if (anon) page_cache_release(page); else diff --git a/mm/rmap.c b/mm/rmap.c index 7e90bebbeb6cfe630fa1a1aeb5498bf1e6cee11c..8701d5fce7327d5af01aba202b2bab2f8eabfc73 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -727,8 +727,8 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma) page_clear_dirty(page); set_page_dirty(page); } - - mem_cgroup_uncharge_page(page); + if (PageAnon(page)) + mem_cgroup_uncharge_page(page); __dec_zone_page_state(page, PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); /*