diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1fbe14d39521d728dd877d263b278210bd9d3ed5..c592f315cd02e4755bf3281575fb136e7157db4c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,8 +27,17 @@ struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
-extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
+extern int mem_cgroup_charge_migrate_fixup(struct page *page,
+				struct mm_struct *mm, gfp_t gfp_mask);
+/* for swap handling */
+extern int mem_cgroup_try_charge(struct mm_struct *mm,
+		gfp_t gfp_mask, struct mem_cgroup **ptr);
+extern void mem_cgroup_commit_charge_swapin(struct page *page,
+					struct mem_cgroup *ptr);
+extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
+
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
 extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
@@ -71,7 +80,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 
-static inline int mem_cgroup_charge(struct page *page,
+struct mem_cgroup;
+
+static inline int mem_cgroup_newpage_charge(struct page *page,
 					struct mm_struct *mm, gfp_t gfp_mask)
 {
 	return 0;
@@ -83,6 +94,27 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
+static inline int mem_cgroup_charge_migrate_fixup(struct page *page,
+					struct mm_struct *mm, gfp_t gfp_mask)
+{
+	return 0;
+}
+
+static inline int mem_cgroup_try_charge(struct mm_struct *mm,
+			gfp_t gfp_mask, struct mem_cgroup **ptr)
+{
+	return 0;
+}
+
+static inline void mem_cgroup_commit_charge_swapin(struct page *page,
+					struct mem_cgroup *ptr)
+{
+}
+
+static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
+{
+}
+
 static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 51ee965455798e63af02d392e44c09be8436d60d..f568b19645517b77fb054b20fdbfc952ad7bd628 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -467,35 +467,31 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	return nr_taken;
 }
 
-/*
- * Charge the memory controller for page usage.
- * Return
- * 	0 if the charge was successful
- * 	< 0 if the cgroup is over its limit
+
+/**
+ * mem_cgroup_try_charge - try to charge PAGE_SIZE to a memory cgroup.
+ * @mm: the mm_struct to charge against (used when *memcg is NULL)
+ * @gfp_mask: gfp_mask for reclaim.
+ * @memcg: a pointer to the memory cgroup to charge against.
+ *
+ * Charge against the memory cgroup pointed to by *memcg. If *memcg == NULL,
+ * the memory cgroup is looked up from @mm and stored in *memcg.
+ *
+ * Returns 0 on success and -ENOMEM on failure.
  */
-static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask, enum charge_type ctype,
-				struct mem_cgroup *memcg)
+
+int mem_cgroup_try_charge(struct mm_struct *mm,
+			gfp_t gfp_mask, struct mem_cgroup **memcg)
 {
 	struct mem_cgroup *mem;
-	struct page_cgroup *pc;
-	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct mem_cgroup_per_zone *mz;
-	unsigned long flags;
-
-	pc = lookup_page_cgroup(page);
-	/* can happen at boot */
-	if (unlikely(!pc))
-		return 0;
-	prefetchw(pc);
+	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-
-	if (likely(!memcg)) {
+	if (likely(!*memcg)) {
 		rcu_read_lock();
 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		if (unlikely(!mem)) {
@@ -506,15 +502,17 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		 * For every charge from the cgroup, increment reference count
 		 */
 		css_get(&mem->css);
+		*memcg = mem;
 		rcu_read_unlock();
 	} else {
-		mem = memcg;
-		css_get(&memcg->css);
+		mem = *memcg;
+		css_get(&mem->css);
 	}
 
+
 	while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
 		if (!(gfp_mask & __GFP_WAIT))
-			goto out;
+			goto nomem;
 
 		if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
 			continue;
@@ -531,18 +529,37 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 
 		if (!nr_retries--) {
 			mem_cgroup_out_of_memory(mem, gfp_mask);
-			goto out;
+			goto nomem;
 		}
 	}
+	return 0;
+nomem:
+	css_put(&mem->css);
+	return -ENOMEM;
+}
+
+/*
+ * Commit a charge obtained by mem_cgroup_try_charge() and set the
+ * page_cgroup to the USED state. If it is already USED, uncharge and return.
+ */
+
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+				       struct page_cgroup *pc,
+				       enum charge_type ctype)
+{
+	struct mem_cgroup_per_zone *mz;
+	unsigned long flags;
 
+	/* try_charge() can return with *memcg set to NULL; handle that here. */
+	if (!mem)
+		return;
 
 	lock_page_cgroup(pc);
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
-
-		goto done;
+		return;
 	}
 	pc->mem_cgroup = mem;
 	/*
@@ -557,15 +574,39 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	__mem_cgroup_add_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 	unlock_page_cgroup(pc);
+}
 
-done:
+/*
+ * Charge the memory controller for page usage.
+ * Return
+ * 	0 if the charge was successful
+ * 	< 0 if the cgroup is over its limit
+ */
+static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
+				gfp_t gfp_mask, enum charge_type ctype,
+				struct mem_cgroup *memcg)
+{
+	struct mem_cgroup *mem;
+	struct page_cgroup *pc;
+	int ret;
+
+	pc = lookup_page_cgroup(page);
+	/* can happen at boot */
+	if (unlikely(!pc))
+		return 0;
+	prefetchw(pc);
+
+	mem = memcg;
+	ret = mem_cgroup_try_charge(mm, gfp_mask, &mem);
+	if (ret)
+		return ret;
+
+	__mem_cgroup_commit_charge(mem, pc, ctype);
 	return 0;
-out:
-	css_put(&mem->css);
-	return -ENOMEM;
 }
 
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
+int mem_cgroup_newpage_charge(struct page *page,
+			      struct mm_struct *mm, gfp_t gfp_mask)
 {
 	if (mem_cgroup_subsys.disabled)
 		return 0;
@@ -586,6 +627,34 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
+/*
+ * Currently the same as mem_cgroup_newpage_charge(), but the assumptions
+ * made here differ from the new-page case, so keep this special case in
+ * its own function for easier maintenance.
+ */
+
+int mem_cgroup_charge_migrate_fixup(struct page *page,
+				struct mm_struct *mm, gfp_t gfp_mask)
+{
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	if (PageCompound(page))
+		return 0;
+
+	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+		return 0;
+
+	if (unlikely(!mm))
+		mm = &init_mm;
+
+	return mem_cgroup_charge_common(page, mm, gfp_mask,
+				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
+}
+
+
+
+
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
@@ -628,6 +697,30 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
+
+void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
+{
+	struct page_cgroup *pc;
+
+	if (mem_cgroup_subsys.disabled)
+		return;
+	if (!ptr)
+		return;
+	pc = lookup_page_cgroup(page);
+	__mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
+{
+	if (mem_cgroup_subsys.disabled)
+		return;
+	if (!mem)
+		return;
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	css_put(&mem->css);
+}
+
+
 /*
  * uncharge if !page_mapped(page)
  */
diff --git a/mm/memory.c b/mm/memory.c
index 3f8fa06b963b281191ac2c1350af0b21b2dad7c5..7f210f1609905d770097969d68d3284997c89390 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2000,7 +2000,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	cow_user_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
-	if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
+	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
 		goto oom_free_new;
 
 	/*
@@ -2392,6 +2392,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page;
 	swp_entry_t entry;
 	pte_t pte;
+	struct mem_cgroup *ptr = NULL;
 	int ret = 0;
 
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
@@ -2430,7 +2431,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
-	if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+	if (mem_cgroup_try_charge(mm, GFP_KERNEL, &ptr) == -ENOMEM) {
 		ret = VM_FAULT_OOM;
 		unlock_page(page);
 		goto out;
@@ -2460,6 +2461,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	flush_icache_page(vma, page);
 	set_pte_at(mm, address, page_table, pte);
 	page_add_anon_rmap(page, vma, address);
+	mem_cgroup_commit_charge_swapin(page, ptr);
 
 	swap_free(entry);
 	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2480,7 +2482,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 out:
 	return ret;
 out_nomap:
-	mem_cgroup_uncharge_page(page);
+	mem_cgroup_cancel_charge_swapin(ptr);
 	pte_unmap_unlock(page_table, ptl);
 	unlock_page(page);
 	page_cache_release(page);
@@ -2510,7 +2512,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto oom;
 	__SetPageUptodate(page);
 
-	if (mem_cgroup_charge(page, mm, GFP_KERNEL))
+	if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
 		goto oom_free_page;
 
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -2601,7 +2603,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			ret = VM_FAULT_OOM;
 			goto out;
 		}
-		if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
+		if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
 			ret = VM_FAULT_OOM;
 			page_cache_release(page);
 			goto out;
diff --git a/mm/migrate.c b/mm/migrate.c
index 55373983c9c68410ebdb98883518e7f2b56728bd..246dcb973ae70347a866c4c632d129fa1c6efbfc 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -133,7 +133,7 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	 * be reliable, and this charge can actually fail: oh well, we don't
 	 * make the situation any worse by proceeding as if it had succeeded.
 	 */
-	mem_cgroup_charge(new, mm, GFP_ATOMIC);
+	mem_cgroup_charge_migrate_fixup(new, mm, GFP_ATOMIC);
 
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
diff --git a/mm/swapfile.c b/mm/swapfile.c
index eec5ca758a23b91c5707655dde0b191d7d4ada52..fb926efb51672e955204b72b0e5c1435660bcf15 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -690,17 +690,18 @@ unsigned int count_swap_pages(int type, int free)
 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
+	struct mem_cgroup *ptr = NULL;
 	spinlock_t *ptl;
 	pte_t *pte;
 	int ret = 1;
 
-	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
+	if (mem_cgroup_try_charge(vma->vm_mm, GFP_KERNEL, &ptr))
 		ret = -ENOMEM;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
 		if (ret > 0)
-			mem_cgroup_uncharge_page(page);
+			mem_cgroup_cancel_charge_swapin(ptr);
 		ret = 0;
 		goto out;
 	}
@@ -710,6 +711,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, addr);
+	mem_cgroup_commit_charge_swapin(page, ptr);
 	swap_free(entry);
 	/*
 	 * Move the page to the active list so it is not
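
Editor's note: the calling convention introduced by this patch is a two-phase
protocol with an abort path. mem_cgroup_try_charge() reserves PAGE_SIZE against
the cgroup before the caller takes the pte lock, mem_cgroup_commit_charge_swapin()
binds the reservation to the page once the mapping is known to be established,
and mem_cgroup_cancel_charge_swapin() returns the reservation when the pte check
loses a race. The sketch below models that protocol as self-contained userspace
C; the struct layouts and helper names are simplified stand-ins for the kernel's
res_counter and page_cgroup machinery, not the real interfaces.

/*
 * Minimal userspace sketch (not kernel code) of the charge-commit-cancel
 * protocol.  All types and helpers here are simplified stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096L

struct mem_cgroup {		/* stand-in for the kernel's res_counter */
	long usage;
	long limit;
};

struct page {			/* stand-in: just records its owner */
	struct mem_cgroup *owner;
};

/* Phase 1: reserve PAGE_SIZE, possibly reclaiming; may fail. */
static int try_charge(struct mem_cgroup *memcg)
{
	if (memcg->usage + PAGE_SIZE > memcg->limit)
		return -1;	/* -ENOMEM in the kernel */
	memcg->usage += PAGE_SIZE;
	return 0;
}

/* Phase 2a: the pte was installed; bind the reservation to the page. */
static void commit_charge(struct page *page, struct mem_cgroup *memcg)
{
	if (!memcg)		/* mirrors the NULL check in the patch */
		return;
	page->owner = memcg;
}

/* Phase 2b: the pte check lost a race; give the reservation back. */
static void cancel_charge(struct mem_cgroup *memcg)
{
	if (!memcg)
		return;
	memcg->usage -= PAGE_SIZE;
}

int main(void)
{
	struct mem_cgroup cg = { .usage = 0, .limit = 2 * PAGE_SIZE };
	struct page pg = { .owner = NULL };
	bool pte_still_matches = true;	/* stands in for pte_same() */

	if (try_charge(&cg))
		return 1;		/* VM_FAULT_OOM in do_swap_page() */

	/* ...the pte lock would be taken and the pte revalidated here... */
	if (pte_still_matches)
		commit_charge(&pg, &cg);
	else
		cancel_charge(&cg);

	printf("usage: %ld, page accounted: %s\n",
	       cg.usage, pg.owner ? "yes" : "no");
	return 0;
}

The point of the split is visible in unuse_pte() above: the potentially
sleeping GFP_KERNEL charge happens before pte_offset_map_lock(), and a racing
pte change is handled by cancelling the reservation rather than uncharging a
page that was never marked as accounted.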