提交 e1a1cd59 编写于 作者: B Balbir Singh 提交者: Linus Torvalds

Memory controller: make charging gfp mask aware

Nick Piggin pointed out that swap cache and page cache addition routines
could be called from non GFP_KERNEL contexts.  This patch makes the
charging routine aware of the gfp context.  Charging might fail if the
cgroup is over its limit, in which case a suitable error is returned.

This patch was tested on a Powerpc box.  I am still looking at being able
to test the path, through which allocations happen in non GFP_KERNEL
contexts.

[kamezawa.hiroyu@jp.fujitsu.com: problem with ZONE_MOVABLE]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 bed7161a
...@@ -32,7 +32,8 @@ extern void mm_free_cgroup(struct mm_struct *mm); ...@@ -32,7 +32,8 @@ extern void mm_free_cgroup(struct mm_struct *mm);
extern void page_assign_page_cgroup(struct page *page, extern void page_assign_page_cgroup(struct page *page,
struct page_cgroup *pc); struct page_cgroup *pc);
extern struct page_cgroup *page_get_page_cgroup(struct page *page); extern struct page_cgroup *page_get_page_cgroup(struct page *page);
extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm); extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern void mem_cgroup_uncharge(struct page_cgroup *pc); extern void mem_cgroup_uncharge(struct page_cgroup *pc);
extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active); extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active);
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
...@@ -42,7 +43,8 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, ...@@ -42,7 +43,8 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct mem_cgroup *mem_cont, struct mem_cgroup *mem_cont,
int active); int active);
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm); extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm); extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm);
static inline void mem_cgroup_uncharge_page(struct page *page) static inline void mem_cgroup_uncharge_page(struct page *page)
...@@ -70,7 +72,8 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page) ...@@ -70,7 +72,8 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
return NULL; return NULL;
} }
static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm) static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{ {
return 0; return 0;
} }
...@@ -89,7 +92,8 @@ static inline void mem_cgroup_move_lists(struct page_cgroup *pc, ...@@ -89,7 +92,8 @@ static inline void mem_cgroup_move_lists(struct page_cgroup *pc,
} }
static inline int mem_cgroup_cache_charge(struct page *page, static inline int mem_cgroup_cache_charge(struct page *page,
struct mm_struct *mm) struct mm_struct *mm,
gfp_t gfp_mask)
{ {
return 0; return 0;
} }
......
...@@ -183,7 +183,8 @@ extern void swap_setup(void); ...@@ -183,7 +183,8 @@ extern void swap_setup(void);
/* linux/mm/vmscan.c */ /* linux/mm/vmscan.c */
extern unsigned long try_to_free_pages(struct zone **zones, int order, extern unsigned long try_to_free_pages(struct zone **zones, int order,
gfp_t gfp_mask); gfp_t gfp_mask);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
gfp_t gfp_mask);
extern int __isolate_lru_page(struct page *page, int mode); extern int __isolate_lru_page(struct page *page, int mode);
extern unsigned long shrink_all_memory(unsigned long nr_pages); extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness; extern int vm_swappiness;
......
...@@ -464,7 +464,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, ...@@ -464,7 +464,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
if (error == 0) { if (error == 0) {
error = mem_cgroup_cache_charge(page, current->mm); error = mem_cgroup_cache_charge(page, current->mm, gfp_mask);
if (error) if (error)
goto out; goto out;
......
...@@ -261,7 +261,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, ...@@ -261,7 +261,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
* 0 if the charge was successful * 0 if the charge was successful
* < 0 if the cgroup is over its limit * < 0 if the cgroup is over its limit
*/ */
int mem_cgroup_charge(struct page *page, struct mm_struct *mm) int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{ {
struct mem_cgroup *mem; struct mem_cgroup *mem;
struct page_cgroup *pc, *race_pc; struct page_cgroup *pc, *race_pc;
...@@ -293,7 +294,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm) ...@@ -293,7 +294,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
unlock_page_cgroup(page); unlock_page_cgroup(page);
pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL); pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
if (pc == NULL) if (pc == NULL)
goto err; goto err;
...@@ -320,7 +321,14 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm) ...@@ -320,7 +321,14 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
* the cgroup limit. * the cgroup limit.
*/ */
while (res_counter_charge(&mem->res, PAGE_SIZE)) { while (res_counter_charge(&mem->res, PAGE_SIZE)) {
if (try_to_free_mem_cgroup_pages(mem)) bool is_atomic = gfp_mask & GFP_ATOMIC;
/*
* We cannot reclaim under GFP_ATOMIC, fail the charge
*/
if (is_atomic)
goto noreclaim;
if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
continue; continue;
/* /*
...@@ -344,8 +352,9 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm) ...@@ -344,8 +352,9 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
congestion_wait(WRITE, HZ/10); congestion_wait(WRITE, HZ/10);
continue; continue;
} }
noreclaim:
css_put(&mem->css); css_put(&mem->css);
if (!is_atomic)
mem_cgroup_out_of_memory(mem, GFP_KERNEL); mem_cgroup_out_of_memory(mem, GFP_KERNEL);
goto free_pc; goto free_pc;
} }
...@@ -385,7 +394,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm) ...@@ -385,7 +394,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
/* /*
* See if the cached pages should be charged at all? * See if the cached pages should be charged at all?
*/ */
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm) int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{ {
struct mem_cgroup *mem; struct mem_cgroup *mem;
if (!mm) if (!mm)
...@@ -393,7 +403,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm) ...@@ -393,7 +403,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm)
mem = rcu_dereference(mm->mem_cgroup); mem = rcu_dereference(mm->mem_cgroup);
if (mem->control_type == MEM_CGROUP_TYPE_ALL) if (mem->control_type == MEM_CGROUP_TYPE_ALL)
return mem_cgroup_charge(page, mm); return mem_cgroup_charge(page, mm, gfp_mask);
else else
return 0; return 0;
} }
......
...@@ -1147,7 +1147,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa ...@@ -1147,7 +1147,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
pte_t *pte; pte_t *pte;
spinlock_t *ptl; spinlock_t *ptl;
retval = mem_cgroup_charge(page, mm); retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
if (retval) if (retval)
goto out; goto out;
...@@ -1650,7 +1650,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1650,7 +1650,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
cow_user_page(new_page, old_page, address, vma); cow_user_page(new_page, old_page, address, vma);
__SetPageUptodate(new_page); __SetPageUptodate(new_page);
if (mem_cgroup_charge(new_page, mm)) if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
goto oom_free_new; goto oom_free_new;
/* /*
...@@ -2052,7 +2052,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2052,7 +2052,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(PGMAJFAULT); count_vm_event(PGMAJFAULT);
} }
if (mem_cgroup_charge(page, mm)) { if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
delayacct_clear_flag(DELAYACCT_PF_SWAPIN); delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
ret = VM_FAULT_OOM; ret = VM_FAULT_OOM;
goto out; goto out;
...@@ -2139,7 +2139,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2139,7 +2139,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto oom; goto oom;
__SetPageUptodate(page); __SetPageUptodate(page);
if (mem_cgroup_charge(page, mm)) if (mem_cgroup_charge(page, mm, GFP_KERNEL))
goto oom_free_page; goto oom_free_page;
entry = mk_pte(page, vma->vm_page_prot); entry = mk_pte(page, vma->vm_page_prot);
...@@ -2277,7 +2277,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2277,7 +2277,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
} }
if (mem_cgroup_charge(page, mm)) { if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
ret = VM_FAULT_OOM; ret = VM_FAULT_OOM;
goto out; goto out;
} }
......
...@@ -153,7 +153,7 @@ static void remove_migration_pte(struct vm_area_struct *vma, ...@@ -153,7 +153,7 @@ static void remove_migration_pte(struct vm_area_struct *vma,
return; return;
} }
if (mem_cgroup_charge(new, mm)) { if (mem_cgroup_charge(new, mm, GFP_KERNEL)) {
pte_unmap(ptep); pte_unmap(ptep);
return; return;
} }
......
...@@ -78,7 +78,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) ...@@ -78,7 +78,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
error = radix_tree_preload(gfp_mask); error = radix_tree_preload(gfp_mask);
if (!error) { if (!error) {
error = mem_cgroup_cache_charge(page, current->mm); error = mem_cgroup_cache_charge(page, current->mm, gfp_mask);
if (error) if (error)
goto out; goto out;
......
...@@ -510,7 +510,7 @@ unsigned int count_swap_pages(int type, int free) ...@@ -510,7 +510,7 @@ unsigned int count_swap_pages(int type, int free)
static int unuse_pte(struct vm_area_struct *vma, pte_t *pte, static int unuse_pte(struct vm_area_struct *vma, pte_t *pte,
unsigned long addr, swp_entry_t entry, struct page *page) unsigned long addr, swp_entry_t entry, struct page *page)
{ {
if (mem_cgroup_charge(page, vma->vm_mm)) if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
return -ENOMEM; return -ENOMEM;
inc_mm_counter(vma->vm_mm, anon_rss); inc_mm_counter(vma->vm_mm, anon_rss);
......
...@@ -1337,16 +1337,11 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask) ...@@ -1337,16 +1337,11 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
#ifdef CONFIG_CGROUP_MEM_CONT #ifdef CONFIG_CGROUP_MEM_CONT
#ifdef CONFIG_HIGHMEM unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
#define ZONE_USERPAGES ZONE_HIGHMEM gfp_t gfp_mask)
#else
#define ZONE_USERPAGES ZONE_NORMAL
#endif
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
{ {
struct scan_control sc = { struct scan_control sc = {
.gfp_mask = GFP_KERNEL, .gfp_mask = gfp_mask,
.may_writepage = !laptop_mode, .may_writepage = !laptop_mode,
.may_swap = 1, .may_swap = 1,
.swap_cluster_max = SWAP_CLUSTER_MAX, .swap_cluster_max = SWAP_CLUSTER_MAX,
...@@ -1357,9 +1352,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont) ...@@ -1357,9 +1352,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
}; };
int node; int node;
struct zone **zones; struct zone **zones;
int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);
for_each_online_node(node) { for_each_online_node(node) {
zones = NODE_DATA(node)->node_zonelists[ZONE_USERPAGES].zones; zones = NODE_DATA(node)->node_zonelists[target_zone].zones;
if (do_try_to_free_pages(zones, sc.gfp_mask, &sc)) if (do_try_to_free_pages(zones, sc.gfp_mask, &sc))
return 1; return 1;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册