提交 52383431 编写于 作者: Vladimir Davydov 提交者: Linus Torvalds

mm: get rid of __GFP_KMEMCG

Currently, to allocate a page that should be charged to kmemcg (e.g.
threadinfo), we pass the __GFP_KMEMCG flag to the page allocator.  The
allocated page must then be freed with free_memcg_kmem_pages.  Apart from
looking asymmetrical, this also requires intruding into the general
allocation path.  So let's introduce separate functions that will
alloc/free pages charged to kmemcg.

The new functions are called alloc_kmem_pages and free_kmem_pages.  They
should be used when the caller would actually like to use kmalloc, but
has to fall back to the page allocator because the allocation is large.
They differ from alloc_pages and free_pages only in that, besides
allocating or freeing pages, they also charge them to the kmem resource
counter of the current memory cgroup.

[sfr@canb.auug.org.au: export kmalloc_order() to modules]
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 5dfb4175
...@@ -31,7 +31,6 @@ struct vm_area_struct; ...@@ -31,7 +31,6 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x20000u #define ___GFP_HARDWALL 0x20000u
#define ___GFP_THISNODE 0x40000u #define ___GFP_THISNODE 0x40000u
#define ___GFP_RECLAIMABLE 0x80000u #define ___GFP_RECLAIMABLE 0x80000u
#define ___GFP_KMEMCG 0x100000u
#define ___GFP_NOTRACK 0x200000u #define ___GFP_NOTRACK 0x200000u
#define ___GFP_NO_KSWAPD 0x400000u #define ___GFP_NO_KSWAPD 0x400000u
#define ___GFP_OTHER_NODE 0x800000u #define ___GFP_OTHER_NODE 0x800000u
...@@ -91,7 +90,6 @@ struct vm_area_struct; ...@@ -91,7 +90,6 @@ struct vm_area_struct;
#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
/* /*
...@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, ...@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
#define alloc_page_vma_node(gfp_mask, vma, addr, node) \ #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
alloc_pages_vma(gfp_mask, 0, vma, addr, node) alloc_pages_vma(gfp_mask, 0, vma, addr, node)
extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
unsigned int order);
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask); extern unsigned long get_zeroed_page(gfp_t gfp_mask);
...@@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order); ...@@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold); extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold); extern void free_hot_cold_page_list(struct list_head *list, int cold);
extern void __free_memcg_kmem_pages(struct page *page, unsigned int order); extern void __free_kmem_pages(struct page *page, unsigned int order);
extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order); extern void free_kmem_pages(unsigned long addr, unsigned int order);
#define __free_page(page) __free_pages((page), 0) #define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0) #define free_page(addr) free_pages((addr), 0)
......
...@@ -537,7 +537,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) ...@@ -537,7 +537,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
* res_counter_charge_nofail, but we hope those allocations are rare, * res_counter_charge_nofail, but we hope those allocations are rare,
* and won't be worth the trouble. * and won't be worth the trouble.
*/ */
if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL)) if (gfp & __GFP_NOFAIL)
return true; return true;
if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
return true; return true;
......
...@@ -369,16 +369,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, ...@@ -369,16 +369,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
#include <linux/slub_def.h> #include <linux/slub_def.h>
#endif #endif
static __always_inline void * extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
void *ret;
flags |= (__GFP_COMP | __GFP_KMEMCG);
ret = (void *) __get_free_pages(flags, order);
kmemleak_alloc(ret, size, 1, flags);
return ret;
}
#ifdef CONFIG_TRACING #ifdef CONFIG_TRACING
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
......
...@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm); ...@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
# define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK) # define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK)
#endif #endif
#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
/* /*
* flag set/clear/test wrappers * flag set/clear/test wrappers
* - pass TIF_xxxx constants to these functions * - pass TIF_xxxx constants to these functions
......
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
{(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \
{(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \
{(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \
{(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \
{(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \
{(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \
{(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \
......
...@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti) ...@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node) int node)
{ {
struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED, struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER); THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL; return page ? page_address(page) : NULL;
} }
static inline void free_thread_info(struct thread_info *ti) static inline void free_thread_info(struct thread_info *ti)
{ {
free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
} }
# else # else
static struct kmem_cache *thread_info_cache; static struct kmem_cache *thread_info_cache;
......
...@@ -3540,11 +3540,12 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order) ...@@ -3540,11 +3540,12 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
/* /*
* Disabling accounting is only relevant for some specific memcg * Disabling accounting is only relevant for some specific memcg
* internal allocations. Therefore we would initially not have such * internal allocations. Therefore we would initially not have such
* check here, since direct calls to the page allocator that are marked * check here, since direct calls to the page allocator that are
* with GFP_KMEMCG only happen outside memcg core. We are mostly * accounted to kmemcg (alloc_kmem_pages and friends) only happen
* concerned with cache allocations, and by having this test at * outside memcg core. We are mostly concerned with cache allocations,
* memcg_kmem_get_cache, we are already able to relay the allocation to * and by having this test at memcg_kmem_get_cache, we are already able
* the root cache and bypass the memcg cache altogether. * to relay the allocation to the root cache and bypass the memcg cache
* altogether.
* *
* There is one exception, though: the SLUB allocator does not create * There is one exception, though: the SLUB allocator does not create
* large order caches, but rather service large kmallocs directly from * large order caches, but rather service large kmallocs directly from
......
...@@ -2697,7 +2697,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, ...@@ -2697,7 +2697,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
int migratetype = allocflags_to_migratetype(gfp_mask); int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie; unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR; int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask; gfp_mask &= gfp_allowed_mask;
...@@ -2716,13 +2715,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, ...@@ -2716,13 +2715,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!zonelist->_zonerefs->zone)) if (unlikely(!zonelist->_zonerefs->zone))
return NULL; return NULL;
/*
* Will only have any effect when __GFP_KMEMCG is set. This is
* verified in the (always inline) callee
*/
if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
return NULL;
retry_cpuset: retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin(); cpuset_mems_cookie = read_mems_allowed_begin();
...@@ -2782,8 +2774,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, ...@@ -2782,8 +2774,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset; goto retry_cpuset;
memcg_kmem_commit_charge(page, memcg, order);
return page; return page;
} }
EXPORT_SYMBOL(__alloc_pages_nodemask); EXPORT_SYMBOL(__alloc_pages_nodemask);
...@@ -2837,27 +2827,51 @@ void free_pages(unsigned long addr, unsigned int order) ...@@ -2837,27 +2827,51 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages); EXPORT_SYMBOL(free_pages);
/* /*
* __free_memcg_kmem_pages and free_memcg_kmem_pages will free * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
* pages allocated with __GFP_KMEMCG. * of the current memory cgroup.
* *
* Those pages are accounted to a particular memcg, embedded in the * It should be used when the caller would like to use kmalloc, but since the
* corresponding page_cgroup. To avoid adding a hit in the allocator to search * allocation is large, it has to fall back to the page allocator.
* for that information only to find out that it is NULL for users who have no */
* interest in that whatsoever, we provide these functions. struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
* {
* The caller knows better which flags it relies on. struct page *page;
struct mem_cgroup *memcg = NULL;
if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
return NULL;
page = alloc_pages(gfp_mask, order);
memcg_kmem_commit_charge(page, memcg, order);
return page;
}
/*
 * alloc_kmem_pages_node - node-aware counterpart of alloc_kmem_pages.
 * @nid: node id, passed through unchanged to alloc_pages_node()
 * @gfp_mask: allocation flags, used for both the kmemcg charge and the
 *            page allocation
 * @order: allocation order, passed to the charge, the allocation and the
 *         commit step
 *
 * Returns whatever alloc_pages_node() returned, or NULL when
 * memcg_kmem_newpage_charge() refuses the charge.
 */
struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
struct page *page;
struct mem_cgroup *memcg = NULL;
/*
 * Charge before allocating: if the charge is refused, fail without
 * calling into the page allocator at all.
 */
if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
return NULL;
page = alloc_pages_node(nid, gfp_mask, order);
/*
 * Called unconditionally, so page may be NULL here.
 * NOTE(review): presumably the commit helper drops the charge when the
 * allocation failed -- confirm against its definition.
 */
memcg_kmem_commit_charge(page, memcg, order);
return page;
}
/*
* __free_kmem_pages and free_kmem_pages will free pages allocated with
* alloc_kmem_pages.
*/ */
void __free_memcg_kmem_pages(struct page *page, unsigned int order) void __free_kmem_pages(struct page *page, unsigned int order)
{ {
memcg_kmem_uncharge_pages(page, order); memcg_kmem_uncharge_pages(page, order);
__free_pages(page, order); __free_pages(page, order);
} }
void free_memcg_kmem_pages(unsigned long addr, unsigned int order) void free_kmem_pages(unsigned long addr, unsigned int order)
{ {
if (addr != 0) { if (addr != 0) {
VM_BUG_ON(!virt_addr_valid((void *)addr)); VM_BUG_ON(!virt_addr_valid((void *)addr));
__free_memcg_kmem_pages(virt_to_page((void *)addr), order); __free_kmem_pages(virt_to_page((void *)addr), order);
} }
} }
......
...@@ -582,6 +582,19 @@ void __init create_kmalloc_caches(unsigned long flags) ...@@ -582,6 +582,19 @@ void __init create_kmalloc_caches(unsigned long flags)
} }
#endif /* !CONFIG_SLOB */ #endif /* !CONFIG_SLOB */
/*
 * kmalloc_order - serve a kmalloc request directly from the page allocator.
 * @size: requested allocation size; only reported to kmemleak here
 * @flags: gfp flags; __GFP_COMP is added before allocating
 * @order: page order handed to alloc_kmem_pages()
 *
 * Allocates through alloc_kmem_pages() and returns page_address() of the
 * resulting page, or NULL if no page was obtained.  Exported so that
 * modules can call it.
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
void *ret;
struct page *page;
/*
 * The kfree() path shown for non-slab pages BUG()s on a non-compound page
 * and derives the order from compound_order(), so request a compound page.
 */
flags |= __GFP_COMP;
page = alloc_kmem_pages(flags, order);
ret = page ? page_address(page) : NULL;
/*
 * Reported even when ret is NULL.
 * NOTE(review): presumably kmemleak ignores NULL pointers -- confirm.
 */
kmemleak_alloc(ret, size, 1, flags);
return ret;
}
EXPORT_SYMBOL(kmalloc_order);
#ifdef CONFIG_TRACING #ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{ {
......
...@@ -3311,8 +3311,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) ...@@ -3311,8 +3311,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page; struct page *page;
void *ptr = NULL; void *ptr = NULL;
flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG; flags |= __GFP_COMP | __GFP_NOTRACK;
page = alloc_pages_node(node, flags, get_order(size)); page = alloc_kmem_pages_node(node, flags, get_order(size));
if (page) if (page)
ptr = page_address(page); ptr = page_address(page);
...@@ -3381,7 +3381,7 @@ void kfree(const void *x) ...@@ -3381,7 +3381,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) { if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page)); BUG_ON(!PageCompound(page));
kfree_hook(x); kfree_hook(x);
__free_memcg_kmem_pages(page, compound_order(page)); __free_kmem_pages(page, compound_order(page));
return; return;
} }
slab_free(page->slab_cache, page, object, _RET_IP_); slab_free(page->slab_cache, page, object, _RET_IP_);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册