提交 9442ec9d 编写于 作者: H Hugh Dickins 提交者: Linus Torvalds

memcg: bad page if page_cgroup when free

Replace free_hot_cold_page's VM_BUG_ON(page_get_page_cgroup(page)) by a "Bad
page state" and clear: most users don't have CONFIG_DEBUG_VM on, and if it
were set here, it'd likely cause corruption when the page is reused.

Don't use page_assign_page_cgroup to clear it: that should be private to
memcontrol.c, and always called with the lock taken; and memmap_init_zone
doesn't need it either - like page->mapping and other pointers throughout the
kernel, Linux assumes pointers in zeroed structures are NULL pointers.

Instead use page_reset_bad_cgroup, added to memcontrol.h for this only.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 98837c7f
...@@ -29,8 +29,9 @@ struct mm_struct; ...@@ -29,8 +29,9 @@ struct mm_struct;
extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
extern void mm_free_cgroup(struct mm_struct *mm); extern void mm_free_cgroup(struct mm_struct *mm);
extern void page_assign_page_cgroup(struct page *page,
struct page_cgroup *pc); #define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0)
extern struct page_cgroup *page_get_page_cgroup(struct page *page); extern struct page_cgroup *page_get_page_cgroup(struct page *page);
extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask); gfp_t gfp_mask);
...@@ -82,8 +83,7 @@ static inline void mm_free_cgroup(struct mm_struct *mm) ...@@ -82,8 +83,7 @@ static inline void mm_free_cgroup(struct mm_struct *mm)
{ {
} }
static inline void page_assign_page_cgroup(struct page *page, static inline void page_reset_bad_cgroup(struct page *page)
struct page_cgroup *pc)
{ {
} }
......
...@@ -140,11 +140,17 @@ struct mem_cgroup { ...@@ -140,11 +140,17 @@ struct mem_cgroup {
/* /*
* We use the lower bit of the page->page_cgroup pointer as a bit spin * We use the lower bit of the page->page_cgroup pointer as a bit spin
* lock. We need to ensure that page->page_cgroup is atleast two * lock. We need to ensure that page->page_cgroup is at least two
* byte aligned (based on comments from Nick Piggin) * byte aligned (based on comments from Nick Piggin). But since
* bit_spin_lock doesn't actually set that lock bit in a non-debug
* uniprocessor kernel, we should avoid setting it here too.
*/ */
#define PAGE_CGROUP_LOCK_BIT 0x0 #define PAGE_CGROUP_LOCK_BIT 0x0
#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT)
#else
#define PAGE_CGROUP_LOCK 0x0
#endif
/* /*
* A page_cgroup page is associated with every page descriptor. The * A page_cgroup page is associated with every page descriptor. The
...@@ -271,19 +277,10 @@ static inline int page_cgroup_locked(struct page *page) ...@@ -271,19 +277,10 @@ static inline int page_cgroup_locked(struct page *page)
&page->page_cgroup); &page->page_cgroup);
} }
void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
{ {
int locked; VM_BUG_ON(!page_cgroup_locked(page));
page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
/*
* While resetting the page_cgroup we might not hold the
* page_cgroup lock. free_hot_cold_page() is an example
* of such a scenario
*/
if (pc)
VM_BUG_ON(!page_cgroup_locked(page));
locked = (page->page_cgroup & PAGE_CGROUP_LOCK);
page->page_cgroup = ((unsigned long)pc | locked);
} }
struct page_cgroup *page_get_page_cgroup(struct page *page) struct page_cgroup *page_get_page_cgroup(struct page *page)
......
...@@ -222,13 +222,19 @@ static inline int bad_range(struct zone *zone, struct page *page) ...@@ -222,13 +222,19 @@ static inline int bad_range(struct zone *zone, struct page *page)
static void bad_page(struct page *page) static void bad_page(struct page *page)
{ {
printk(KERN_EMERG "Bad page state in process '%s'\n" void *pc = page_get_page_cgroup(page);
KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
KERN_EMERG "Trying to fix it up, but a reboot is needed\n" printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
KERN_EMERG "Backtrace:\n", "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
current->comm, page, (int)(2*sizeof(unsigned long)), current->comm, page, (int)(2*sizeof(unsigned long)),
(unsigned long)page->flags, page->mapping, (unsigned long)page->flags, page->mapping,
page_mapcount(page), page_count(page)); page_mapcount(page), page_count(page));
if (pc) {
printk(KERN_EMERG "cgroup:%p\n", pc);
page_reset_bad_cgroup(page);
}
printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
KERN_EMERG "Backtrace:\n");
dump_stack(); dump_stack();
page->flags &= ~(1 << PG_lru | page->flags &= ~(1 << PG_lru |
1 << PG_private | 1 << PG_private |
...@@ -454,6 +460,7 @@ static inline int free_pages_check(struct page *page) ...@@ -454,6 +460,7 @@ static inline int free_pages_check(struct page *page)
{ {
if (unlikely(page_mapcount(page) | if (unlikely(page_mapcount(page) |
(page->mapping != NULL) | (page->mapping != NULL) |
(page_get_page_cgroup(page) != NULL) |
(page_count(page) != 0) | (page_count(page) != 0) |
(page->flags & ( (page->flags & (
1 << PG_lru | 1 << PG_lru |
...@@ -603,6 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) ...@@ -603,6 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
{ {
if (unlikely(page_mapcount(page) | if (unlikely(page_mapcount(page) |
(page->mapping != NULL) | (page->mapping != NULL) |
(page_get_page_cgroup(page) != NULL) |
(page_count(page) != 0) | (page_count(page) != 0) |
(page->flags & ( (page->flags & (
1 << PG_lru | 1 << PG_lru |
...@@ -989,7 +997,6 @@ static void free_hot_cold_page(struct page *page, int cold) ...@@ -989,7 +997,6 @@ static void free_hot_cold_page(struct page *page, int cold)
if (!PageHighMem(page)) if (!PageHighMem(page))
debug_check_no_locks_freed(page_address(page), PAGE_SIZE); debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
VM_BUG_ON(page_get_page_cgroup(page));
arch_free_page(page, 0); arch_free_page(page, 0);
kernel_map_pages(page, 1, 0); kernel_map_pages(page, 1, 0);
...@@ -2528,7 +2535,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, ...@@ -2528,7 +2535,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
set_page_links(page, zone, nid, pfn); set_page_links(page, zone, nid, pfn);
init_page_count(page); init_page_count(page);
reset_page_mapcount(page); reset_page_mapcount(page);
page_assign_page_cgroup(page, NULL);
SetPageReserved(page); SetPageReserved(page);
/* /*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部