提交 4ae7c039 编写于 作者: C Christoph Lameter 提交者: Linus Torvalds

[PATCH] Periodically drain non local pagesets

The pageset array can potentially acquire a huge amount of memory on large
NUMA systems.  F.e.  on a system with 512 processors and 256 nodes there
will be 256*512 pagesets.  If each pageset only holds 5 pages then we are
talking about 655360 pages. With a 16K page size on IA64 this results in
potentially 10 Gigabytes of memory being trapped in pagesets.  The typical
cases are much less for smaller systems but there is still the potential of
memory being trapped in off node pagesets.  Off node memory may be rarely
used if local memory is available and so we may potentially have memory in
seldom used pagesets without this patch.

The slab allocator flushes its per cpu caches every 2 seconds.  The
following patch flushes the off node pageset caches in the same way by
tying into the slab flush.

The patch also changes /proc/zoneinfo to include the number of pages
currently in each pageset.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
上级 578c2fd6
...@@ -133,5 +133,10 @@ extern void FASTCALL(free_cold_page(struct page *page)); ...@@ -133,5 +133,10 @@ extern void FASTCALL(free_cold_page(struct page *page));
#define free_page(addr) free_pages((addr),0) #define free_page(addr) free_pages((addr),0)
void page_alloc_init(void); void page_alloc_init(void);
#ifdef CONFIG_NUMA
/* Free this CPU's per-cpu pages held for zones on other NUMA nodes. */
void drain_remote_pages(void);
#else
/* Without NUMA there are no remote pagesets, so this is a no-op.
 * Note: no trailing semicolon after the body -- a stray `;` here is an
 * empty file-scope declaration, rejected by strict ISO C (-pedantic). */
static inline void drain_remote_pages(void) { }
#endif
#endif /* __LINUX_GFP_H */ #endif /* __LINUX_GFP_H */
...@@ -516,6 +516,36 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, ...@@ -516,6 +516,36 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
return allocated; return allocated;
} }
#ifdef CONFIG_NUMA
/*
 * Called from the slab reaper to drain remote pagesets.
 *
 * Walks every zone and, for zones that live on a NUMA node other than
 * the executing CPU's node, releases the pages cached in this CPU's
 * per-cpu pageset back to the zone's free lists.  This keeps memory
 * from being trapped indefinitely in rarely-used off-node pagesets.
 */
void drain_remote_pages(void)
{
struct zone *zone;
int i;
unsigned long flags;
/* Per-cpu pagesets are manipulated with interrupts disabled. */
local_irq_save(flags);
for_each_zone(zone) {
struct per_cpu_pageset *pset;
/* Do not drain local pagesets */
if (zone->zone_pgdat->node_id == numa_node_id())
continue;
/* This CPU's pageset for the (remote) zone. */
pset = zone->pageset[smp_processor_id()];
/* Drain every per-cpu page list in the pageset (hot and cold). */
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
if (pcp->count)
/* free_pages_bulk() returns how many pages it freed;
 * reduce the cached count accordingly. */
pcp->count -= free_pages_bulk(zone, pcp->count,
&pcp->list, 0);
}
}
local_irq_restore(flags);
}
#endif
#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU) #if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
static void __drain_pages(unsigned int cpu) static void __drain_pages(unsigned int cpu)
{ {
...@@ -1271,12 +1301,13 @@ void show_free_areas(void) ...@@ -1271,12 +1301,13 @@ void show_free_areas(void)
pageset = zone_pcp(zone, cpu); pageset = zone_pcp(zone, cpu);
for (temperature = 0; temperature < 2; temperature++) for (temperature = 0; temperature < 2; temperature++)
printk("cpu %d %s: low %d, high %d, batch %d\n", printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
cpu, cpu,
temperature ? "cold" : "hot", temperature ? "cold" : "hot",
pageset->pcp[temperature].low, pageset->pcp[temperature].low,
pageset->pcp[temperature].high, pageset->pcp[temperature].high,
pageset->pcp[temperature].batch); pageset->pcp[temperature].batch,
pageset->pcp[temperature].count);
} }
} }
......
...@@ -2851,6 +2851,7 @@ static void cache_reap(void *unused) ...@@ -2851,6 +2851,7 @@ static void cache_reap(void *unused)
} }
check_irq_on(); check_irq_on();
up(&cache_chain_sem); up(&cache_chain_sem);
drain_remote_pages();
/* Setup the next iteration */ /* Setup the next iteration */
schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id()); schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id());
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册