diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 47928399e38a58631d41d2d71c87bb4cec110a7c..3e16fe08150ec6f3bb7a94e7bc6ec12a2e5193a7 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -655,8 +655,10 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 #else
 	reserve_bootmem(phys, len);
 #endif
-	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
+	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
 		dma_reserve += len / PAGE_SIZE;
+		set_dma_reserve(dma_reserve);
+	}
 }
 
 int kern_addr_valid(unsigned long addr)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c0402da7cce0b5e454cdb4315653362cf62fd998..22936e1fcdf27464acd45454e7ff2db8004fad59 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -984,6 +984,7 @@ extern void sparse_memory_present_with_active_regions(int nid);
 extern int early_pfn_to_nid(unsigned long pfn);
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
 extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 26c9939857faa271370adff3543181dc07b3284f..8d9a1eb9fbba9dddbfa3bbb71628a1557bf3158e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -104,6 +104,7 @@ int min_free_kbytes = 1024;
 
 unsigned long __meminitdata nr_kernel_pages;
 unsigned long __meminitdata nr_all_pages;
+static unsigned long __initdata dma_reserve;
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
   /*
@@ -2213,6 +2214,20 @@ unsigned long __init zone_absent_pages_in_node(int nid,
 				arch_zone_lowest_possible_pfn[zone_type],
 				arch_zone_highest_possible_pfn[zone_type]);
 }
+
+/* Return the zone index a PFN is in */
+int memmap_zone_idx(struct page *lmem_map)
+{
+	int i;
+	unsigned long phys_addr = virt_to_phys(lmem_map);
+	unsigned long pfn = phys_addr >> PAGE_SHIFT;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		if (pfn < arch_zone_highest_possible_pfn[i])
+			break;
+
+	return i;
+}
 #else
 static inline unsigned long zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
@@ -2230,6 +2245,11 @@ static inline unsigned long zone_absent_pages_in_node(int nid,
 
 	return zholes_size[zone_type];
 }
+
+static inline int memmap_zone_idx(struct page *lmem_map)
+{
+	return MAX_NR_ZONES;
+}
 #endif
 
 static void __init calculate_node_totalpages(struct pglist_data *pgdat,
@@ -2274,12 +2294,35 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize;
+		unsigned long size, realsize, memmap_pages;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
 								zholes_size);
 
+		/*
+		 * Adjust realsize so that it accounts for how much memory
+		 * is used by this zone for memmap. This affects the watermark
+		 * and per-cpu initialisations
+		 */
+		memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
+		if (realsize >= memmap_pages) {
+			realsize -= memmap_pages;
+			printk(KERN_DEBUG
+				"  %s zone: %lu pages used for memmap\n",
+				zone_names[j], memmap_pages);
+		} else
+			printk(KERN_WARNING
+				"  %s zone: %lu pages exceeds realsize %lu\n",
+				zone_names[j], memmap_pages, realsize);
+
+		/* Account for reserved DMA pages */
+		if (j == ZONE_DMA && realsize > dma_reserve) {
+			realsize -= dma_reserve;
+			printk(KERN_DEBUG "  DMA zone: %lu pages reserved\n",
+							dma_reserve);
+		}
+
 		if (!is_highmem_idx(j))
 			nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
@@ -2596,6 +2639,21 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 }
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
+/**
+ * set_dma_reserve - Account the specified number of pages reserved in ZONE_DMA
+ * @new_dma_reserve - The number of pages to mark reserved
+ *
+ * The per-cpu batchsize and zone watermarks are determined by present_pages.
+ * In the DMA zone, a significant percentage may be consumed by kernel image
+ * and other unfreeable allocations which can skew the watermarks badly. This
+ * function may optionally be used to account for unfreeable pages in
+ * ZONE_DMA. The effect will be lower watermarks and smaller per-cpu batchsize
+ */
+void __init set_dma_reserve(unsigned long new_dma_reserve)
+{
+	dma_reserve = new_dma_reserve;
+}
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 static bootmem_data_t contig_bootmem_data;
 struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
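
For illustration only, and not part of the patch itself: the standalone sketch below walks through the same realsize adjustment that the free_area_init_core() hunk above performs, using made-up example numbers (4KiB pages, a 56-byte struct page, a 4096-page DMA zone with 1024 pages registered via set_dma_reserve()). The kernel derives all of these from its own configuration; the values here exist only to show the arithmetic.

#include <stdio.h>

/* Example values only; the real kernel takes these from its configuration. */
#define EXAMPLE_PAGE_SHIFT	12	/* 4KiB pages */
#define EXAMPLE_STRUCT_PAGE_SZ	56UL	/* assumed sizeof(struct page) */

int main(void)
{
	unsigned long size = 4096UL;		/* pages spanned by the zone */
	unsigned long realsize = 4096UL;	/* spanned pages minus holes */
	unsigned long dma_reserve = 1024UL;	/* as set via set_dma_reserve() */
	unsigned long memmap_pages;

	/* Same calculation as the free_area_init_core() hunk above */
	memmap_pages = (size * EXAMPLE_STRUCT_PAGE_SZ) >> EXAMPLE_PAGE_SHIFT;
	if (realsize >= memmap_pages)
		realsize -= memmap_pages;

	/* Unfreeable DMA pages registered by the architecture */
	if (realsize > dma_reserve)
		realsize -= dma_reserve;

	printf("memmap_pages=%lu adjusted realsize=%lu\n",
	       memmap_pages, realsize);
	return 0;
}

With these example numbers, 56 pages are charged to memmap and 1024 to the DMA reserve, so the zone's present_pages drops from 4096 to 3016, which in turn gives lower watermarks and a smaller per-cpu batchsize, as described in the set_dma_reserve() comment.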