diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index aca9c65f8d08b8ce9b3ed38c92877cff44431d22..73e358612eaffa3aa9ef27026d542a382b3a2afd 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -11,6 +11,15 @@ struct pglist_data; struct mem_section; #ifdef CONFIG_MEMORY_HOTPLUG + +/* + * Magic number for free bootmem. + * The normal smallest mapcount is -1. Here is smaller value than it. + */ +#define SECTION_INFO 0xfffffffe +#define MIX_INFO 0xfffffffd +#define NODE_INFO 0xfffffffc + /* * pgdat resizing functions */ @@ -145,6 +154,18 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) #endif /* CONFIG_NUMA */ #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ +#ifdef CONFIG_SPARSEMEM_VMEMMAP +static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) +{ +} +static inline void put_page_bootmem(struct page *page) +{ +} +#else +extern void register_page_bootmem_info_node(struct pglist_data *pgdat); +extern void put_page_bootmem(struct page *page); +#endif + #else /* ! CONFIG_MEMORY_HOTPLUG */ /* * Stub functions for when hotplug is off @@ -172,6 +193,10 @@ static inline int mhp_notimplemented(const char *func) return -ENOSYS; } +static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) +{ +} + #endif /* ! CONFIG_MEMORY_HOTPLUG */ extern int add_memory(int nid, u64 start, u64 size); @@ -180,5 +205,7 @@ extern int remove_memory(u64 start, u64 size); extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms); +extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, + unsigned long pnum); #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c3828497f41d7a6960287d95398de19ddb566072..aad98003176f895ab9c361765109f981fe25be20 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -896,6 +896,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; } extern int __section_nr(struct mem_section* ms); +extern unsigned long usemap_size(void); /* * We use the lower bits of the mem_map pointer to store diff --git a/mm/bootmem.c b/mm/bootmem.c index b6791646143e9a24e7aeaacef415e572e619da45..369624d2789cc0cc44abddffe6954e43c31fe6f9 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -461,6 +461,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { + register_page_bootmem_info_node(pgdat); return free_all_bootmem_core(pgdat); } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c8b3ca79de2d65ab2430f17f9cd92bfb4c417728..cba36ef0d5063beea3b8e7edb92d415683d66f27 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -58,8 +58,105 @@ static void release_memory_resource(struct resource *res) return; } - #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifndef CONFIG_SPARSEMEM_VMEMMAP +static void get_page_bootmem(unsigned long info, struct page *page, int magic) +{ + atomic_set(&page->_mapcount, magic); + SetPagePrivate(page); + set_page_private(page, info); + atomic_inc(&page->_count); +} + +void put_page_bootmem(struct page *page) +{ + int magic; + + magic = atomic_read(&page->_mapcount); + BUG_ON(magic >= -1); + + if (atomic_dec_return(&page->_count) == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + reset_page_mapcount(page); + __free_pages_bootmem(page, 0); + } + +} + +void register_page_bootmem_info_section(unsigned long start_pfn) +{ + unsigned long *usemap, mapsize, section_nr, i; + struct mem_section *ms; + struct page *page, *memmap; + + if (!pfn_valid(start_pfn)) + return; + + section_nr = pfn_to_section_nr(start_pfn); + ms = __nr_to_section(section_nr); + + /* Get section's memmap address */ + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); + + /* + * Get page for the memmap's phys address + * XXX: need more consideration for sparse_vmemmap... + */ + page = virt_to_page(memmap); + mapsize = sizeof(struct page) * PAGES_PER_SECTION; + mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT; + + /* remember memmap's page */ + for (i = 0; i < mapsize; i++, page++) + get_page_bootmem(section_nr, page, SECTION_INFO); + + usemap = __nr_to_section(section_nr)->pageblock_flags; + page = virt_to_page(usemap); + + mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; + + for (i = 0; i < mapsize; i++, page++) + get_page_bootmem(section_nr, page, MIX_INFO); + +} + +void register_page_bootmem_info_node(struct pglist_data *pgdat) +{ + unsigned long i, pfn, end_pfn, nr_pages; + int node = pgdat->node_id; + struct page *page; + struct zone *zone; + + nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT; + page = virt_to_page(pgdat); + + for (i = 0; i < nr_pages; i++, page++) + get_page_bootmem(node, page, NODE_INFO); + + zone = &pgdat->node_zones[0]; + for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) { + if (zone->wait_table) { + nr_pages = zone->wait_table_hash_nr_entries + * sizeof(wait_queue_head_t); + nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT; + page = virt_to_page(zone->wait_table); + + for (i = 0; i < nr_pages; i++, page++) + get_page_bootmem(node, page, NODE_INFO); + } + } + + pfn = pgdat->node_start_pfn; + end_pfn = pfn + pgdat->node_spanned_pages; + + /* register_section info */ + for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) + register_page_bootmem_info_section(pfn); + +} +#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ + static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) { struct pglist_data *pgdat = zone->zone_pgdat; diff --git a/mm/sparse.c b/mm/sparse.c index 186a85bf7912b559cda312ec636e4bb962233785..8903c484389adfe18be39db99b29c5e5840ed296 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -210,7 +210,6 @@ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p /* * Decode mem_map from the coded memmap */ -static struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) { /* mask off the extra low bits of information */ @@ -233,7 +232,7 @@ static int __meminit sparse_init_one_section(struct mem_section *ms, return 1; } -static unsigned long usemap_size(void) +unsigned long usemap_size(void) { unsigned long size_bytes; size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;