diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 746b57e3d3704a76702f1b25b23d003710847b61..6c90461ed99fd5707c178d3b4fbf922b367b55b0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -476,11 +476,56 @@ extern struct pglist_data contig_page_data;
 
 struct page;
 struct mem_section {
-	struct page *section_mem_map;
+	/*
+	 * This is, logically, a pointer to an array of struct
+	 * pages.  However, it is stored with some other magic.
+	 * (see sparse.c::sparse_init_one_section())
+	 *
+	 * Making it a UL at least makes someone do a cast
+	 * before using it wrong.
+	 */
+	unsigned long section_mem_map;
 };
 
 extern struct mem_section mem_section[NR_MEM_SECTIONS];
 
+static inline struct mem_section *__nr_to_section(unsigned long nr)
+{
+	return &mem_section[nr];
+}
+
+/*
+ * We use the lower bits of the mem_map pointer to store
+ * a little bit of information.  There should be at least
+ * 3 bits here due to 32-bit alignment.
+ */
+#define SECTION_MARKED_PRESENT	(1UL<<0)
+#define SECTION_HAS_MEM_MAP	(1UL<<1)
+#define SECTION_MAP_LAST_BIT	(1UL<<2)
+#define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
+
+static inline struct page *__section_mem_map_addr(struct mem_section *section)
+{
+	unsigned long map = section->section_mem_map;
+	map &= SECTION_MAP_MASK;
+	return (struct page *)map;
+}
+
+static inline int valid_section(struct mem_section *section)
+{
+	return (section->section_mem_map & SECTION_MARKED_PRESENT);
+}
+
+static inline int section_has_mem_map(struct mem_section *section)
+{
+	return (section->section_mem_map & SECTION_HAS_MEM_MAP);
+}
+
+static inline int valid_section_nr(unsigned long nr)
+{
+	return valid_section(__nr_to_section(nr));
+}
+
 /*
  * Given a kernel address, find the home node of the underlying memory.
  */
@@ -488,24 +533,25 @@ extern struct mem_section mem_section[NR_MEM_SECTIONS];
 
 static inline struct mem_section *__pfn_to_section(unsigned long pfn)
 {
-	return &mem_section[pfn_to_section_nr(pfn)];
+	return __nr_to_section(pfn_to_section_nr(pfn));
 }
 
 #define pfn_to_page(pfn)						\
 ({									\
 	unsigned long __pfn = (pfn);					\
-	__pfn_to_section(__pfn)->section_mem_map + __pfn;		\
+	__section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn;	\
 })
 #define page_to_pfn(page)						\
 ({									\
-	page - mem_section[page_to_section(page)].section_mem_map;	\
+	page - __section_mem_map_addr(__nr_to_section(			\
+		page_to_section(page)));				\
 })
 
 static inline int pfn_valid(unsigned long pfn)
 {
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
 		return 0;
-	return mem_section[pfn_to_section_nr(pfn)].section_mem_map != 0;
+	return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
 }
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1eb683f9b3af45108132b0acc98fb01fea2428e4..7ee675ad101eb579375050fb8ff503f6fbc4cfd1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1650,8 +1650,8 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		unsigned long start_pfn)
 {
 	struct page *page;
-	int end_pfn = start_pfn + size;
-	int pfn;
+	unsigned long end_pfn = start_pfn + size;
+	unsigned long pfn;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) {
 		if (!early_pfn_valid(pfn))
diff --git a/mm/sparse.c b/mm/sparse.c
index f888385b9e14ef46438d79974c24fdbc873af820..b54e304df4a70c7232dcc9464f69582d68339993 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -25,7 +25,7 @@ void memory_present(int nid, unsigned long start, unsigned long end)
 	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 		unsigned long section = pfn_to_section_nr(pfn);
 		if (!mem_section[section].section_mem_map)
-			mem_section[section].section_mem_map = (void *) -1;
+			mem_section[section].section_mem_map = SECTION_MARKED_PRESENT;
 	}
 }
 
@@ -50,6 +50,56 @@ unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
 	return nr_pages * sizeof(struct page);
 }
 
+/*
+ * Subtle, we encode the real pfn into the mem_map such that
+ * the identity pfn - section_mem_map will return the actual
+ * physical page frame number.
+ */
+static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
+{
+	return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
+}
+
+/*
+ * We need this if we ever free the mem_maps.  While not implemented yet,
+ * this function is included for parity with its sibling.
+ */
+static __attribute((unused))
+struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
+{
+	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
+}
+
+static int sparse_init_one_section(struct mem_section *ms,
+		unsigned long pnum, struct page *mem_map)
+{
+	if (!valid_section(ms))
+		return -EINVAL;
+
+	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum);
+
+	return 1;
+}
+
+static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
+{
+	struct page *map;
+	int nid = early_pfn_to_nid(section_nr_to_pfn(pnum));
+
+	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
+	if (map)
+		return map;
+
+	map = alloc_bootmem_node(NODE_DATA(nid),
+			sizeof(struct page) * PAGES_PER_SECTION);
+	if (map)
+		return map;
+
+	printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
+	mem_section[pnum].section_mem_map = 0;
+	return NULL;
+}
+
 /*
  * Allocate the accumulated non-linear sections, allocate a mem_map
  * for each and record the physical to section mapping.
@@ -58,28 +108,30 @@ void sparse_init(void)
 {
 	unsigned long pnum;
 	struct page *map;
-	int nid;
 
 	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
-		if (!mem_section[pnum].section_mem_map)
+		if (!valid_section_nr(pnum))
 			continue;
 
-		nid = early_pfn_to_nid(section_nr_to_pfn(pnum));
-		map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
-		if (!map)
-			map = alloc_bootmem_node(NODE_DATA(nid),
-				sizeof(struct page) * PAGES_PER_SECTION);
-		if (!map) {
-			mem_section[pnum].section_mem_map = 0;
-			continue;
-		}
-
-		/*
-		 * Subtle, we encode the real pfn into the mem_map such that
-		 * the identity pfn - section_mem_map will return the actual
-		 * physical page frame number.
-		 */
-		mem_section[pnum].section_mem_map = map -
-					section_nr_to_pfn(pnum);
+		map = sparse_early_mem_map_alloc(pnum);
+		if (map)
+			sparse_init_one_section(&mem_section[pnum], pnum, map);
 	}
 }
+
+/*
+ * returns the number of sections whose mem_maps were properly
+ * set.  If this is <=0, then that means that the passed-in
+ * map was not consumed and must be freed.
+ */
+int sparse_add_one_section(unsigned long start_pfn, int nr_pages, struct page *map)
+{
+	struct mem_section *ms = __pfn_to_section(start_pfn);
+
+	if (ms->section_mem_map & SECTION_MARKED_PRESENT)
+		return -EEXIST;
+
+	ms->section_mem_map |= SECTION_MARKED_PRESENT;
+
+	return sparse_init_one_section(ms, pfn_to_section_nr(start_pfn), map);
+}
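
A note on the encoding, since it is easy to misread: sparse_encode_mem_map() stores mem_map *minus* the section's first pfn, so pfn_to_page() can add the full pfn in a single step, and because struct page is word-aligned the low bits of the stored value stay free for the SECTION_MARKED_PRESENT/SECTION_HAS_MEM_MAP flags. The standalone sketch below walks through that round trip. It is illustrative only: the PFN_SECTION_SHIFT value, the toy struct page, and fake_mem_map are made-up stand-ins for the kernel's definitions, and the biasing is done in plain unsigned long arithmetic where the kernel subtracts in units of struct page.

/*
 * Userspace sketch of the section_mem_map encoding used by this patch.
 * Assumptions (not the kernel's values): PFN_SECTION_SHIFT of 12, a
 * two-word toy struct page, and a static array standing in for a
 * bootmem-allocated mem_map.
 */
#include <assert.h>
#include <stdio.h>

#define PFN_SECTION_SHIFT	12	/* assumed: 4096 pages per section */
#define PAGES_PER_SECTION	(1UL << PFN_SECTION_SHIFT)
#define section_nr_to_pfn(sec)	((sec) << PFN_SECTION_SHIFT)

#define SECTION_MARKED_PRESENT	(1UL<<0)
#define SECTION_HAS_MEM_MAP	(1UL<<1)
#define SECTION_MAP_LAST_BIT	(1UL<<2)
#define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))

struct page { unsigned long flags; void *lru; };	/* toy stand-in */

static struct page fake_mem_map[PAGES_PER_SECTION];	/* one section's mem_map */

int main(void)
{
	unsigned long pnum = 5;			/* arbitrary section number */
	unsigned long base_pfn = section_nr_to_pfn(pnum);

	/*
	 * Encode: bias the mem_map address down by the section's first
	 * pfn, so that "encoded + pfn" lands on the right struct page.
	 * Word alignment of struct page keeps the low bits clear for
	 * the flag bits we OR in, exactly as sparse_init_one_section()
	 * ORs the encoded map into a field already holding the flags.
	 */
	unsigned long coded = (unsigned long)fake_mem_map -
				base_pfn * sizeof(struct page);
	coded |= SECTION_MARKED_PRESENT | SECTION_HAS_MEM_MAP;

	/* valid_section()/section_has_mem_map() are just these bit tests */
	assert(coded & SECTION_MARKED_PRESENT);
	assert(coded & SECTION_HAS_MEM_MAP);

	/*
	 * Decode, as pfn_to_page() does: mask off the flag bits, then
	 * index with the *global* pfn, not the section-local offset.
	 */
	unsigned long pfn = base_pfn + 42;	/* 42nd page of the section */
	unsigned long page_addr = (coded & SECTION_MAP_MASK) +
				pfn * sizeof(struct page);

	assert(page_addr == (unsigned long)&fake_mem_map[42]);
	printf("pfn %lu -> mem_map slot 42: round trip OK\n", pfn);
	return 0;
}

The asserts exercise the same identity that pfn_to_page() and page_to_pfn() rely on after this patch; the reverse direction, page - decoded_base, yields the global pfn for free.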