diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fad4668ca826feab29eeba2ca3c6aa31254833d1..1352d8cad2c9c7b102f7c601ebaa1678893ec063 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -66,6 +66,7 @@
 #include
 #include
 #include
+#include <linux/ktask.h>
 #include
 #include
@@ -1295,7 +1296,6 @@ static void __init __free_pages_boot_core(struct page *page, unsigned int order)
 	__ClearPageReserved(p);
 	set_page_count(p, 0);
 
-	page_zone(page)->managed_pages += nr_pages;
 	set_page_refcounted(page);
 	__free_pages(page, order);
 }
@@ -1359,6 +1359,8 @@ void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
 							unsigned int order)
 {
 	if (early_page_uninitialised(pfn))
 		return;
+
+	page_zone(page)->managed_pages += 1UL << order;
 	return __free_pages_boot_core(page, order);
 }
@@ -1497,23 +1499,31 @@ deferred_pfn_valid(int nid, unsigned long pfn,
 	return true;
 }
 
+struct deferred_args {
+	int nid;
+	int zid;
+	atomic64_t nr_pages;
+};
+
 /*
  * Free pages to buddy allocator. Try to free aligned pages in
  * pageblock_nr_pages sizes.
  */
-static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
-				       unsigned long end_pfn)
+static int __init deferred_free_pages(int nid, int zid, unsigned long pfn,
+				      unsigned long end_pfn)
 {
 	struct mminit_pfnnid_cache nid_init_state = { };
 	unsigned long nr_pgmask = pageblock_nr_pages - 1;
-	unsigned long nr_free = 0;
+	unsigned long nr_free = 0, nr_pages = 0;
 
 	for (; pfn < end_pfn; pfn++) {
 		if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
 			deferred_free_range(pfn - nr_free, nr_free);
+			nr_pages += nr_free;
 			nr_free = 0;
 		} else if (!(pfn & nr_pgmask)) {
 			deferred_free_range(pfn - nr_free, nr_free);
+			nr_pages += nr_free;
 			nr_free = 1;
 			touch_nmi_watchdog();
 		} else {
@@ -1522,16 +1532,27 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
 	}
 	/* Free the last block of pages to allocator */
 	deferred_free_range(pfn - nr_free, nr_free);
+	nr_pages += nr_free;
+
+	return nr_pages;
+}
+
+static int __init deferred_free_chunk(unsigned long pfn, unsigned long end_pfn,
+				      struct deferred_args *args)
+{
+	unsigned long nr_pages = deferred_free_pages(args->nid, args->zid, pfn,
+						     end_pfn);
+	atomic64_add(nr_pages, &args->nr_pages);
+	return KTASK_RETURN_SUCCESS;
 }
 
 /*
  * Initialize struct pages. We minimize pfn page lookups and scheduler checks
  * by performing it only once every pageblock_nr_pages.
- * Return number of pages initialized.
+ * Return number of pages initialized in deferred_args.
  */
-static unsigned long __init deferred_init_pages(int nid, int zid,
-						unsigned long pfn,
-						unsigned long end_pfn)
+static int __init deferred_init_pages(int nid, int zid, unsigned long pfn,
+				      unsigned long end_pfn)
 {
 	struct mminit_pfnnid_cache nid_init_state = { };
 	unsigned long nr_pgmask = pageblock_nr_pages - 1;
@@ -1551,7 +1572,17 @@ static unsigned long __init deferred_init_pages(int nid, int zid,
 		__init_single_page(page, pfn, zid, nid);
 		nr_pages++;
 	}
-	return (nr_pages);
+
+	return nr_pages;
+}
+
+static int __init deferred_init_chunk(unsigned long pfn, unsigned long end_pfn,
+				      struct deferred_args *args)
+{
+	unsigned long nr_pages = deferred_init_pages(args->nid, args->zid, pfn,
+						     end_pfn);
+	atomic64_add(nr_pages, &args->nr_pages);
+	return KTASK_RETURN_SUCCESS;
 }
 
 /* Initialise remaining memory on a node */
@@ -1560,13 +1591,15 @@ static int __init deferred_init_memmap(void *data)
 	pg_data_t *pgdat = data;
 	int nid = pgdat->node_id;
 	unsigned long start = jiffies;
-	unsigned long nr_pages = 0;
+	unsigned long nr_init = 0, nr_free = 0;
 	unsigned long spfn, epfn, first_init_pfn, flags;
 	phys_addr_t spa, epa;
 	int zid;
 	struct zone *zone;
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 	u64 i;
+	unsigned long nr_node_cpus;
+	struct ktask_node kn;
 
 	/* Bind memory initialisation thread to a local node if possible */
 	if (!cpumask_empty(cpumask))
@@ -1580,6 +1613,14 @@ static int __init deferred_init_memmap(void *data)
 		return 0;
 	}
 
+	/*
+	 * We'd like to know the memory bandwidth of the chip to calculate the
+	 * most efficient number of threads to start, but we can't. In
+	 * testing, a good value for a variety of systems was a quarter of the
+	 * CPUs on the node.
+	 */
+	nr_node_cpus = DIV_ROUND_UP(cpumask_weight(cpumask), 4);
+
 	/* Sanity check boundaries */
 	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
 	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
@@ -1600,21 +1641,46 @@ static int __init deferred_init_memmap(void *data)
 	 * page in __free_one_page()).
 	 */
 	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+		struct deferred_args args = { nid, zid, ATOMIC64_INIT(0) };
+		DEFINE_KTASK_CTL(ctl, deferred_init_chunk, &args,
+				 KTASK_PTE_MINCHUNK);
+		ktask_ctl_set_max_threads(&ctl, nr_node_cpus);
+
 		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
 		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
-		nr_pages += deferred_init_pages(nid, zid, spfn, epfn);
+
+		kn.kn_start = (void *)spfn;
+		kn.kn_task_size = (spfn < epfn) ? epfn - spfn : 0;
+		kn.kn_nid = nid;
+		(void) ktask_run_numa(&kn, 1, &ctl);
+
+		nr_init += atomic64_read(&args.nr_pages);
 	}
 	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+		struct deferred_args args = { nid, zid, ATOMIC64_INIT(0) };
+		DEFINE_KTASK_CTL(ctl, deferred_free_chunk, &args,
+				 KTASK_PTE_MINCHUNK);
+		ktask_ctl_set_max_threads(&ctl, nr_node_cpus);
+
 		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
 		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
-		deferred_free_pages(nid, zid, spfn, epfn);
+
+		kn.kn_start = (void *)spfn;
+		kn.kn_task_size = (spfn < epfn) ? epfn - spfn : 0;
+		kn.kn_nid = nid;
+		(void) ktask_run_numa(&kn, 1, &ctl);
+
+		nr_free += atomic64_read(&args.nr_pages);
 	}
 	pgdat_resize_unlock(pgdat, &flags);
 
 	/* Sanity check that the next zone really is unpopulated */
 	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+	VM_BUG_ON(nr_init != nr_free);
+
+	zone->managed_pages += nr_free;
 
-	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
+	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_free,
 					jiffies_to_msecs(jiffies - start));
 
 	pgdat_init_report_one_done();
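
Note on the ktask calls above: ktask itself is introduced by earlier patches in this series, so its chunking behaviour is not visible in this diff. The sketch below is a hypothetical serial stand-in for what ktask_run_numa() is assumed to do with the single ktask_node built in deferred_init_memmap(): walk [kn_start, kn_start + kn_task_size) in chunks of at least KTASK_PTE_MINCHUNK pfns and hand each chunk to the registered thread function (deferred_init_chunk() or deferred_free_chunk()), with the real implementation spreading those calls across up to nr_node_cpus workers bound to kn_nid. The helper name and signature here are illustrative only.

/* Hypothetical serial equivalent of ktask_run_numa(); illustration only. */
static int __init run_node_range_serially(unsigned long start_pfn,
					  unsigned long nr_pfns,
					  unsigned long min_chunk,
					  int (*thread_fn)(unsigned long,
							   unsigned long,
							   struct deferred_args *),
					  struct deferred_args *args)
{
	unsigned long pfn = start_pfn;
	unsigned long end_pfn = start_pfn + nr_pfns;

	while (pfn < end_pfn) {
		/*
		 * Each call covers [pfn, chunk_end); the real ktask would
		 * hand these chunks to its worker threads instead of running
		 * them one after another.
		 */
		unsigned long chunk_end = min(pfn + min_chunk, end_pfn);
		int ret = thread_fn(pfn, chunk_end, args);

		if (ret != KTASK_RETURN_SUCCESS)
			return ret;
		pfn = chunk_end;
	}
	return KTASK_RETURN_SUCCESS;
}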