diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 420f7317aa6af681e32b2bed0e722f1769ad963f..5623a9d1cf81359fcd27ff9fa57d59156ba96b7a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1402,6 +1402,11 @@ registers. Default set by CONFIG_HPET_MMAP_DEFAULT. hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. + If using node format, the number of pages to allocate + per-node can be specified. + Format: or (node format) + :[,:] + hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. On x86-64 and powerpc, this option can be specified multiple times interleaved with hugepages= to reserve diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index cef0b7ee1024646cdc0edeb4d9c5cbbc0a4837fc..163b0ef7d156e157a989f3f152e1736d1bea58a5 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -241,17 +241,22 @@ int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) m->hstate = hstate; return 1; } + +bool __init hugetlb_node_alloc_supported(void) +{ + return false; +} #endif -int __init alloc_bootmem_huge_page(struct hstate *h) +int __init alloc_bootmem_huge_page(struct hstate *h, int nid) { #ifdef CONFIG_PPC_BOOK3S_64 if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()) return pseries_alloc_bootmem_huge_page(h); #endif - return __alloc_bootmem_huge_page(h); + return __alloc_bootmem_huge_page(h, nid); } #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 561e7679fadc443f8e829d875245a215d95b7423..588f9f2a44fceb6630fdee663fc499d3d5fd0f45 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -347,6 +347,7 @@ struct hstate { unsigned long nr_overcommit_huge_pages; struct list_head hugepage_activelist; struct list_head hugepage_freelists[MAX_NUMNODES]; + unsigned int max_huge_pages_node[MAX_NUMNODES]; unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; @@ -409,8 +410,9 @@ int hugetlb_insert_hugepage(struct vm_area_struct *vma, unsigned long addr, struct page *hpage, pgprot_t prot); /* arch callback */ -int __init __alloc_bootmem_huge_page(struct hstate *h); -int __init alloc_bootmem_huge_page(struct hstate *h); +int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); +int __init alloc_bootmem_huge_page(struct hstate *h, int nid); +bool __init hugetlb_node_alloc_supported(void); void __init hugetlb_bad_size(void); void __init hugetlb_add_hstate(unsigned order); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b2ed9174bfd730f3a34447932deacfa85e285bc3..8286a6f8cb05066a10248439d2cbf2cc0506bc9f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -56,6 +56,7 @@ static struct hstate * __initdata parsed_hstate; static unsigned long __initdata default_hstate_max_huge_pages; static unsigned long __initdata default_hstate_size; static bool __initdata parsed_valid_hugepagesz = true; +static unsigned int default_hugepages_in_node[MAX_NUMNODES] __initdata; int enable_charge_mighp __read_mostly; /* @@ -2241,33 +2242,39 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, return ERR_PTR(-ENOSPC); } -int alloc_bootmem_huge_page(struct hstate *h) +int alloc_bootmem_huge_page(struct hstate *h, int nid) __attribute__ ((weak, alias("__alloc_bootmem_huge_page"))); -int __alloc_bootmem_huge_page(struct hstate *h) +int __alloc_bootmem_huge_page(struct hstate *h, int nid) { - struct huge_bootmem_page *m; + struct huge_bootmem_page *m = NULL; /* initialize for clang */ int nr_nodes, node; + if (nid >= nr_online_nodes) + return 0; + /* do node specific alloc */ + if (nid != NUMA_NO_NODE) { + m = memblock_virt_alloc_try_nid_raw(huge_page_size(h), huge_page_size(h), + 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid); + if (!m) + return 0; + goto found; + } + /* allocate from next node when distributing huge pages */ for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) { - void *addr; - - addr = memblock_virt_alloc_try_nid_raw( + m = memblock_virt_alloc_try_nid_raw( huge_page_size(h), huge_page_size(h), 0, BOOTMEM_ALLOC_ACCESSIBLE, node); - if (addr) { - /* - * Use the beginning of the huge page to store the - * huge_bootmem_page struct (until gather_bootmem - * puts them into the mem_map). - */ - m = addr; - goto found; - } + /* + * Use the beginning of the huge page to store the + * huge_bootmem_page struct (until gather_bootmem + * puts them into the mem_map). + */ + if (!m) + return 0; + goto found; } - return 0; found: - BUG_ON(!IS_ALIGNED(virt_to_phys(m), huge_page_size(h))); /* Put them into a private list first because mem_map is not up yet */ INIT_LIST_HEAD(&m->list); list_add(&m->list, &huge_boot_pages); @@ -2310,12 +2317,55 @@ static void __init gather_bootmem_prealloc(void) cond_resched(); } } +static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid) +{ + unsigned long i; + char buf[32]; + + for (i = 0; i < h->max_huge_pages_node[nid]; ++i) { + if (hstate_is_gigantic(h)) { + if (!alloc_bootmem_huge_page(h, nid)) + break; + } else { + struct page *page; + gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; + + page = alloc_fresh_huge_page(h, gfp_mask, nid, + &node_states[N_MEMORY], NULL); + if (!page) + break; + put_page(page); /* free it into the hugepage allocator */ + } + cond_resched(); + } + if (i == h->max_huge_pages_node[nid]) + return; + + string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32); + pr_warn("HugeTLB: allocating %u of page size %s failed node%d. Only allocated %lu hugepages.\n", + h->max_huge_pages_node[nid], buf, nid, i); + h->max_huge_pages -= (h->max_huge_pages_node[nid] - i); + h->max_huge_pages_node[nid] = i; +} static void __init hugetlb_hstate_alloc_pages(struct hstate *h) { unsigned long i; nodemask_t *node_alloc_noretry; + bool node_specific_alloc = false; + + /* do node specific alloc */ + for (i = 0; i < nr_online_nodes; i++) { + if (h->max_huge_pages_node[i] > 0) { + hugetlb_hstate_alloc_pages_onenode(h, i); + node_specific_alloc = true; + } + } + + if (node_specific_alloc) + return; + /* below will do all node balanced alloc */ if (!hstate_is_gigantic(h)) { /* * Bit mask controlling how hard we retry per-node allocations. @@ -2336,7 +2386,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) for (i = 0; i < h->max_huge_pages; ++i) { if (hstate_is_gigantic(h)) { - if (!alloc_bootmem_huge_page(h)) + if (!alloc_bootmem_huge_page(h, NUMA_NO_NODE)) break; } else if (!alloc_pool_huge_page(h, &node_states[N_MEMORY], @@ -2991,8 +3041,13 @@ static int __init hugetlb_init(void) } default_hstate_idx = hstate_index(size_to_hstate(default_hstate_size)); if (default_hstate_max_huge_pages) { - if (!default_hstate.max_huge_pages) + if (!default_hstate.max_huge_pages) { default_hstate.max_huge_pages = default_hstate_max_huge_pages; + + for (i = 0; i < nr_online_nodes; i++) + default_hstate.max_huge_pages_node[i] = + default_hugepages_in_node[i]; + } } hugetlb_init_hstates(); @@ -3052,10 +3107,19 @@ void __init hugetlb_add_hstate(unsigned int order) parsed_hstate = h; } +bool __init __weak hugetlb_node_alloc_supported(void) +{ + return true; +} + static int __init hugetlb_nrpages_setup(char *s) { unsigned long *mhp; static unsigned long *last_mhp; + int node = NUMA_NO_NODE; + int count; + unsigned long tmp; + char *p = s; if (!parsed_valid_hugepagesz) { pr_warn("hugepages = %s preceded by " @@ -3077,8 +3141,40 @@ static int __init hugetlb_nrpages_setup(char *s) return 1; } - if (sscanf(s, "%lu", mhp) <= 0) - *mhp = 0; + while (*p) { + count = 0; + if (sscanf(p, "%lu%n", &tmp, &count) != 1) + goto invalid; + /* Parameter is node format */ + if (p[count] == ':') { + if (!hugetlb_node_alloc_supported()) { + pr_warn("HugeTLB: architecture can't support node specific alloc, ignoring!\n"); + return 0; + } + node = tmp; + p += count + 1; + if (node < 0 || node >= nr_online_nodes) + goto invalid; + /* Parse hugepages */ + if (sscanf(p, "%lu%n", &tmp, &count) != 1) + goto invalid; + if (!hugetlb_max_hstate) + default_hugepages_in_node[node] = tmp; + else + parsed_hstate->max_huge_pages_node[node] = tmp; + *mhp += tmp; + /* Go to parse next node*/ + if (p[count] == ',') + p += count + 1; + else + break; + } else { + if (p != s) + goto invalid; + *mhp = tmp; + break; + } + } /* * Global state is always initialized later in hugetlb_init. @@ -3091,6 +3187,10 @@ static int __init hugetlb_nrpages_setup(char *s) last_mhp = mhp; return 1; + +invalid: + pr_warn("HugeTLB: Invalid hugepages parameter %s\n", p); + return 0; } __setup("hugepages=", hugetlb_nrpages_setup);