/*
 *  bootmem - A boot-time physical memory allocator and configurator
 *
 *  Copyright (C) 1999 Ingo Molnar
 *                1999 Kanoj Sarcar, SGI
 *                2008 Johannes Weiner
 *
 * Access to this subsystem has to be serialized externally (which is true
 * for the boot process anyway).
 */
#include <linux/init.h>
#include <linux/pfn.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/kmemleak.h>
#include <linux/range.h>

#include <asm/bug.h>
#include <asm/io.h>
#include <asm/processor.h>

#include "internal.h"

unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;

#ifdef CONFIG_CRASH_DUMP
/*
 * If we have booted due to a crash, max_pfn will be a very low value. We need
 * to know the amount of memory that the previous kernel used.
 */
unsigned long saved_max_pfn;
#endif

#ifndef CONFIG_NO_BOOTMEM
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;

static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);

static int bootmem_debug;

static int __init bootmem_debug_setup(char *buf)
{
	bootmem_debug = 1;
	return 0;
}
early_param("bootmem_debug", bootmem_debug_setup);

#define bdebug(fmt, args...) ({				\
	if (unlikely(bootmem_debug))			\
		printk(KERN_INFO			\
			"bootmem::%s " fmt,		\
			__func__, ## args);		\
})
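
/*
 * Example (illustrative): booting with "bootmem_debug" on the kernel
 * command line sets bootmem_debug above, so each bdebug() in this file
 * prints a line such as:
 *
 *	bootmem::mark_bootmem_node nid=0 start=100 end=200 reserve=1 flags=0
 */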

static unsigned long __init bootmap_bytes(unsigned long pages)
{
	unsigned long bytes = (pages + 7) / 8;

	return ALIGN(bytes, sizeof(long));
}

/**
 * bootmem_bootmap_pages - calculate bitmap size in pages
 * @pages: number of pages the bitmap has to represent
 */
unsigned long __init bootmem_bootmap_pages(unsigned long pages)
{
	unsigned long bytes = bootmap_bytes(pages);

	return PAGE_ALIGN(bytes) >> PAGE_SHIFT;
}
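
/*
 * Worked example (illustrative, assuming 4K pages and 64-bit longs):
 * representing 131072 pages (512MB) takes (131072 + 7) / 8 = 16384
 * bytes of bitmap, so bootmem_bootmap_pages(131072) returns 4 pages.
 */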

/*
 * link bdata in order
 */
static void __init link_bootmem(bootmem_data_t *bdata)
{
	struct list_head *iter;

	list_for_each(iter, &bdata_list) {
		bootmem_data_t *ent;

		ent = list_entry(iter, bootmem_data_t, list);
		if (bdata->node_min_pfn < ent->node_min_pfn)
			break;
	}
	list_add_tail(&bdata->list, iter);
}

/*
 * Called once to set up the allocator itself.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
	unsigned long mapstart, unsigned long start, unsigned long end)
{
	unsigned long mapsize;

	mminit_validate_memmodel_limits(&start, &end);
	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
	bdata->node_min_pfn = start;
	bdata->node_low_pfn = end;
	link_bootmem(bdata);

	/*
	 * Initially all pages are reserved - setup_arch() has to
	 * register free RAM areas explicitly.
	 */
	mapsize = bootmap_bytes(end - start);
	memset(bdata->node_bootmem_map, 0xff, mapsize);

	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
		bdata - bootmem_node_data, start, mapstart, end, mapsize);

	return mapsize;
}

/**
 * init_bootmem_node - register a node as boot memory
 * @pgdat: node to register
 * @freepfn: pfn where the bitmap for this node is to be placed
 * @startpfn: first pfn on the node
 * @endpfn: first pfn after the node
 *
 * Returns the number of bytes needed to hold the bitmap for this node.
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
				unsigned long startpfn, unsigned long endpfn)
{
	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
}

/**
 * init_bootmem - register boot memory
 * @start: pfn where the bitmap is to be placed
 * @pages: number of available physical pages
 *
 * Returns the number of bytes needed to hold the bitmap.
 */
unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
{
	max_low_pfn = pages;
	min_low_pfn = start;
	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
}
#endif
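
/*
 * Typical call sequence (sketch, not taken from this file): an arch's
 * setup_arch() might do
 *
 *	bootmap_size = init_bootmem(bitmap_pfn, max_low_pfn);
 *	free_bootmem(ram_start, ram_size);
 *	reserve_bootmem(bitmap_pfn << PAGE_SHIFT, bootmap_size,
 *			BOOTMEM_DEFAULT);
 *
 * i.e. register the node, mark the usable RAM free (everything starts
 * out reserved, see init_bootmem_core()), then re-reserve the bitmap
 * itself.  bitmap_pfn, ram_start and ram_size are hypothetical values.
 */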

/**
 * free_bootmem_late - free bootmem pages directly to page allocator
 * @addr: starting address of the range
 * @size: size of the range in bytes
 *
 * This is only useful when the bootmem allocator has already been torn
 * down, but we are still initializing the system.  Pages are given directly
 * to the page allocator, no bootmem metadata is updated because it is gone.
 */
void __init free_bootmem_late(unsigned long addr, unsigned long size)
{
	unsigned long cursor, end;

	kmemleak_free_part(__va(addr), size);

	cursor = PFN_UP(addr);
	end = PFN_DOWN(addr + size);

	for (; cursor < end; cursor++) {
		__free_pages_bootmem(pfn_to_page(cursor), 0);
		totalram_pages++;
	}
}

#ifdef CONFIG_NO_BOOTMEM
static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
	int i;
	unsigned long start_aligned, end_aligned;
	int order = ilog2(BITS_PER_LONG);

	start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
	end_aligned = end & ~(BITS_PER_LONG - 1);

	if (end_aligned <= start_aligned) {
		for (i = start; i < end; i++)
			__free_pages_bootmem(pfn_to_page(i), 0);

		return;
	}

	for (i = start; i < start_aligned; i++)
		__free_pages_bootmem(pfn_to_page(i), 0);

	for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
		__free_pages_bootmem(pfn_to_page(i), order);

	for (i = end_aligned; i < end; i++)
		__free_pages_bootmem(pfn_to_page(i), 0);
}
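
/*
 * Worked example (illustrative, BITS_PER_LONG == 64): for start=100 and
 * end=300, pfns 100..127 are freed one page at a time, 128..255 as two
 * order-6 blocks of 64 pages, and 256..299 one page at a time again.
 */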

unsigned long __init free_all_memory_core_early(int nodeid)
{
	int i;
	u64 start, end;
	unsigned long count = 0;
	struct range *range = NULL;
	int nr_range;

	nr_range = get_free_all_memory_range(&range, nodeid);

	for (i = 0; i < nr_range; i++) {
		start = range[i].start;
		end = range[i].end;
		count += end - start;
		__free_pages_memory(start, end);
	}

	return count;
}
#else
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
	int aligned;
	struct page *page;
	unsigned long start, end, pages, count = 0;

	if (!bdata->node_bootmem_map)
		return 0;

	start = bdata->node_min_pfn;
	end = bdata->node_low_pfn;

	/*
	 * If the start is aligned to the machine's word size, we might
	 * be able to free pages in bulks of that order.
	 */
	aligned = !(start & (BITS_PER_LONG - 1));

	bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
		bdata - bootmem_node_data, start, end, aligned);

	while (start < end) {
		unsigned long *map, idx, vec;

		map = bdata->node_bootmem_map;
		idx = start - bdata->node_min_pfn;
		vec = ~map[idx / BITS_PER_LONG];

		if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
			int order = ilog2(BITS_PER_LONG);

			__free_pages_bootmem(pfn_to_page(start), order);
			count += BITS_PER_LONG;
		} else {
			unsigned long off = 0;

			while (vec && off < BITS_PER_LONG) {
				if (vec & 1) {
					page = pfn_to_page(start + off);
					__free_pages_bootmem(page, 0);
					count++;
				}
				vec >>= 1;
				off++;
			}
		}
		start += BITS_PER_LONG;
	}

	page = virt_to_page(bdata->node_bootmem_map);
	pages = bdata->node_low_pfn - bdata->node_min_pfn;
	pages = bootmem_bootmap_pages(pages);
	count += pages;
	while (pages--)
		__free_pages_bootmem(page++, 0);

	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);

	return count;
}
#endif

/**
 * free_all_bootmem_node - release a node's free pages to the buddy allocator
 * @pgdat: node to be released
 *
 * Returns the number of pages actually released.
 */
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
	register_page_bootmem_info_node(pgdat);
#ifdef CONFIG_NO_BOOTMEM
	/* free_all_memory_core_early(MAX_NUMNODES) will be called later */
	return 0;
#else
	return free_all_bootmem_core(pgdat->bdata);
#endif
}

/**
 * free_all_bootmem - release free pages to the buddy allocator
 *
 * Returns the number of pages actually released.
 */
unsigned long __init free_all_bootmem(void)
{
#ifdef CONFIG_NO_BOOTMEM
	/*
	 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
	 * because in some cases, like when Node0 has no RAM installed,
	 * the low RAM will be on Node1.  Using MAX_NUMNODES makes sure
	 * all ranges in early_node_map[] are used, instead of only the
	 * Node0-related ones.
	 */
	return free_all_memory_core_early(MAX_NUMNODES);
	return free_all_memory_core_early(MAX_NUMNODES);
#else
	unsigned long total_pages = 0;
	bootmem_data_t *bdata;

	list_for_each_entry(bdata, &bdata_list, list)
		total_pages += free_all_bootmem_core(bdata);

	return total_pages;
#endif
}

#ifndef CONFIG_NO_BOOTMEM
static void __init __free(bootmem_data_t *bdata,
			unsigned long sidx, unsigned long eidx)
{
	unsigned long idx;

	bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
		sidx + bdata->node_min_pfn,
		eidx + bdata->node_min_pfn);

	if (bdata->hint_idx > sidx)
		bdata->hint_idx = sidx;

	for (idx = sidx; idx < eidx; idx++)
		if (!test_and_clear_bit(idx, bdata->node_bootmem_map))
			BUG();
}

static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
			unsigned long eidx, int flags)
{
	unsigned long idx;
	int exclusive = flags & BOOTMEM_EXCLUSIVE;

	bdebug("nid=%td start=%lx end=%lx flags=%x\n",
		bdata - bootmem_node_data,
		sidx + bdata->node_min_pfn,
		eidx + bdata->node_min_pfn,
		flags);

	for (idx = sidx; idx < eidx; idx++)
		if (test_and_set_bit(idx, bdata->node_bootmem_map)) {
			if (exclusive) {
				__free(bdata, sidx, idx);
				return -EBUSY;
			}
			bdebug("silent double reserve of PFN %lx\n",
				idx + bdata->node_min_pfn);
		}
	return 0;
}

static int __init mark_bootmem_node(bootmem_data_t *bdata,
				unsigned long start, unsigned long end,
				int reserve, int flags)
{
	unsigned long sidx, eidx;

	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
		bdata - bootmem_node_data, start, end, reserve, flags);

	BUG_ON(start < bdata->node_min_pfn);
	BUG_ON(end > bdata->node_low_pfn);

	sidx = start - bdata->node_min_pfn;
	eidx = end - bdata->node_min_pfn;

	if (reserve)
		return __reserve(bdata, sidx, eidx, flags);
	else
		__free(bdata, sidx, eidx);
	return 0;
}

static int __init mark_bootmem(unsigned long start, unsigned long end,
				int reserve, int flags)
{
	unsigned long pos;
	bootmem_data_t *bdata;

	pos = start;
	list_for_each_entry(bdata, &bdata_list, list) {
		int err;
		unsigned long max;

		if (pos < bdata->node_min_pfn ||
		    pos >= bdata->node_low_pfn) {
			BUG_ON(pos != start);
			continue;
		}

		max = min(bdata->node_low_pfn, end);

		err = mark_bootmem_node(bdata, pos, max, reserve, flags);
		if (reserve && err) {
			mark_bootmem(start, pos, 0, 0);
			return err;
		}

		if (max == end)
			return 0;
		pos = bdata->node_low_pfn;
	}
	BUG();
}
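
/*
 * Example (illustrative): with node 0 spanning pfns [0, 100) and node 1
 * spanning [100, 200), mark_bootmem(90, 110, 0, 0) frees pfns 90..99
 * through node 0's bitmap and pfns 100..109 through node 1's, walking
 * bdata_list in node order.
 */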
#endif

/**
 * free_bootmem_node - mark a page range as usable
 * @pgdat: node the range resides on
 * @physaddr: starting address of the range
 * @size: size of the range in bytes
 *
 * Partial pages will be considered reserved and left as they are.
 *
 * The range must reside completely on the specified node.
 */
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
			      unsigned long size)
{
#ifdef CONFIG_NO_BOOTMEM
	free_early(physaddr, physaddr + size);
#else
	unsigned long start, end;

	kmemleak_free_part(__va(physaddr), size);

	start = PFN_UP(physaddr);
	end = PFN_DOWN(physaddr + size);

	mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
#endif
}

/**
 * free_bootmem - mark a page range as usable
 * @addr: starting address of the range
 * @size: size of the range in bytes
 *
 * Partial pages will be considered reserved and left as they are.
 *
 * The range must be contiguous but may span node boundaries.
 */
void __init free_bootmem(unsigned long addr, unsigned long size)
{
#ifdef CONFIG_NO_BOOTMEM
	free_early(addr, addr + size);
#else
	unsigned long start, end;

	kmemleak_free_part(__va(addr), size);

	start = PFN_UP(addr);
	end = PFN_DOWN(addr + size);

	mark_bootmem(start, end, 0, 0);
#endif
}

/**
 * reserve_bootmem_node - mark a page range as reserved
 * @pgdat: node the range resides on
 * @physaddr: starting address of the range
 * @size: size of the range in bytes
 * @flags: reservation flags (see linux/bootmem.h)
 *
 * Partial pages will be reserved.
 *
 * The range must reside completely on the specified node.
 */
int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
				 unsigned long size, int flags)
{
#ifdef CONFIG_NO_BOOTMEM
	panic("no bootmem");
	return 0;
#else
	unsigned long start, end;

	start = PFN_DOWN(physaddr);
	end = PFN_UP(physaddr + size);

	return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
#endif
}

/**
 * reserve_bootmem - mark a page range as reserved
 * @addr: starting address of the range
 * @size: size of the range in bytes
 * @flags: reservation flags (see linux/bootmem.h)
 *
 * Partial pages will be reserved.
 *
 * The range must be contiguous but may span node boundaries.
 */
int __init reserve_bootmem(unsigned long addr, unsigned long size,
			    int flags)
{
#ifdef CONFIG_NO_BOOTMEM
	panic("no bootmem");
	return 0;
#else
	unsigned long start, end;

	start = PFN_DOWN(addr);
	end = PFN_UP(addr + size);

	return mark_bootmem(start, end, 1, flags);
#endif
}

#ifndef CONFIG_NO_BOOTMEM
static unsigned long __init align_idx(struct bootmem_data *bdata,
				      unsigned long idx, unsigned long step)
{
	unsigned long base = bdata->node_min_pfn;

	/*
	 * Align the index with respect to the node start so that the
	 * combination of both satisfies the requested alignment.
	 */

	return ALIGN(base + idx, step) - base;
}

static unsigned long __init align_off(struct bootmem_data *bdata,
				      unsigned long off, unsigned long align)
{
	unsigned long base = PFN_PHYS(bdata->node_min_pfn);

	/* Same as align_idx for byte offsets */

	return ALIGN(base + off, align) - base;
}
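
/*
 * Worked example (illustrative): with node_min_pfn == 5 and step == 4,
 * align_idx(bdata, 2, 4) returns 3, since pfn 5 + 3 == 8 is the first
 * pfn at or above 5 + 2 that is aligned to 4.
 */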

static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
					unsigned long size, unsigned long align,
					unsigned long goal, unsigned long limit)
{
	unsigned long fallback = 0;
	unsigned long min, max, start, sidx, midx, step;

	bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
		align, goal, limit);

	BUG_ON(!size);
	BUG_ON(align & (align - 1));
	BUG_ON(limit && goal + size > limit);

	if (!bdata->node_bootmem_map)
		return NULL;

	min = bdata->node_min_pfn;
	max = bdata->node_low_pfn;

	goal >>= PAGE_SHIFT;
	limit >>= PAGE_SHIFT;

	if (limit && max > limit)
		max = limit;
	if (max <= min)
		return NULL;

	step = max(align >> PAGE_SHIFT, 1UL);

	if (goal && min < goal && goal < max)
		start = ALIGN(goal, step);
	else
		start = ALIGN(min, step);

	sidx = start - bdata->node_min_pfn;
	midx = max - bdata->node_min_pfn;

	if (bdata->hint_idx > sidx) {
		/*
		 * Handle the valid case of sidx being zero and still
		 * catch the fallback below.
		 */
		fallback = sidx + 1;
		sidx = align_idx(bdata, bdata->hint_idx, step);
	}

	while (1) {
		int merge;
		void *region;
		unsigned long eidx, i, start_off, end_off;
find_block:
		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
		sidx = align_idx(bdata, sidx, step);
		eidx = sidx + PFN_UP(size);

		if (sidx >= midx || eidx > midx)
			break;

		for (i = sidx; i < eidx; i++)
			if (test_bit(i, bdata->node_bootmem_map)) {
				sidx = align_idx(bdata, i, step);
				if (sidx == i)
					sidx += step;
				goto find_block;
			}

		if (bdata->last_end_off & (PAGE_SIZE - 1) &&
				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
			start_off = align_off(bdata, bdata->last_end_off, align);
		else
			start_off = PFN_PHYS(sidx);

		merge = PFN_DOWN(start_off) < sidx;
		end_off = start_off + size;

		bdata->last_end_off = end_off;
		bdata->hint_idx = PFN_UP(end_off);

		/*
		 * Reserve the area now:
		 */
		if (__reserve(bdata, PFN_DOWN(start_off) + merge,
				PFN_UP(end_off), BOOTMEM_EXCLUSIVE))
			BUG();

		region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
				start_off);
		memset(region, 0, size);
		/*
		 * The min_count is set to 0 so that bootmem allocated blocks
		 * are never reported as leaks.
		 */
		kmemleak_alloc(region, size, 0, 0);
		return region;
	}

	if (fallback) {
		sidx = align_idx(bdata, fallback - 1, step);
		fallback = 0;
		goto find_block;
	}

	return NULL;
}
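
/*
 * Note on the merge handling above (summary, not new behaviour): if the
 * previous allocation ended mid-page (last_end_off not page-aligned) and
 * the new block starts in the page right after it, start_off is derived
 * from last_end_off rather than PFN_PHYS(sidx), letting small allocations
 * share a page; the "merge" flag then keeps __reserve() from reserving
 * that already-reserved partial page a second time.
 */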

static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
					unsigned long size, unsigned long align,
					unsigned long goal, unsigned long limit)
{
	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc(size, GFP_NOWAIT);

#ifdef CONFIG_HAVE_ARCH_BOOTMEM
	{
		bootmem_data_t *p_bdata;

		p_bdata = bootmem_arch_preferred_node(bdata, size, align,
							goal, limit);
		if (p_bdata)
			return alloc_bootmem_core(p_bdata, size, align,
							goal, limit);
	}
#endif
	return NULL;
}
#endif

static void * __init ___alloc_bootmem_nopanic(unsigned long size,
					unsigned long align,
					unsigned long goal,
					unsigned long limit)
{
#ifdef CONFIG_NO_BOOTMEM
	void *ptr;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc(size, GFP_NOWAIT);

restart:

	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);

	if (ptr)
		return ptr;

	if (goal != 0) {
		goal = 0;
		goto restart;
	}

	return NULL;
#else
	bootmem_data_t *bdata;
	void *region;

restart:
	region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit);
	if (region)
		return region;

	list_for_each_entry(bdata, &bdata_list, list) {
		if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
			continue;
		if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
			break;

		region = alloc_bootmem_core(bdata, size, align, goal, limit);
		if (region)
			return region;
	}

	if (goal) {
		goal = 0;
		goto restart;
	}

	return NULL;
#endif
}

/**
 * __alloc_bootmem_nopanic - allocate boot memory without panicking
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may happen on any node in the system.
 *
 * Returns NULL on failure.
 */
void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
					unsigned long goal)
{
	unsigned long limit = 0;

#ifdef CONFIG_NO_BOOTMEM
	limit = -1UL;
#endif

	return ___alloc_bootmem_nopanic(size, align, goal, limit);
}

static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
					unsigned long goal, unsigned long limit)
{
	void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);

	if (mem)
		return mem;
	/*
	 * Whoops, we cannot satisfy the allocation request.
	 */
	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
	panic("Out of memory");
	return NULL;
}

/**
 * __alloc_bootmem - allocate boot memory
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may happen on any node in the system.
 *
 * The function panics if the request can not be satisfied.
 */
void * __init __alloc_bootmem(unsigned long size, unsigned long align,
			      unsigned long goal)
{
	unsigned long limit = 0;

#ifdef CONFIG_NO_BOOTMEM
	limit = -1UL;
#endif

	return ___alloc_bootmem(size, align, goal, limit);
}
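
/*
 * Usage sketch (illustrative): the common alloc_bootmem() wrapper in
 * <linux/bootmem.h> boils down to something like
 *
 *	void *buf = __alloc_bootmem(size, SMP_CACHE_BYTES,
 *				    __pa(MAX_DMA_ADDRESS));
 *
 * i.e. cacheline-aligned memory preferably above the DMA zone; callers
 * need not check the result, since failure panics.
 */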

#ifndef CONFIG_NO_BOOTMEM
static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
				unsigned long size, unsigned long align,
				unsigned long goal, unsigned long limit)
{
	void *ptr;

	ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit);
	if (ptr)
		return ptr;

	ptr = alloc_bootmem_core(bdata, size, align, goal, limit);
	if (ptr)
		return ptr;

	return ___alloc_bootmem(size, align, goal, limit);
}
#endif

/**
 * __alloc_bootmem_node - allocate boot memory from a specific node
 * @pgdat: node to allocate from
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may fall back to any node in the system if the specified node
 * can not hold the requested memory.
 *
 * The function panics if the request can not be satisfied.
 */
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
				   unsigned long align, unsigned long goal)
{
	void *ptr;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
					 goal, -1ULL);
	if (ptr)
		return ptr;

	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
					 goal, -1ULL);
#else
	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
#endif

	return ptr;
}

void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
				   unsigned long align, unsigned long goal)
{
#ifdef MAX_DMA32_PFN
	unsigned long end_pfn;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

	/* update goal according to MAX_DMA32_PFN */
	end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;

	if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
	    (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
		void *ptr;
		unsigned long new_goal;

		new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
#ifdef CONFIG_NO_BOOTMEM
		ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
						 new_goal, -1ULL);
#else
		ptr = alloc_bootmem_core(pgdat->bdata, size, align,
						 new_goal, 0);
#endif
		if (ptr)
			return ptr;
	}
#endif

	return __alloc_bootmem_node(pgdat, size, align, goal);
}

#ifdef CONFIG_SPARSEMEM
/**
 * alloc_bootmem_section - allocate boot memory from a specific section
 * @size: size of the request in bytes
 * @section_nr: sparse map section to allocate from
 *
 * Return NULL on failure.
 */
void * __init alloc_bootmem_section(unsigned long size,
				    unsigned long section_nr)
{
#ifdef CONFIG_NO_BOOTMEM
	unsigned long pfn, goal, limit;

	pfn = section_nr_to_pfn(section_nr);
	goal = pfn << PAGE_SHIFT;
	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;

	return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
					 SMP_CACHE_BYTES, goal, limit);
#else
	bootmem_data_t *bdata;
	unsigned long pfn, goal, limit;

	pfn = section_nr_to_pfn(section_nr);
	goal = pfn << PAGE_SHIFT;
	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
	bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];

	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
#endif
}
#endif

void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
				   unsigned long align, unsigned long goal)
{
	void *ptr;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
						 goal, -1ULL);
#else
	ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
	if (ptr)
		return ptr;

	ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
#endif
	if (ptr)
		return ptr;

	return __alloc_bootmem_nopanic(size, align, goal);
}

#ifndef ARCH_LOW_ADDRESS_LIMIT
#define ARCH_LOW_ADDRESS_LIMIT	0xffffffffUL
#endif

/**
 * __alloc_bootmem_low - allocate low boot memory
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may happen on any node in the system.
 *
 * The function panics if the request can not be satisfied.
 */
void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
				  unsigned long goal)
{
	return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
}

/**
 * __alloc_bootmem_low_node - allocate low boot memory from a specific node
 * @pgdat: node to allocate from
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may fall back to any node in the system if the specified node
 * can not hold the requested memory.
 *
 * The function panics if the request can not be satisfied.
 */
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
				       unsigned long align, unsigned long goal)
{
	void *ptr;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
				goal, ARCH_LOW_ADDRESS_LIMIT);
	if (ptr)
		return ptr;
	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
				goal, ARCH_LOW_ADDRESS_LIMIT);
#else
	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
				goal, ARCH_LOW_ADDRESS_LIMIT);
#endif
	return ptr;
}