/*
 *  bootmem - A boot-time physical memory allocator and configurator
 *
 *  Copyright (C) 1999 Ingo Molnar
 *                1999 Kanoj Sarcar, SGI
 *                2008 Johannes Weiner
 *
 * Access to this subsystem has to be serialized externally (which is true
 * for the boot process anyway).
 */
#include <linux/init.h>
#include <linux/pfn.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/kmemleak.h>
#include <linux/range.h>
#include <linux/memblock.h>

#include <asm/bug.h>
#include <asm/io.h>
#include <asm/processor.h>

#include "internal.h"

unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;

#ifdef CONFIG_CRASH_DUMP
/*
 * If we have booted due to a crash, max_pfn will be a very low value. We need
 * to know the amount of memory that the previous kernel used.
 */
unsigned long saved_max_pfn;
#endif

#ifndef CONFIG_NO_BOOTMEM
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;

static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);

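/*
 * Booting with "bootmem_debug" on the kernel command line turns on
 * per-operation logging through the bdebug() macro below.
 */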
static int bootmem_debug;

static int __init bootmem_debug_setup(char *buf)
{
	bootmem_debug = 1;
	return 0;
}
early_param("bootmem_debug", bootmem_debug_setup);

#define bdebug(fmt, args...) ({				\
	if (unlikely(bootmem_debug))			\
		printk(KERN_INFO			\
			"bootmem::%s " fmt,		\
			__func__, ## args);		\
})

static unsigned long __init bootmap_bytes(unsigned long pages)
{
	unsigned long bytes = (pages + 7) / 8;

	return ALIGN(bytes, sizeof(long));
}

/**
 * bootmem_bootmap_pages - calculate bitmap size in pages
 * @pages: number of pages the bitmap has to represent
 */
unsigned long __init bootmem_bootmap_pages(unsigned long pages)
{
	unsigned long bytes = bootmap_bytes(pages);

	return PAGE_ALIGN(bytes) >> PAGE_SHIFT;
}

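/*
 * Worked example (editor's illustration, not part of the original
 * file): representing 131072 pages takes 131072 / 8 = 16384 bitmap
 * bytes, so with 4 KiB pages bootmem_bootmap_pages(131072) returns 4.
 */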
/*
 * link bdata in order
 */
static void __init link_bootmem(bootmem_data_t *bdata)
{
	struct list_head *iter;

	list_for_each(iter, &bdata_list) {
		bootmem_data_t *ent;

		ent = list_entry(iter, bootmem_data_t, list);
		if (bdata->node_min_pfn < ent->node_min_pfn)
			break;
	}
	list_add_tail(&bdata->list, iter);
}

/*
 * Called once to set up the allocator itself.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
	unsigned long mapstart, unsigned long start, unsigned long end)
{
	unsigned long mapsize;

	mminit_validate_memmodel_limits(&start, &end);
	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
	bdata->node_min_pfn = start;
	bdata->node_low_pfn = end;
	link_bootmem(bdata);

	/*
	 * Initially all pages are reserved - setup_arch() has to
	 * register free RAM areas explicitly.
	 */
	mapsize = bootmap_bytes(end - start);
	memset(bdata->node_bootmem_map, 0xff, mapsize);

	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
		bdata - bootmem_node_data, start, mapstart, end, mapsize);

	return mapsize;
}

/**
 * init_bootmem_node - register a node as boot memory
 * @pgdat: node to register
 * @freepfn: pfn where the bitmap for this node is to be placed
 * @startpfn: first pfn on the node
 * @endpfn: first pfn after the node
 *
 * Returns the number of bytes needed to hold the bitmap for this node.
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
				unsigned long startpfn, unsigned long endpfn)
{
	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
}

/**
 * init_bootmem - register boot memory
 * @start: pfn where the bitmap is to be placed
 * @pages: number of available physical pages
 *
 * Returns the number of bytes needed to hold the bitmap.
 */
unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
{
	max_low_pfn = pages;
	min_low_pfn = start;
	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
}
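/*
 * Illustrative sketch (not part of the original file): a UMA
 * architecture's setup_arch() typically sizes the bitmap with
 * bootmem_bootmap_pages(), places it in a known-free region and then
 * registers the usable RAM, e.g.:
 *
 *	init_bootmem(bitmap_start_pfn, max_low_pfn);
 *	free_bootmem(ram_start, ram_size);
 *	reserve_bootmem(kernel_start, kernel_size, BOOTMEM_DEFAULT);
 *
 * bitmap_start_pfn, ram_* and kernel_* are hypothetical names; how
 * they are found is architecture specific.
 */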
#endif
/**
 * free_bootmem_late - free bootmem pages directly to page allocator
 * @addr: starting address of the range
 * @size: size of the range in bytes
 *
 * This is only useful when the bootmem allocator has already been torn
 * down, but we are still initializing the system.  Pages are given directly
 * to the page allocator, no bootmem metadata is updated because it is gone.
 */
void __init free_bootmem_late(unsigned long addr, unsigned long size)
{
	unsigned long cursor, end;

	kmemleak_free_part(__va(addr), size);

	cursor = PFN_UP(addr);
	end = PFN_DOWN(addr + size);

	for (; cursor < end; cursor++) {
		__free_pages_bootmem(pfn_to_page(cursor), 0);
		totalram_pages++;
	}
}
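/*
 * Illustrative use (not from the original file): dropping an early
 * reservation that turns out to be unneeded only after the bootmem
 * bitmap itself has been freed:
 *
 *	free_bootmem_late(fw_table_phys, fw_table_len);
 *
 * fw_table_phys/fw_table_len are hypothetical names.
 */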

#ifdef CONFIG_NO_BOOTMEM
static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
	int i;
	unsigned long start_aligned, end_aligned;
	int order = ilog2(BITS_PER_LONG);

	/* Round inwards to the BITS_PER_LONG-aligned middle section. */
	start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
	end_aligned = end & ~(BITS_PER_LONG - 1);

	/* Range too small to contain an aligned block: free page-wise. */
	if (end_aligned <= start_aligned) {
		for (i = start; i < end; i++)
			__free_pages_bootmem(pfn_to_page(i), 0);

		return;
	}

	/* Unaligned head, aligned middle in high-order blocks, then tail. */
	for (i = start; i < start_aligned; i++)
		__free_pages_bootmem(pfn_to_page(i), 0);

	for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
		__free_pages_bootmem(pfn_to_page(i), order);

	for (i = end_aligned; i < end; i++)
		__free_pages_bootmem(pfn_to_page(i), 0);
}

unsigned long __init free_all_memory_core_early(int nodeid)
{
	int i;
	u64 start, end;
	unsigned long count = 0;
	struct range *range = NULL;
	int nr_range;

	nr_range = get_free_all_memory_range(&range, nodeid);

	for (i = 0; i < nr_range; i++) {
		start = range[i].start;
		end = range[i].end;
		count += end - start;
		__free_pages_memory(start, end);
	}

	return count;
}
#else
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
	int aligned;
	struct page *page;
	unsigned long start, end, pages, count = 0;

	if (!bdata->node_bootmem_map)
		return 0;

	start = bdata->node_min_pfn;
	end = bdata->node_low_pfn;

	/*
	 * If the start is aligned to the machine's word size, we might
	 * be able to free pages in bulks of that order.
	 */
	aligned = !(start & (BITS_PER_LONG - 1));

	bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
		bdata - bootmem_node_data, start, end, aligned);

	while (start < end) {
		unsigned long *map, idx, vec;

		map = bdata->node_bootmem_map;
		idx = start - bdata->node_min_pfn;
		vec = ~map[idx / BITS_PER_LONG];

		if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
			int order = ilog2(BITS_PER_LONG);

			__free_pages_bootmem(pfn_to_page(start), order);
			count += BITS_PER_LONG;
		} else {
			unsigned long off = 0;

			while (vec && off < BITS_PER_LONG) {
				if (vec & 1) {
					page = pfn_to_page(start + off);
					__free_pages_bootmem(page, 0);
					count++;
				}
				vec >>= 1;
				off++;
			}
		}
		start += BITS_PER_LONG;
	}

	page = virt_to_page(bdata->node_bootmem_map);
	pages = bdata->node_low_pfn - bdata->node_min_pfn;
	pages = bootmem_bootmap_pages(pages);
	count += pages;
	while (pages--)
		__free_pages_bootmem(page++, 0);

	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);

	return count;
}
#endif

/**
 * free_all_bootmem_node - release a node's free pages to the buddy allocator
 * @pgdat: node to be released
 *
 * Returns the number of pages actually released.
 */
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
	register_page_bootmem_info_node(pgdat);
#ifdef CONFIG_NO_BOOTMEM
	/* free_all_memory_core_early(MAX_NUMNODES) will be called later */
	return 0;
#else
	return free_all_bootmem_core(pgdat->bdata);
#endif
}

/**
 * free_all_bootmem - release free pages to the buddy allocator
 *
 * Returns the number of pages actually released.
 */
unsigned long __init free_all_bootmem(void)
{
#ifdef CONFIG_NO_BOOTMEM
	/*
	 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
	 * because in some cases node 0 has no RAM installed and the
	 * low ram is on node 1 instead. Using MAX_NUMNODES makes sure
	 * all ranges in early_node_map[] are used, not only the ones
	 * related to node 0.
	 */
	return free_all_memory_core_early(MAX_NUMNODES);
#else
	unsigned long total_pages = 0;
	bootmem_data_t *bdata;

	list_for_each_entry(bdata, &bdata_list, list)
		total_pages += free_all_bootmem_core(bdata);

	return total_pages;
#endif
}
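/*
 * Typical call site (illustrative, not part of this file): an
 * architecture's mem_init() hands all remaining boot memory to the
 * buddy allocator with
 *
 *	totalram_pages += free_all_bootmem();
 *
 * after which early allocations must go through the slab or page
 * allocator instead.
 */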

#ifndef CONFIG_NO_BOOTMEM
static void __init __free(bootmem_data_t *bdata,
			unsigned long sidx, unsigned long eidx)
{
	unsigned long idx;

	bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
		sidx + bdata->node_min_pfn,
		eidx + bdata->node_min_pfn);

	if (bdata->hint_idx > sidx)
		bdata->hint_idx = sidx;

	for (idx = sidx; idx < eidx; idx++)
		if (!test_and_clear_bit(idx, bdata->node_bootmem_map))
			BUG();
}

static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
			unsigned long eidx, int flags)
{
	unsigned long idx;
	int exclusive = flags & BOOTMEM_EXCLUSIVE;

	bdebug("nid=%td start=%lx end=%lx flags=%x\n",
		bdata - bootmem_node_data,
		sidx + bdata->node_min_pfn,
		eidx + bdata->node_min_pfn,
		flags);

	for (idx = sidx; idx < eidx; idx++)
		if (test_and_set_bit(idx, bdata->node_bootmem_map)) {
			if (exclusive) {
				__free(bdata, sidx, idx);
				return -EBUSY;
			}
			bdebug("silent double reserve of PFN %lx\n",
				idx + bdata->node_min_pfn);
		}
	return 0;
}

static int __init mark_bootmem_node(bootmem_data_t *bdata,
				unsigned long start, unsigned long end,
				int reserve, int flags)
{
	unsigned long sidx, eidx;

	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
		bdata - bootmem_node_data, start, end, reserve, flags);

	BUG_ON(start < bdata->node_min_pfn);
	BUG_ON(end > bdata->node_low_pfn);

	sidx = start - bdata->node_min_pfn;
	eidx = end - bdata->node_min_pfn;

	if (reserve)
		return __reserve(bdata, sidx, eidx, flags);
	else
		__free(bdata, sidx, eidx);
	return 0;
}

static int __init mark_bootmem(unsigned long start, unsigned long end,
				int reserve, int flags)
{
	unsigned long pos;
	bootmem_data_t *bdata;

	pos = start;
	list_for_each_entry(bdata, &bdata_list, list) {
		int err;
		unsigned long max;

		if (pos < bdata->node_min_pfn ||
		    pos >= bdata->node_low_pfn) {
			BUG_ON(pos != start);
			continue;
		}

		max = min(bdata->node_low_pfn, end);

		err = mark_bootmem_node(bdata, pos, max, reserve, flags);
		if (reserve && err) {
			mark_bootmem(start, pos, 0, 0);
			return err;
		}

		if (max == end)
			return 0;
		pos = bdata->node_low_pfn;
	}
	BUG();
}
#endif

/**
 * free_bootmem_node - mark a page range as usable
 * @pgdat: node the range resides on
 * @physaddr: starting address of the range
 * @size: size of the range in bytes
 *
 * Partial pages will be considered reserved and left as they are.
 *
 * The range must reside completely on the specified node.
 */
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
			      unsigned long size)
{
#ifdef CONFIG_NO_BOOTMEM
	kmemleak_free_part(__va(physaddr), size);
	memblock_x86_free_range(physaddr, physaddr + size);
#else
	unsigned long start, end;

	kmemleak_free_part(__va(physaddr), size);

	start = PFN_UP(physaddr);
	end = PFN_DOWN(physaddr + size);

	mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
#endif
}

/**
 * free_bootmem - mark a page range as usable
 * @addr: starting address of the range
 * @size: size of the range in bytes
 *
 * Partial pages will be considered reserved and left as they are.
 *
 * The range must be contiguous but may span node boundaries.
 */
void __init free_bootmem(unsigned long addr, unsigned long size)
{
#ifdef CONFIG_NO_BOOTMEM
	kmemleak_free_part(__va(addr), size);
	memblock_x86_free_range(addr, addr + size);
#else
	unsigned long start, end;

	kmemleak_free_part(__va(addr), size);

	start = PFN_UP(addr);
	end = PFN_DOWN(addr + size);

	mark_bootmem(start, end, 0, 0);
#endif
}
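/*
 * Rounding example (editor's illustration, assuming 4 KiB pages):
 * free_bootmem(0x1800, 0x2000) frees only the fully contained page at
 * 0x2000-0x2fff; the partially covered pages at 0x1000-0x1fff and
 * 0x3000-0x3fff stay reserved because the start is rounded up
 * (PFN_UP) and the end is rounded down (PFN_DOWN).
 */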

/**
 * reserve_bootmem_node - mark a page range as reserved
 * @pgdat: node the range resides on
 * @physaddr: starting address of the range
 * @size: size of the range in bytes
 * @flags: reservation flags (see linux/bootmem.h)
 *
 * Partial pages will be reserved.
 *
 * The range must reside completely on the specified node.
 */
int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
				 unsigned long size, int flags)
{
#ifdef CONFIG_NO_BOOTMEM
	panic("no bootmem");
	return 0;
#else
	unsigned long start, end;

	start = PFN_DOWN(physaddr);
	end = PFN_UP(physaddr + size);

	return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
#endif
}

/**
 * reserve_bootmem - mark a page range as reserved
 * @addr: starting address of the range
 * @size: size of the range in bytes
 * @flags: reservation flags (see linux/bootmem.h)
 *
 * Partial pages will be reserved.
 *
 * The range must be contiguous but may span node boundaries.
 */
int __init reserve_bootmem(unsigned long addr, unsigned long size,
			    int flags)
{
#ifdef CONFIG_NO_BOOTMEM
	panic("no bootmem");
	return 0;
#else
	unsigned long start, end;

	start = PFN_DOWN(addr);
	end = PFN_UP(addr + size);

	return mark_bootmem(start, end, 1, flags);
#endif
}
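/*
 * Illustrative use (not from the original file): reserving a firmware
 * region while detecting collisions with earlier reservations:
 *
 *	if (reserve_bootmem(fw_base, fw_size, BOOTMEM_EXCLUSIVE))
 *		printk(KERN_WARNING "firmware region already in use\n");
 *
 * fw_base/fw_size are hypothetical. With BOOTMEM_DEFAULT instead, a
 * double reservation is merged silently rather than rejected with
 * -EBUSY.
 */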

#ifndef CONFIG_NO_BOOTMEM
int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
				   int flags)
{
	return reserve_bootmem(phys, len, flags);
}

static unsigned long __init align_idx(struct bootmem_data *bdata,
				      unsigned long idx, unsigned long step)
{
	unsigned long base = bdata->node_min_pfn;

	/*
	 * Align the index with respect to the node start so that the
	 * combination of both satisfies the requested alignment.
	 */

	return ALIGN(base + idx, step) - base;
}

static unsigned long __init align_off(struct bootmem_data *bdata,
				      unsigned long off, unsigned long align)
{
	unsigned long base = PFN_PHYS(bdata->node_min_pfn);

	/* Same as align_idx for byte offsets */

	return ALIGN(base + off, align) - base;
}

static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
					unsigned long size, unsigned long align,
					unsigned long goal, unsigned long limit)
{
	unsigned long fallback = 0;
	unsigned long min, max, start, sidx, midx, step;

	bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
		align, goal, limit);

	BUG_ON(!size);
	BUG_ON(align & (align - 1));
	BUG_ON(limit && goal + size > limit);

	if (!bdata->node_bootmem_map)
		return NULL;

	min = bdata->node_min_pfn;
	max = bdata->node_low_pfn;

	goal >>= PAGE_SHIFT;
	limit >>= PAGE_SHIFT;

	if (limit && max > limit)
		max = limit;
	if (max <= min)
		return NULL;

	step = max(align >> PAGE_SHIFT, 1UL);

	if (goal && min < goal && goal < max)
		start = ALIGN(goal, step);
	else
		start = ALIGN(min, step);

	sidx = start - bdata->node_min_pfn;
	midx = max - bdata->node_min_pfn;

	if (bdata->hint_idx > sidx) {
		/*
		 * Handle the valid case of sidx being zero and still
		 * catch the fallback below.
		 */
		fallback = sidx + 1;
		sidx = align_idx(bdata, bdata->hint_idx, step);
	}

	while (1) {
		int merge;
		void *region;
		unsigned long eidx, i, start_off, end_off;
find_block:
		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
		sidx = align_idx(bdata, sidx, step);
		eidx = sidx + PFN_UP(size);

		if (sidx >= midx || eidx > midx)
			break;

		for (i = sidx; i < eidx; i++)
			if (test_bit(i, bdata->node_bootmem_map)) {
				sidx = align_idx(bdata, i, step);
				if (sidx == i)
					sidx += step;
				goto find_block;
			}

		if (bdata->last_end_off & (PAGE_SIZE - 1) &&
				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
			start_off = align_off(bdata, bdata->last_end_off, align);
		else
			start_off = PFN_PHYS(sidx);

		merge = PFN_DOWN(start_off) < sidx;
		end_off = start_off + size;

		bdata->last_end_off = end_off;
		bdata->hint_idx = PFN_UP(end_off);

		/*
		 * Reserve the area now:
		 */
		if (__reserve(bdata, PFN_DOWN(start_off) + merge,
				PFN_UP(end_off), BOOTMEM_EXCLUSIVE))
			BUG();

		region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
				start_off);
		memset(region, 0, size);
		/*
		 * The min_count is set to 0 so that bootmem allocated blocks
		 * are never reported as leaks.
		 */
		kmemleak_alloc(region, size, 0, 0);
		return region;
	}

	if (fallback) {
		sidx = align_idx(bdata, fallback - 1, step);
		fallback = 0;
		goto find_block;
	}

	return NULL;
}
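/*
 * Worked example of the merge path above (editor's illustration,
 * assuming 4 KiB pages and 64-byte alignment): two consecutive
 * allocations of 2000 bytes share one page.  The first ends at
 * last_end_off = 2000; the second starts at ALIGN(2000, 64) = 2048
 * within the same page, and the merge flag keeps __reserve() from
 * reserving that already-reserved page twice.
 */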

static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
					unsigned long size, unsigned long align,
					unsigned long goal, unsigned long limit)
{
	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc(size, GFP_NOWAIT);

#ifdef CONFIG_HAVE_ARCH_BOOTMEM
	{
		bootmem_data_t *p_bdata;

		p_bdata = bootmem_arch_preferred_node(bdata, size, align,
							goal, limit);
		if (p_bdata)
			return alloc_bootmem_core(p_bdata, size, align,
							goal, limit);
	}
#endif
	return NULL;
}
#endif

static void * __init ___alloc_bootmem_nopanic(unsigned long size,
					unsigned long align,
					unsigned long goal,
					unsigned long limit)
{
#ifdef CONFIG_NO_BOOTMEM
	void *ptr;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc(size, GFP_NOWAIT);

restart:

	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);

	if (ptr)
		return ptr;

	if (goal != 0) {
		goal = 0;
		goto restart;
	}

	return NULL;
#else
	bootmem_data_t *bdata;
	void *region;

restart:
	region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit);
	if (region)
		return region;

	list_for_each_entry(bdata, &bdata_list, list) {
		if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
			continue;
		if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
			break;

		region = alloc_bootmem_core(bdata, size, align, goal, limit);
		if (region)
			return region;
	}

	if (goal) {
		goal = 0;
		goto restart;
	}

	return NULL;
#endif
}

/**
 * __alloc_bootmem_nopanic - allocate boot memory without panicking
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may happen on any node in the system.
 *
 * Returns NULL on failure.
 */
void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
					unsigned long goal)
{
	unsigned long limit = 0;

#ifdef CONFIG_NO_BOOTMEM
	limit = -1UL;
#endif

	return ___alloc_bootmem_nopanic(size, align, goal, limit);
}

static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
					unsigned long goal, unsigned long limit)
{
	void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);

	if (mem)
		return mem;
	/*
	 * Whoops, we cannot satisfy the allocation request.
	 */
	printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
	panic("Out of memory");
	return NULL;
}

/**
 * __alloc_bootmem - allocate boot memory
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may happen on any node in the system.
 *
 * The function panics if the request can not be satisfied.
 */
void * __init __alloc_bootmem(unsigned long size, unsigned long align,
			      unsigned long goal)
{
	unsigned long limit = 0;

#ifdef CONFIG_NO_BOOTMEM
	limit = -1UL;
#endif

	return ___alloc_bootmem(size, align, goal, limit);
}
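/*
 * Illustrative use (not part of the original file): early code that
 * needs a permanent, zeroed buffer before the page allocator is up
 * might do
 *
 *	buf = __alloc_bootmem(size, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS));
 *
 * which matches what the alloc_bootmem() convenience macro in
 * <linux/bootmem.h> expands to.
 */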

#ifndef CONFIG_NO_BOOTMEM
static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
				unsigned long size, unsigned long align,
				unsigned long goal, unsigned long limit)
{
	void *ptr;

	ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit);
	if (ptr)
		return ptr;

	ptr = alloc_bootmem_core(bdata, size, align, goal, limit);
	if (ptr)
		return ptr;

	return ___alloc_bootmem(size, align, goal, limit);
}
#endif

/**
 * __alloc_bootmem_node - allocate boot memory from a specific node
 * @pgdat: node to allocate from
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may fall back to any node in the system if the specified node
 * can not hold the requested memory.
 *
 * The function panics if the request can not be satisfied.
 */
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
				   unsigned long align, unsigned long goal)
{
	void *ptr;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
					 goal, -1ULL);
	if (ptr)
		return ptr;

	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
					 goal, -1ULL);
#else
	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
#endif

	return ptr;
}

void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
				   unsigned long align, unsigned long goal)
{
#ifdef MAX_DMA32_PFN
	unsigned long end_pfn;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

	/* update goal according to MAX_DMA32_PFN */
	end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;

	if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
	    (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
		void *ptr;
		unsigned long new_goal;

		new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
#ifdef CONFIG_NO_BOOTMEM
		ptr =  __alloc_memory_core_early(pgdat->node_id, size, align,
						 new_goal, -1ULL);
#else
		ptr = alloc_bootmem_core(pgdat->bdata, size, align,
						 new_goal, 0);
#endif
		if (ptr)
			return ptr;
	}
#endif

	return __alloc_bootmem_node(pgdat, size, align, goal);
}

#ifdef CONFIG_SPARSEMEM
/**
 * alloc_bootmem_section - allocate boot memory from a specific section
 * @size: size of the request in bytes
 * @section_nr: sparse map section to allocate from
 *
 * Return NULL on failure.
 */
void * __init alloc_bootmem_section(unsigned long size,
				    unsigned long section_nr)
{
#ifdef CONFIG_NO_BOOTMEM
	unsigned long pfn, goal, limit;

	pfn = section_nr_to_pfn(section_nr);
	goal = pfn << PAGE_SHIFT;
	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;

	return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
					 SMP_CACHE_BYTES, goal, limit);
#else
	bootmem_data_t *bdata;
	unsigned long pfn, goal, limit;

	pfn = section_nr_to_pfn(section_nr);
	goal = pfn << PAGE_SHIFT;
	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
	bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];

	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
#endif
}
#endif

void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
				   unsigned long align, unsigned long goal)
{
	void *ptr;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
	ptr =  __alloc_memory_core_early(pgdat->node_id, size, align,
						 goal, -1ULL);
#else
	ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
	if (ptr)
		return ptr;

	ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
#endif
	if (ptr)
		return ptr;

	return __alloc_bootmem_nopanic(size, align, goal);
}

#ifndef ARCH_LOW_ADDRESS_LIMIT
#define ARCH_LOW_ADDRESS_LIMIT	0xffffffffUL
#endif

/**
 * __alloc_bootmem_low - allocate low boot memory
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may happen on any node in the system.
 *
 * The function panics if the request can not be satisfied.
 */
void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
				  unsigned long goal)
{
	return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
}
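/*
 * Illustrative use (not from the original file): an early allocation
 * that must end up below ARCH_LOW_ADDRESS_LIMIT (4 GiB by default),
 * e.g. for a device that can only address 32 bits:
 *
 *	buf = __alloc_bootmem_low(buf_size, PAGE_SIZE, 0);
 *
 * buf/buf_size are hypothetical names.
 */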

/**
 * __alloc_bootmem_low_node - allocate low boot memory from a specific node
 * @pgdat: node to allocate from
 * @size: size of the request in bytes
 * @align: alignment of the region
 * @goal: preferred starting address of the region
 *
 * The goal is dropped if it can not be satisfied and the allocation will
 * fall back to memory below @goal.
 *
 * Allocation may fall back to any node in the system if the specified node
 * can not hold the requested memory.
 *
 * The function panics if the request can not be satisfied.
 */
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
				       unsigned long align, unsigned long goal)
{
	void *ptr;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

#ifdef CONFIG_NO_BOOTMEM
	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
				goal, ARCH_LOW_ADDRESS_LIMIT);
	if (ptr)
		return ptr;
	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
				goal, ARCH_LOW_ADDRESS_LIMIT);
#else
	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
				goal, ARCH_LOW_ADDRESS_LIMIT);
#endif
	return ptr;
}