memblock.c 14.4 KB
Newer Older
Y
Yinghai Lu 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bitops.h>
16
#include <linux/poison.h>
Y
Yinghai Lu 已提交
17 18 19 20 21
#include <linux/memblock.h>

/*
 * Global memblock state: the "memory" and "reserved" region tables, the
 * current allocation limit and the cached total memory size.
 */
struct memblock memblock;

/*
 * Set to 1 by early_memblock() when the "memblock" early parameter contains
 * "debug"; memblock_dump_all() prints nothing while this is zero.
 */
static int memblock_debug;
22 23
/*
 * Initial backing storage for the two region tables, hooked up by
 * memblock_init(). Each array has one entry more than INIT_MEMBLOCK_REGIONS:
 * the extra last slot is never used for a region and instead carries a
 * RED_INACTIVE marker that memblock_analyze() checks.
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1];
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1];
Y
Yinghai Lu 已提交
24

25 26
/* All-ones address used by the internal find/alloc helpers to signal failure. */
#define MEMBLOCK_ERROR	(~(phys_addr_t)0)

27 28 29
/*
 * Address comparison utilities
 */
Y
Yinghai Lu 已提交
30

31
/*
 * Clear the low bits of @addr selected by (@size - 1). The result is @addr
 * rounded down to a multiple of @size when @size is a power of two.
 */
static phys_addr_t memblock_align_down(phys_addr_t addr, phys_addr_t size)
{
	phys_addr_t low_bits = size - 1;

	return addr & ~low_bits;
}

36
/*
 * Add (@size - 1) to @addr and then clear the low bits selected by
 * (@size - 1). The result is @addr rounded up to a multiple of @size when
 * @size is a power of two.
 */
static phys_addr_t memblock_align_up(phys_addr_t addr, phys_addr_t size)
{
	phys_addr_t low_bits = size - 1;

	return (addr + low_bits) & ~low_bits;
}

41 42
/*
 * Return non-zero when the ranges [base1, base1 + size1) and
 * [base2, base2 + size2) share at least one address, zero otherwise.
 */
static unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
				       phys_addr_t base2, phys_addr_t size2)
{
	phys_addr_t end1 = base1 + size1;
	phys_addr_t end2 = base2 + size2;

	/* Range 1 starts at or after the end of range 2: no overlap. */
	if (base1 >= end2)
		return 0;

	return base2 < end1;
}

47 48
/*
 * Report whether two ranges touch end-to-start.
 *
 * Returns  1 if range 2 begins exactly where range 1 ends,
 *         -1 if range 1 begins exactly where range 2 ends,
 *          0 otherwise.
 */
static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1,
			       phys_addr_t base2, phys_addr_t size2)
{
	if (base1 + size1 == base2)
		return 1;

	if (base2 + size2 == base1)
		return -1;

	return 0;
}

58
static long memblock_regions_adjacent(struct memblock_type *type,
59
				 unsigned long r1, unsigned long r2)
Y
Yinghai Lu 已提交
60
{
61 62 63 64
	phys_addr_t base1 = type->regions[r1].base;
	phys_addr_t size1 = type->regions[r1].size;
	phys_addr_t base2 = type->regions[r2].base;
	phys_addr_t size2 = type->regions[r2].size;
Y
Yinghai Lu 已提交
65 66 67 68

	return memblock_addrs_adjacent(base1, size1, base2, size2);
}

69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
/*
 * Find the first entry of @type that overlaps [base, base + size).
 *
 * Returns the index of that entry, or -1 when no entry overlaps.
 */
long memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
	unsigned long idx;

	for (idx = 0; idx < type->cnt; idx++) {
		struct memblock_region *rgn = &type->regions[idx];

		if (memblock_addrs_overlap(base, size, rgn->base, rgn->size))
			return idx;
	}

	return -1;
}

/*
 * Find, allocate, deallocate or reserve unreserved regions. All allocations
 * are top-down.
 */

/*
 * Search the window [start, end) from the top down for a range of @size
 * bytes that does not overlap any reserved region. Candidate bases are
 * produced with memblock_align_down(..., @align).
 *
 * Returns the base of the range found, or MEMBLOCK_ERROR if nothing fits.
 */
static phys_addr_t __init memblock_find_region(phys_addr_t start, phys_addr_t end,
					  phys_addr_t size, phys_addr_t align)
{
	phys_addr_t base, res_base;
	long j;

	/*
	 * Address arithmetic here is unsigned: if the window ends below
	 * @size, "end - size" would wrap around to a huge value and the
	 * loop below could hand back a bogus base far above @end.
	 */
	if (end < size)
		return MEMBLOCK_ERROR;

	base = memblock_align_down((end - size), align);
	while (start <= base) {
		j = memblock_overlaps_region(&memblock.reserved, base, size);
		if (j < 0)
			return base;

		/* Retry just below the reserved region that is in the way. */
		res_base = memblock.reserved.regions[j].base;
		if (res_base < size)
			break;	/* no room left below it without wrapping */
		base = memblock_align_down(res_base - size, align);
	}

	return MEMBLOCK_ERROR;
}

/*
 * Find a free range of @size bytes (rounded up to @align) that ends at or
 * below @max_addr, without reserving it. MEMBLOCK_ALLOC_ACCESSIBLE as
 * @max_addr selects memblock.current_limit.
 *
 * Memory regions are walked from the last table entry to the first, which
 * keeps early boot allocations near the top of memory and so tends to limit
 * fragmentation.
 *
 * Returns the base found, or MEMBLOCK_ERROR. A zero @size is a BUG.
 */
static phys_addr_t __init memblock_find_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	long idx;

	BUG_ON(size == 0);

	size = memblock_align_up(size, align);

	/* Translate the "accessible" placeholder into the real limit. */
	if (max_addr == MEMBLOCK_ALLOC_ACCESSIBLE)
		max_addr = memblock.current_limit;

	for (idx = memblock.memory.cnt - 1; idx >= 0; idx--) {
		phys_addr_t rgn_start = memblock.memory.regions[idx].base;
		phys_addr_t rgn_len = memblock.memory.regions[idx].size;
		phys_addr_t top, found;

		/* Region too small to ever hold the request. */
		if (rgn_len < size)
			continue;

		top = min(rgn_start + rgn_len, max_addr);
		found = memblock_find_region(rgn_start, top, size, align);
		if (found != MEMBLOCK_ERROR)
			return found;
	}

	return MEMBLOCK_ERROR;
}

140
static void memblock_remove_region(struct memblock_type *type, unsigned long r)
Y
Yinghai Lu 已提交
141 142 143
{
	unsigned long i;

144 145 146
	for (i = r; i < type->cnt - 1; i++) {
		type->regions[i].base = type->regions[i + 1].base;
		type->regions[i].size = type->regions[i + 1].size;
Y
Yinghai Lu 已提交
147
	}
148
	type->cnt--;
Y
Yinghai Lu 已提交
149 150 151
}

/* Assumption: base addr of region 1 < base addr of region 2 */
152
static void memblock_coalesce_regions(struct memblock_type *type,
Y
Yinghai Lu 已提交
153 154
		unsigned long r1, unsigned long r2)
{
155 156
	type->regions[r1].size += type->regions[r2].size;
	memblock_remove_region(type, r2);
Y
Yinghai Lu 已提交
157 158
}

159
/*
 * Add the range [base, base + size) to @type.
 *
 * The range is first merged into an existing entry it is exactly adjacent
 * to; if that makes the entry touch its successor, those two are merged as
 * well. Otherwise the range is inserted so the table stays sorted by base.
 *
 * Returns 0 when the range was stored as-is or already present, the number
 * of merges performed (> 0) when it was coalesced, or -1 when the table is
 * full.
 */
static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
	unsigned long merged = 0;
	long adj, i;

	/* Table holds only a single zero-size entry: overwrite it in place. */
	if (type->cnt == 1 && type->regions[0].size == 0) {
		type->regions[0].base = base;
		type->regions[0].size = size;
		return 0;
	}

	/* Look for an existing entry the new range touches. */
	for (i = 0; i < type->cnt; i++) {
		struct memblock_region *rgn = &type->regions[i];

		/* Exact duplicate of an existing entry: nothing to do. */
		if (rgn->base == base && rgn->size == size)
			return 0;

		adj = memblock_addrs_adjacent(base, size, rgn->base, rgn->size);
		if (adj == 0)
			continue;

		/* New range sits directly below the entry: pull its base down. */
		if (adj > 0)
			rgn->base -= size;
		rgn->size += size;
		merged++;
		break;
	}

	/* The grown entry may now touch the one that follows it. */
	if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i + 1)) {
		memblock_coalesce_regions(type, i, i + 1);
		merged++;
	}

	if (merged)
		return merged;

	if (type->cnt >= type->max)
		return -1;

	/*
	 * No merge possible: walk from the top, shifting larger entries up
	 * one slot until the insertion point is reached.
	 */
	for (i = type->cnt - 1; i >= 0; i--) {
		if (base >= type->regions[i].base) {
			type->regions[i + 1].base = base;
			type->regions[i + 1].size = size;
			break;
		}
		type->regions[i + 1].base = type->regions[i].base;
		type->regions[i + 1].size = type->regions[i].size;
	}

	/* Every entry was shifted: the new range becomes the first one. */
	if (base < type->regions[0].base) {
		type->regions[0].base = base;
		type->regions[0].size = size;
	}
	type->cnt++;

	return 0;
}

223
/*
 * Register [base, base + size) in the "memory" table.
 * Returns whatever memblock_add_region() returns.
 */
long memblock_add(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *mem = &memblock.memory;

	return memblock_add_region(mem, base, size);
}

229
/*
 * Remove [base, base + size) from @type. The range must lie entirely inside
 * a single existing entry.
 *
 * Returns -1 if no entry contains the range, 0 when the entry was dropped or
 * trimmed, or the result of memblock_add_region() when the entry had to be
 * split in two.
 */
static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
	phys_addr_t end = base + size;
	phys_addr_t rgn_start = 0, rgn_stop = 0;	/* initialised to keep gcc quiet */
	int i;

	/* Locate the entry that fully contains the range. */
	for (i = 0; i < type->cnt; i++) {
		rgn_start = type->regions[i].base;
		rgn_stop = rgn_start + type->regions[i].size;

		if (rgn_start <= base && end <= rgn_stop)
			break;
	}

	if (i == type->cnt)
		return -1;

	if (rgn_start == base) {
		if (rgn_stop == end) {
			/* Range covers the whole entry. */
			memblock_remove_region(type, i);
		} else {
			/* Range is a prefix of the entry: move the base up. */
			type->regions[i].base = end;
			type->regions[i].size -= size;
		}
		return 0;
	}

	/* Range is a suffix of the entry: just shorten it. */
	if (rgn_stop == end) {
		type->regions[i].size -= size;
		return 0;
	}

	/*
	 * Range is in the middle: keep the part below the hole in the current
	 * entry and add the part above the hole as a new region.
	 */
	type->regions[i].size = base - type->regions[i].base;
	return memblock_add_region(type, end, rgn_stop - end);
}

277
/*
 * Remove [base, base + size) from the "memory" table.
 * Returns whatever __memblock_remove() returns.
 */
long memblock_remove(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *mem = &memblock.memory;

	return __memblock_remove(mem, base, size);
}

282
/*
 * Release a reservation: remove [base, base + size) from the "reserved"
 * table. Returns whatever __memblock_remove() returns.
 */
long __init memblock_free(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *rsv = &memblock.reserved;

	return __memblock_remove(rsv, base, size);
}

287
/*
 * Record [base, base + size) in the "reserved" table. A zero @size is a BUG.
 * Returns whatever memblock_add_region() returns.
 */
long __init memblock_reserve(phys_addr_t base, phys_addr_t size)
{
	BUG_ON(size == 0);

	return memblock_add_region(&memblock.reserved, base, size);
}

296
/*
 * Find and reserve @size bytes (rounded up to @align) ending at or below
 * @max_addr.
 *
 * Returns the base of the reserved range, or 0 when no range was found or
 * the reservation could not be recorded.
 */
phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t base;

	/*
	 * Round the size up to the alignment to limit fragmentation; without
	 * this, many small allocations quickly fill the reserve array on sparc.
	 */
	size = memblock_align_up(size, align);

	base = memblock_find_base(size, align, max_addr);
	if (base == MEMBLOCK_ERROR)
		return 0;

	if (memblock_add_region(&memblock.reserved, base, size) < 0)
		return 0;

	return base;
}

313
/*
 * Same as __memblock_alloc_base(), but panics instead of returning 0 when
 * the allocation fails.
 */
phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t base = __memblock_alloc_base(size, align, max_addr);

	if (!base)
		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
		      (unsigned long long) size, (unsigned long long) max_addr);

	return base;
}

326
/*
 * Allocate @size bytes with alignment @align, using
 * MEMBLOCK_ALLOC_ACCESSIBLE as the upper bound. Failure handling is that of
 * memblock_alloc_base().
 */
phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
{
	phys_addr_t base;

	base = memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
	return base;
}
Y
Yinghai Lu 已提交
330 331


332 333 334 335 336
/*
 * Additional node-local allocators. Search for node memory is bottom up
 * and walks memblock regions within that node bottom-up as well, but allocation
 * within a memblock region is top-down.
 */
Y
Yinghai Lu 已提交
337

338
/*
 * Weak default: report the whole range [start, end) as belonging to node 0.
 * Stores the node id through @nid and returns the end of the sub-range that
 * starts at @start and lies on that node (here always @end).
 */
phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid)
{
	phys_addr_t range_end = end;

	*nid = 0;

	return range_end;
}

345 346 347
/*
 * Try to allocate @size bytes on node @nid from the memory region @mp.
 *
 * The region is walked bottom-up in the per-node sub-ranges reported by
 * memblock_nid_range(); inside a sub-range on the wanted node the search
 * itself is done by memblock_find_region().
 *
 * Returns the base of the newly reserved range, or MEMBLOCK_ERROR.
 */
static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp,
					       phys_addr_t size,
					       phys_addr_t align, int nid)
{
	phys_addr_t limit = mp->base + mp->size;
	phys_addr_t cur = memblock_align_up(mp->base, align);
	phys_addr_t range_end;

	for (; cur < limit; cur = range_end) {
		phys_addr_t found;
		int range_nid;

		range_end = memblock_nid_range(cur, limit, &range_nid);
		if (range_nid != nid)
			continue;

		found = memblock_find_region(cur, range_end, size, align);
		if (found != MEMBLOCK_ERROR &&
		    memblock_add_region(&memblock.reserved, found, size) >= 0)
			return found;
	}

	return MEMBLOCK_ERROR;
}

372
/*
 * Allocate @size bytes (rounded up to @align), preferring memory on node
 * @nid. If no memory region can satisfy the request on that node, fall back
 * to memblock_alloc(). A zero @size is a BUG.
 */
phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	int idx;

	BUG_ON(size == 0);

	/*
	 * Round the size up to the alignment to limit fragmentation; without
	 * this, many small allocations quickly fill the reserve array on sparc.
	 */
	size = memblock_align_up(size, align);

	/*
	 * Walk the memory regions bottom-up looking for the right node; that
	 * is the easier direction given how memblock_nid_range() works.
	 */
	for (idx = 0; idx < memblock.memory.cnt; idx++) {
		struct memblock_region *rgn = &memblock.memory.regions[idx];
		phys_addr_t found;

		found = memblock_alloc_nid_region(rgn, size, align, nid);
		if (found != MEMBLOCK_ERROR)
			return found;
	}

	return memblock_alloc(size, align);
}

/* You must call memblock_analyze() before this. */
399
/*
 * Return the cached total in memblock.memory_size, which is computed by
 * memblock_analyze().
 */
phys_addr_t __init memblock_phys_mem_size(void)
{
	phys_addr_t total = memblock.memory_size;

	return total;
}

404
phys_addr_t memblock_end_of_DRAM(void)
Y
Yinghai Lu 已提交
405 406 407
{
	int idx = memblock.memory.cnt - 1;

408
	return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
Y
Yinghai Lu 已提交
409 410 411
}

/* You must call memblock_analyze() after this. */
412
/*
 * Cap the total amount of memory at @memory_limit bytes.
 *
 * Memory regions are kept in table order until their sizes add up to the
 * limit; the region that crosses it is shortened and all later ones are
 * dropped. Reserved regions are then clipped to the new end of DRAM, and
 * any that end up empty are removed. A zero limit means "no limit".
 */
void __init memblock_enforce_memory_limit(phys_addr_t memory_limit)
{
	unsigned long i;
	phys_addr_t remaining;
	struct memblock_region *rsv;

	if (!memory_limit)
		return;

	/* Truncate the memory table so it adds up to the limit. */
	remaining = memory_limit;
	for (i = 0; i < memblock.memory.cnt; i++) {
		struct memblock_region *mem = &memblock.memory.regions[i];

		if (remaining <= mem->size) {
			mem->size = remaining;
			memblock.memory.cnt = i + 1;
			break;
		}
		remaining -= mem->size;
	}

	/* From here on the limit is an address, not a byte count. */
	memory_limit = memblock_end_of_DRAM();

	/* Clip or drop reservations that reach above the new end of DRAM. */
	i = 0;
	while (i < memblock.reserved.cnt) {
		rsv = &memblock.reserved.regions[i];

		if (rsv->base > memory_limit)
			rsv->size = 0;
		else if ((rsv->base + rsv->size) > memory_limit)
			rsv->size = memory_limit - rsv->base;

		/* After a removal, slot i holds the next entry: do not advance. */
		if (rsv->size == 0)
			memblock_remove_region(&memblock.reserved, i);
		else
			i++;
	}
}

452
/*
 * Binary-search @type for the entry that contains @addr. This relies on the
 * table being sorted by base address.
 *
 * Returns the index of the containing entry, or -1 if there is none.
 */
static int memblock_search(struct memblock_type *type, phys_addr_t addr)
{
	unsigned int lo = 0, hi = type->cnt;

	for (;;) {
		unsigned int probe = (hi + lo) / 2;
		struct memblock_region *rgn = &type->regions[probe];

		if (addr < rgn->base)
			hi = probe;
		else if (addr >= (rgn->base + rgn->size))
			lo = probe + 1;
		else
			return probe;

		if (lo >= hi)
			break;
	}

	return -1;
}

470
/* Return non-zero when @addr lies inside an entry of the "reserved" table. */
int __init memblock_is_reserved(phys_addr_t addr)
{
	int idx = memblock_search(&memblock.reserved, addr);

	return idx != -1;
}
Y
Yinghai Lu 已提交
474

475
/* Return non-zero when @addr lies inside an entry of the "memory" table. */
int memblock_is_memory(phys_addr_t addr)
{
	int idx = memblock_search(&memblock.memory, addr);

	return idx != -1;
}

480
/*
 * Return non-zero when [base, base + size) is entirely contained in a
 * single entry of the "memory" table, zero otherwise.
 *
 * The lookup must use memblock.memory: consulting the reserved table
 * instead would answer "is this range reserved", not "is it memory".
 */
int memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
{
	int idx = memblock_search(&memblock.memory, base);

	if (idx == -1)
		return 0;
	return memblock.memory.regions[idx].base <= base &&
		(memblock.memory.regions[idx].base +
		 memblock.memory.regions[idx].size) >= (base + size);
}

491
/*
 * Return non-zero when [base, base + size) overlaps any entry of the
 * "reserved" table.
 */
int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
{
	long idx = memblock_overlaps_region(&memblock.reserved, base, size);

	return idx >= 0;
}

496

497
/*
 * Set memblock.current_limit, the bound memblock_find_base() substitutes
 * when it is asked for MEMBLOCK_ALLOC_ACCESSIBLE.
 */
void __init memblock_set_current_limit(phys_addr_t limit)
{
	struct memblock *state = &memblock;

	state->current_limit = limit;
}

502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545
/*
 * Print the entry count of @region followed by one line per entry showing
 * its index, first address, last address and size. @name labels the output.
 */
static void memblock_dump(struct memblock_type *region, char *name)
{
	int idx;

	pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);

	for (idx = 0; idx < region->cnt; idx++) {
		unsigned long long start = region->regions[idx].base;
		unsigned long long len = region->regions[idx].size;
		unsigned long long last = start + len - 1;

		pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
		    name, idx, start, last, len);
	}
}

/*
 * Print the cached memory size and both region tables. Does nothing unless
 * memblock_debug is set.
 */
void memblock_dump_all(void)
{
	if (memblock_debug) {
		pr_info("MEMBLOCK configuration:\n");
		pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size);

		memblock_dump(&memblock.memory, "memory");
		memblock_dump(&memblock.reserved, "reserved");
	}
}

/*
 * Warn if the marker in the spare last slot of either initial region array
 * is no longer intact, then recompute memblock.memory_size as the sum of
 * the sizes of all "memory" entries.
 */
void __init memblock_analyze(void)
{
	struct memblock_region *mem_guard =
		&memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS];
	struct memblock_region *rsv_guard =
		&memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS];
	int idx;

	/* The spare slots must still hold the marker written at init time. */
	WARN_ON(mem_guard->base != (phys_addr_t)RED_INACTIVE);
	WARN_ON(rsv_guard->base != (phys_addr_t)RED_INACTIVE);

	memblock.memory_size = 0;

	for (idx = 0; idx < memblock.memory.cnt; idx++) {
		struct memblock_region *rgn = &memblock.memory.regions[idx];

		memblock.memory_size += rgn->size;
	}
}

546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
/*
 * Put the global memblock state into its initial configuration: both tables
 * point at their static arrays, each holds one zero-size placeholder entry,
 * and the allocation limit is MEMBLOCK_ALLOC_ANYWHERE.
 */
void __init memblock_init(void)
{
	struct memblock_type *mem = &memblock.memory;
	struct memblock_type *rsv = &memblock.reserved;

	/* Attach the static arrays as backing storage. */
	mem->regions = memblock_memory_init_regions;
	mem->max = INIT_MEMBLOCK_REGIONS;
	rsv->regions = memblock_reserved_init_regions;
	rsv->max = INIT_MEMBLOCK_REGIONS;

	/* Stamp the spare last slot of each array with a marker. */
	mem->regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE;
	rsv->regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE;

	/*
	 * Start each table with a dummy zero-size entry; the first real add
	 * overwrites it, which keeps the add path simple.
	 */
	mem->regions[0].base = 0;
	mem->regions[0].size = 0;
	mem->cnt = 1;

	rsv->regions[0].base = 0;
	rsv->regions[0].size = 0;
	rsv->cnt = 1;

	memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
}

573 574 575 576 577 578 579 580
/*
 * Handler for the "memblock" early parameter: turn on memblock_debug when
 * the parameter value contains "debug". Always returns 0.
 */
static int __init early_memblock(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "debug"))
		memblock_debug = 1;

	return 0;
}
early_param("memblock", early_memblock);