// SPDX-License-Identifier: GPL-2.0
/*
 * Memory subsystem support
 *
 * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
 *            Dave Hansen <haveblue@us.ibm.com>
 *
 * This file provides the necessary infrastructure to represent
 * a SPARSEMEM-memory-model system's physical memory in /sysfs.
 * All arch-independent code that assumes MEMORY_HOTPLUG requires
 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/xarray.h>

#include <linux/atomic.h>
#include <linux/uaccess.h>

#define MEMORY_CLASS_NAME	"memory"
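
/*
 * Everything below is exposed through sysfs: the root attributes
 * (block_size_bytes, auto_online_blocks and, depending on the config,
 * probe, soft_offline_page, hard_offline_page) live directly in
 * /sys/devices/system/memory/, and each memory block gets a
 * /sys/devices/system/memory/memoryN/ directory with phys_index, state,
 * phys_device, removable and (with CONFIG_MEMORY_HOTREMOVE) valid_zones.
 */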

static const char *const online_type_to_str[] = {
	[MMOP_OFFLINE] = "offline",
	[MMOP_ONLINE] = "online",
	[MMOP_ONLINE_KERNEL] = "online_kernel",
	[MMOP_ONLINE_MOVABLE] = "online_movable",
};

int memhp_online_type_from_str(const char *str)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
		if (sysfs_streq(str, online_type_to_str[i]))
			return i;
	}
	return -EINVAL;
}
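
/*
 * memhp_online_type_from_str() backs both state_store() and
 * auto_online_blocks_store() below, so e.g.
 * "echo online_movable > /sys/devices/system/memory/memoryN/state"
 * ends up being parsed here.
 */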

#define to_memory_block(dev) container_of(dev, struct memory_block, dev)

static int sections_per_block;

static inline unsigned long memory_block_id(unsigned long section_nr)
{
	return section_nr / sections_per_block;
}

static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
	return memory_block_id(pfn_to_section_nr(pfn));
}

static inline unsigned long phys_to_block_id(unsigned long phys)
{
	return pfn_to_block_id(PFN_DOWN(phys));
}
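
/*
 * Worked example (typical x86-64 defaults, where both the section size and
 * the memory block size are 128 MiB, i.e. sections_per_block == 1):
 * physical address 0x28000000 (640 MiB) -> PFN 0x28000 -> section 5 ->
 * block id 5, i.e. /sys/devices/system/memory/memory5.
 */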

static int memory_subsys_online(struct device *dev);
static int memory_subsys_offline(struct device *dev);

static struct bus_type memory_subsys = {
	.name = MEMORY_CLASS_NAME,
	.dev_name = MEMORY_CLASS_NAME,
	.online = memory_subsys_online,
	.offline = memory_subsys_offline,
};

/*
 * Memory blocks are cached in a local xarray to avoid a costly linear
 * search for the corresponding device on the subsystem bus.
 */
static DEFINE_XARRAY(memory_blocks);

static BLOCKING_NOTIFIER_HEAD(memory_chain);

int register_memory_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&memory_chain, nb);
}
EXPORT_SYMBOL(register_memory_notifier);

void unregister_memory_notifier(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&memory_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_notifier);

static void memory_block_release(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);

	kfree(mem);
}

unsigned long __weak memory_block_size_bytes(void)
{
	return MIN_MEMORY_BLOCK_SIZE;
}
EXPORT_SYMBOL_GPL(memory_block_size_bytes);
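
/*
 * Architectures may override the __weak default above; e.g. x86-64 picks a
 * larger, boot-time probed block size on big machines, which is why the
 * value exposed in /sys/devices/system/memory/block_size_bytes can differ
 * from MIN_MEMORY_BLOCK_SIZE.
 */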

/*
 * Show the first physical section index (number) of this memory block.
 */
static ssize_t phys_index_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long phys_index;

	phys_index = mem->start_section_nr / sections_per_block;
	return sprintf(buf, "%08lx\n", phys_index);
}

/*
 * Legacy interface that we cannot remove. Always indicate "removable"
 * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
 */
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
}

/*
 * online, offline, going offline, etc.
 */
static ssize_t state_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	ssize_t len = 0;

	/*
	 * We can probably put these states in a nice little array
	 * so that they're not open-coded
	 */
	switch (mem->state) {
	case MEM_ONLINE:
		len = sprintf(buf, "online\n");
		break;
	case MEM_OFFLINE:
		len = sprintf(buf, "offline\n");
		break;
	case MEM_GOING_OFFLINE:
		len = sprintf(buf, "going-offline\n");
		break;
	default:
		len = sprintf(buf, "ERROR-UNKNOWN-%ld\n",
				mem->state);
		WARN_ON(1);
		break;
	}

	return len;
}

int memory_notify(unsigned long val, void *v)
{
	return blocking_notifier_call_chain(&memory_chain, val, v);
}
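
/*
 * memory_notify() is what delivers the MEM_GOING_ONLINE/MEM_ONLINE/
 * MEM_GOING_OFFLINE/MEM_OFFLINE/MEM_CANCEL_* events from
 * mm/memory_hotplug.c to everyone registered via register_memory_notifier().
 */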

/*
 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 * OK to have direct references to sparsemem variables in here.
 */
static int
memory_block_action(unsigned long start_section_nr, unsigned long action,
		    int online_type, int nid)
{
	unsigned long start_pfn;
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	int ret;

	start_pfn = section_nr_to_pfn(start_section_nr);

	switch (action) {
	case MEM_ONLINE:
		ret = online_pages(start_pfn, nr_pages, online_type, nid);
		break;
	case MEM_OFFLINE:
		ret = offline_pages(start_pfn, nr_pages);
		break;
	default:
		WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
		     "%ld\n", __func__, start_section_nr, action, action);
		ret = -EINVAL;
	}

	return ret;
}

static int memory_block_change_state(struct memory_block *mem,
		unsigned long to_state, unsigned long from_state_req)
{
	int ret = 0;

	if (mem->state != from_state_req)
		return -EINVAL;

	if (to_state == MEM_OFFLINE)
		mem->state = MEM_GOING_OFFLINE;

	ret = memory_block_action(mem->start_section_nr, to_state,
				  mem->online_type, mem->nid);

	mem->state = ret ? from_state_req : to_state;

	return ret;
}
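
/*
 * Offlining therefore goes MEM_ONLINE -> MEM_GOING_OFFLINE -> MEM_OFFLINE
 * and falls back to MEM_ONLINE if offline_pages() fails; onlining goes
 * straight from MEM_OFFLINE to MEM_ONLINE.
 */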

/* The device lock serializes operations on memory_subsys_[online|offline] */
static int memory_subsys_online(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	if (mem->state == MEM_ONLINE)
		return 0;

	/*
	 * When called via device_online() without configuring the online_type,
	 * we want to default to MMOP_ONLINE.
	 */
	if (mem->online_type == MMOP_OFFLINE)
		mem->online_type = MMOP_ONLINE;

	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
	mem->online_type = MMOP_OFFLINE;

	return ret;
}

static int memory_subsys_offline(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);

	if (mem->state == MEM_OFFLINE)
		return 0;

	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
}

static ssize_t state_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	const int online_type = memhp_online_type_from_str(buf);
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	if (online_type < 0)
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	switch (online_type) {
	case MMOP_ONLINE_KERNEL:
	case MMOP_ONLINE_MOVABLE:
	case MMOP_ONLINE:
		/* mem->online_type is protected by device_hotplug_lock */
		mem->online_type = online_type;
		ret = device_online(&mem->dev);
		break;
	case MMOP_OFFLINE:
		ret = device_offline(&mem->dev);
		break;
	default:
		ret = -EINVAL; /* should never happen */
	}

	unlock_device_hotplug();

	if (ret < 0)
		return ret;
	if (ret)
		return -EINVAL;

	return count;
}
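
/*
 * Example from userspace:
 *   # echo offline > /sys/devices/system/memory/memory32/state
 *   # cat /sys/devices/system/memory/memory32/state
 *   offline
 * A positive return from device_online()/device_offline() (e.g. the block
 * was already in the requested state) is reported to userspace as -EINVAL.
 */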

/*
 * phys_device is a bad name for this.  What I really want
 * is a way to differentiate between memory ranges that
 * are part of physical devices that constitute
 * a complete removable unit or FRU.
 * i.e. do these ranges belong to the same physical device,
 * s.t. if I offline all of these sections I can then
 * remove the physical device?
 */
static ssize_t phys_device_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	return sprintf(buf, "%d\n", mem->phys_device);
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn,
		unsigned long nr_pages, int online_type,
		struct zone *default_zone)
{
	struct zone *zone;

	zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
	if (zone != default_zone) {
		strcat(buf, " ");
		strcat(buf, zone->name);
	}
}

static ssize_t valid_zones_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	struct zone *default_zone;
	int nid;

	/*
	 * Check the existing zone. Make sure that we do that only on the
	 * online nodes, otherwise the page_zone is not reliable.
	 */
	if (mem->state == MEM_ONLINE) {
		/*
		 * A block that spans more than one zone can not be offlined.
		 * This can happen e.g. for ZONE_DMA and ZONE_DMA32.
		 */
		default_zone = test_pages_in_a_zone(start_pfn,
						    start_pfn + nr_pages);
		if (!default_zone)
			return sprintf(buf, "none\n");
		strcat(buf, default_zone->name);
		goto out;
	}

	nid = mem->nid;
	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn,
					  nr_pages);
	strcat(buf, default_zone->name);

	print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL,
			default_zone);
	print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE,
			default_zone);
out:
	strcat(buf, "\n");

	return strlen(buf);
}
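
/*
 * The output is the default zone for onlining first, followed by any other
 * zones this block could be onlined to, e.g. "Normal Movable\n"; for an
 * online block only its current zone is printed, or "none" if the block
 * straddles multiple zones.
 */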
static DEVICE_ATTR_RO(valid_zones);
#endif

static DEVICE_ATTR_RO(phys_index);
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RO(phys_device);
static DEVICE_ATTR_RO(removable);

/*
 * Show the memory block size (shared by all memory blocks).
 */
static ssize_t block_size_bytes_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%lx\n", memory_block_size_bytes());
}

static DEVICE_ATTR_RO(block_size_bytes);

/*
 * Memory auto online policy.
 */

static ssize_t auto_online_blocks_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n",
		       online_type_to_str[memhp_default_online_type]);
}

static ssize_t auto_online_blocks_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	const int online_type = memhp_online_type_from_str(buf);

	if (online_type < 0)
		return -EINVAL;

	memhp_default_online_type = online_type;
	return count;
}
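
/*
 * memhp_default_online_type controls what happens to newly hot-added memory
 * blocks: "offline" leaves them for userspace to online, while the "online*"
 * values make the kernel online them right away.  The boot-time default
 * comes from CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE and can also be set with
 * the memhp_default_state= command line parameter.
 */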

static DEVICE_ATTR_RW(auto_online_blocks);

/*
 * Some architectures will have custom drivers to do this, and
 * will not need to do it from userspace.  The fake hot-add code
 * as well as ppc64 will do all of their discovery in userspace
 * and will require this interface.
 */
#ifdef CONFIG_ARCH_MEMORY_PROBE
static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	u64 phys_addr;
	int nid, ret;
	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;

	ret = kstrtoull(buf, 0, &phys_addr);
	if (ret)
		return ret;

	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	nid = memory_add_physaddr_to_nid(phys_addr);
	ret = __add_memory(nid, phys_addr,
			   MIN_MEMORY_BLOCK_SIZE * sections_per_block);

	if (ret)
		goto out;

	ret = count;
out:
	unlock_device_hotplug();
	return ret;
}
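
/*
 * Example: "echo 0x40000000 > /sys/devices/system/memory/probe" hot-adds
 * one memory block starting at physical address 1 GiB.  The address must
 * be aligned to the memory block size, otherwise -EINVAL is returned.
 */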

static DEVICE_ATTR_WO(probe);
#endif

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Support for offlining pages of memory
 */

/* Soft offline a page */
static ssize_t soft_offline_page_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	int ret;
	u64 pfn;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (kstrtoull(buf, 0, &pfn) < 0)
		return -EINVAL;
	pfn >>= PAGE_SHIFT;
	ret = soft_offline_page(pfn, 0);
	return ret == 0 ? count : ret;
}

/* Forcibly offline a page, including killing processes. */
static ssize_t hard_offline_page_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	int ret;
	u64 pfn;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (kstrtoull(buf, 0, &pfn) < 0)
		return -EINVAL;
	pfn >>= PAGE_SHIFT;
	ret = memory_failure(pfn, 0);
	return ret ? ret : count;
}
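
/*
 * Note that despite the "pfn" variable name, both files take a physical
 * address: the written value is shifted right by PAGE_SHIFT before being
 * handed to soft_offline_page()/memory_failure(), so e.g.
 * "echo 0x200000 > soft_offline_page" targets PFN 0x200.
 */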

static DEVICE_ATTR_WO(soft_offline_page);
static DEVICE_ATTR_WO(hard_offline_page);
#endif

/*
 * Note that phys_device is optional.  It is here to allow for
 * differentiation between which *physical* devices each
 * section belongs to...
 */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
	return 0;
}

/*
 * A reference for the returned memory block device is acquired.
 *
 * Called under device_hotplug_lock.
 */
static struct memory_block *find_memory_block_by_id(unsigned long block_id)
{
	struct memory_block *mem;

	mem = xa_load(&memory_blocks, block_id);
	if (mem)
		get_device(&mem->dev);
	return mem;
}

/*
 * Called under device_hotplug_lock.
 */
struct memory_block *find_memory_block(struct mem_section *section)
{
	unsigned long block_id = memory_block_id(__section_nr(section));

	return find_memory_block_by_id(block_id);
}
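
/*
 * Both lookups return the block with an elevated reference count; callers
 * must drop it with put_device() once they are done with the block, as
 * walk_memory_blocks() below does.
 */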

static struct attribute *memory_memblk_attrs[] = {
	&dev_attr_phys_index.attr,
	&dev_attr_state.attr,
	&dev_attr_phys_device.attr,
	&dev_attr_removable.attr,
#ifdef CONFIG_MEMORY_HOTREMOVE
	&dev_attr_valid_zones.attr,
#endif
	NULL
};

static struct attribute_group memory_memblk_attr_group = {
	.attrs = memory_memblk_attrs,
};

static const struct attribute_group *memory_memblk_attr_groups[] = {
	&memory_memblk_attr_group,
	NULL,
};

/*
 * register_memory - Setup a sysfs device for a memory block
 */
static
int register_memory(struct memory_block *memory)
{
	int ret;

	memory->dev.bus = &memory_subsys;
	memory->dev.id = memory->start_section_nr / sections_per_block;
	memory->dev.release = memory_block_release;
	memory->dev.groups = memory_memblk_attr_groups;
	memory->dev.offline = memory->state == MEM_OFFLINE;

	ret = device_register(&memory->dev);
	if (ret) {
		put_device(&memory->dev);
		return ret;
	}
	ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory,
			      GFP_KERNEL));
	if (ret) {
		put_device(&memory->dev);
		device_unregister(&memory->dev);
	}
	return ret;
}

static int init_memory_block(unsigned long block_id, unsigned long state)
{
	struct memory_block *mem;
	unsigned long start_pfn;
	int ret = 0;

	mem = find_memory_block_by_id(block_id);
	if (mem) {
		put_device(&mem->dev);
		return -EEXIST;
	}
	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem)
		return -ENOMEM;

	mem->start_section_nr = block_id * sections_per_block;
	mem->state = state;
	start_pfn = section_nr_to_pfn(mem->start_section_nr);
	mem->phys_device = arch_get_memory_phys_device(start_pfn);
	mem->nid = NUMA_NO_NODE;

	ret = register_memory(mem);

	return ret;
}

static int add_memory_block(unsigned long base_section_nr)
{
	int section_count = 0;
	unsigned long nr;

	for (nr = base_section_nr; nr < base_section_nr + sections_per_block;
	     nr++)
		if (present_section_nr(nr))
			section_count++;

	if (section_count == 0)
		return 0;
	return init_memory_block(memory_block_id(base_section_nr),
				 MEM_ONLINE);
}

static void unregister_memory(struct memory_block *memory)
{
	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
		return;

	WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);

	/* drop the ref. we got via find_memory_block() */
	put_device(&memory->dev);
	device_unregister(&memory->dev);
}

/*
 * Create memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * will be initialized as offline.
 *
 * Called under device_hotplug_lock.
 */
int create_memory_block_devices(unsigned long start, unsigned long size)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;
	int ret = 0;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return -EINVAL;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		ret = init_memory_block(block_id, MEM_OFFLINE);
		if (ret)
			break;
	}
	if (ret) {
		end_block_id = block_id;
		for (block_id = start_block_id; block_id != end_block_id;
		     block_id++) {
			mem = find_memory_block_by_id(block_id);
			if (WARN_ON_ONCE(!mem))
				continue;
			unregister_memory(mem);
		}
	}
	return ret;
}

/*
 * Remove memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * have to be offline.
 *
 * Called under device_hotplug_lock.
 */
void remove_memory_block_devices(unsigned long start, unsigned long size)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		mem = find_memory_block_by_id(block_id);
		if (WARN_ON_ONCE(!mem))
			continue;
		unregister_memory_block_under_nodes(mem);
		unregister_memory(mem);
	}
}

/* return true if the memory block is offlined, otherwise, return false */
bool is_memblock_offlined(struct memory_block *mem)
{
	return mem->state == MEM_OFFLINE;
}

static struct attribute *memory_root_attrs[] = {
#ifdef CONFIG_ARCH_MEMORY_PROBE
	&dev_attr_probe.attr,
#endif

#ifdef CONFIG_MEMORY_FAILURE
	&dev_attr_soft_offline_page.attr,
	&dev_attr_hard_offline_page.attr,
#endif

	&dev_attr_block_size_bytes.attr,
	&dev_attr_auto_online_blocks.attr,
	NULL
};

static struct attribute_group memory_root_attr_group = {
	.attrs = memory_root_attrs,
};

static const struct attribute_group *memory_root_attr_groups[] = {
	&memory_root_attr_group,
	NULL,
};

/*
 * Initialize the sysfs support for memory devices. At the time this function
 * is called, we cannot have concurrent creation/deletion of memory block
 * devices, so the device_hotplug_lock is not needed.
 */
void __init memory_dev_init(void)
{
	int ret;
	unsigned long block_sz, nr;

	/* Validate the configured memory block size */
	block_sz = memory_block_size_bytes();
	if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
		panic("Memory block size not suitable: 0x%lx\n", block_sz);
	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;

	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
	if (ret)
		panic("%s() failed to register subsystem: %d\n", __func__, ret);

	/*
	 * Create entries for memory sections that were found
	 * during boot and have been initialized
	 */
	for (nr = 0; nr <= __highest_present_section_nr;
	     nr += sections_per_block) {
		ret = add_memory_block(nr);
		if (ret)
			panic("%s() failed to add memory block: %d\n", __func__,
			      ret);
	}
}

/**
 * walk_memory_blocks - walk through all present memory blocks overlapped
 *			by the range [start, start + size)
 *
 * @start: start address of the memory range
 * @size: size of the memory range
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks overlapped by the
 * range [start, start + size), calling func on each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 *
 * Called under device_hotplug_lock.
 */
int walk_memory_blocks(unsigned long start, unsigned long size,
		       void *arg, walk_memory_blocks_func_t func)
{
	const unsigned long start_block_id = phys_to_block_id(start);
	const unsigned long end_block_id = phys_to_block_id(start + size - 1);
	struct memory_block *mem;
	unsigned long block_id;
	int ret = 0;

	if (!size)
		return 0;

	for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
		mem = find_memory_block_by_id(block_id);
		if (!mem)
			continue;

		ret = func(mem, arg);
		put_device(&mem->dev);
		if (ret)
			break;
	}
	return ret;
}

struct for_each_memory_block_cb_data {
	walk_memory_blocks_func_t func;
	void *arg;
};

static int for_each_memory_block_cb(struct device *dev, void *data)
{
	struct memory_block *mem = to_memory_block(dev);
	struct for_each_memory_block_cb_data *cb_data = data;

	return cb_data->func(mem, cb_data->arg);
}

/**
 * for_each_memory_block - walk through all present memory blocks
 *
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks, calling func on
 * each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 */
int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
{
	struct for_each_memory_block_cb_data cb_data = {
		.func = func,
		.arg = arg,
	};

	return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
				for_each_memory_block_cb);
}