// SPDX-License-Identifier: GPL-2.0
/*
 * Memory subsystem support
 *
 * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
 *            Dave Hansen <haveblue@us.ibm.com>
 *
 * This file provides the necessary infrastructure to represent
 * a SPARSEMEM-memory-model system's physical memory in /sysfs.
 * All arch-independent code that assumes MEMORY_HOTPLUG requires
 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/xarray.h>

#include <linux/atomic.h>
#include <linux/uaccess.h>

#define MEMORY_CLASS_NAME	"memory"

static const char *const online_type_to_str[] = {
	[MMOP_OFFLINE] = "offline",
	[MMOP_ONLINE] = "online",
	[MMOP_ONLINE_KERNEL] = "online_kernel",
	[MMOP_ONLINE_MOVABLE] = "online_movable",
};

int memhp_online_type_from_str(const char *str)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
		if (sysfs_streq(str, online_type_to_str[i]))
			return i;
	}
	return -EINVAL;
}

#define to_memory_block(dev) container_of(dev, struct memory_block, dev)

static int sections_per_block;

static inline unsigned long memory_block_id(unsigned long section_nr)
{
	return section_nr / sections_per_block;
}

static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
	return memory_block_id(pfn_to_section_nr(pfn));
}

static inline unsigned long phys_to_block_id(unsigned long phys)
{
	return pfn_to_block_id(PFN_DOWN(phys));
}
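
/*
 * Worked example (illustrative; assumes 4 KiB pages and 128 MiB sections,
 * i.e. SECTION_SIZE_BITS = 27, numbers are hypothetical): with a 2 GiB
 * memory block size, sections_per_block = 2 GiB / 128 MiB = 16. PFN
 * 0x48000 then maps to section 0x48000 >> (27 - 12) = 9, which lives in
 * memory block 9 / 16 = 0.
 */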

static int memory_subsys_online(struct device *dev);
static int memory_subsys_offline(struct device *dev);

static struct bus_type memory_subsys = {
	.name = MEMORY_CLASS_NAME,
	.dev_name = MEMORY_CLASS_NAME,
	.online = memory_subsys_online,
	.offline = memory_subsys_offline,
};

/*
 * Memory blocks are cached in a local radix tree to avoid
 * a costly linear search for the corresponding device on
 * the subsystem bus.
 */
static DEFINE_XARRAY(memory_blocks);

static BLOCKING_NOTIFIER_HEAD(memory_chain);

int register_memory_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&memory_chain, nb);
}
EXPORT_SYMBOL(register_memory_notifier);

void unregister_memory_notifier(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&memory_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_notifier);
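
/*
 * Illustrative sketch (names are made up, not part of this file): a
 * subsystem that wants to react to hotplug events registers a callback
 * on this chain, e.g.:
 *
 *	static int my_mem_callback(struct notifier_block *nb,
 *				   unsigned long action, void *arg)
 *	{
 *		struct memory_notify *mn = arg;
 *
 *		if (action == MEM_ONLINE)
 *			pr_info("onlined %lu pages\n", mn->nr_pages);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_mem_nb = {
 *		.notifier_call = my_mem_callback,
 *	};
 *	...
 *	register_memory_notifier(&my_mem_nb);
 */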

static void memory_block_release(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);

	kfree(mem);
}

unsigned long __weak memory_block_size_bytes(void)
{
	return MIN_MEMORY_BLOCK_SIZE;
}
EXPORT_SYMBOL_GPL(memory_block_size_bytes);

/*
 * Show the first physical section index (number) of this memory block.
 */
static ssize_t phys_index_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long phys_index;

	phys_index = mem->start_section_nr / sections_per_block;
	return sysfs_emit(buf, "%08lx\n", phys_index);
}

/*
 * Legacy interface that we cannot remove. Always indicate "removable"
 * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
 */
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sysfs_emit(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
}

/*
 * online, offline, going offline, etc.
 */
static ssize_t state_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	const char *output;

	/*
	 * We can probably put these states in a nice little array
	 * so that they're not open-coded
	 */
	switch (mem->state) {
	case MEM_ONLINE:
		output = "online";
		break;
	case MEM_OFFLINE:
		output = "offline";
		break;
	case MEM_GOING_OFFLINE:
		output = "going-offline";
		break;
	default:
		WARN_ON(1);
		return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state);
	}

	return sysfs_emit(buf, "%s\n", output);
}

int memory_notify(unsigned long val, void *v)
{
	return blocking_notifier_call_chain(&memory_chain, val, v);
}

/*
 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 * OK to have direct references to sparsemem variables in here.
 */
static int
memory_block_action(unsigned long start_section_nr, unsigned long action,
		    int online_type, int nid)
{
	unsigned long start_pfn;
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	int ret;

	start_pfn = section_nr_to_pfn(start_section_nr);

	switch (action) {
	case MEM_ONLINE:
		ret = online_pages(start_pfn, nr_pages, online_type, nid);
		break;
	case MEM_OFFLINE:
		ret = offline_pages(start_pfn, nr_pages);
		break;
	default:
		WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
		     "%ld\n", __func__, start_section_nr, action, action);
		ret = -EINVAL;
	}

	return ret;
}

static int memory_block_change_state(struct memory_block *mem,
		unsigned long to_state, unsigned long from_state_req)
{
	int ret = 0;

	if (mem->state != from_state_req)
		return -EINVAL;

	if (to_state == MEM_OFFLINE)
		mem->state = MEM_GOING_OFFLINE;

	ret = memory_block_action(mem->start_section_nr, to_state,
				  mem->online_type, mem->nid);

	mem->state = ret ? from_state_req : to_state;

	return ret;
}

/* The device lock serializes operations on memory_subsys_[online|offline] */
static int memory_subsys_online(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	if (mem->state == MEM_ONLINE)
		return 0;

	/*
	 * When called via device_online() without configuring the online_type,
	 * we want to default to MMOP_ONLINE.
	 */
	if (mem->online_type == MMOP_OFFLINE)
		mem->online_type = MMOP_ONLINE;

	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
	mem->online_type = MMOP_OFFLINE;

	return ret;
}

static int memory_subsys_offline(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);

	if (mem->state == MEM_OFFLINE)
		return 0;

	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
}

static ssize_t state_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	const int online_type = memhp_online_type_from_str(buf);
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	if (online_type < 0)
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	switch (online_type) {
	case MMOP_ONLINE_KERNEL:
	case MMOP_ONLINE_MOVABLE:
	case MMOP_ONLINE:
		/* mem->online_type is protected by device_hotplug_lock */
		mem->online_type = online_type;
		ret = device_online(&mem->dev);
		break;
	case MMOP_OFFLINE:
		ret = device_offline(&mem->dev);
		break;
	default:
		ret = -EINVAL; /* should never happen */
	}

	unlock_device_hotplug();

	if (ret < 0)
		return ret;
	if (ret)
		return -EINVAL;

	return count;
}
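
/*
 * Illustrative sysfs usage (the block number is made up):
 *
 *	% cat /sys/devices/system/memory/memory32/state
 *	offline
 *	% echo online_movable > /sys/devices/system/memory/memory32/state
 */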

/*
 * phys_device is a bad name for this.  What I really want
 * is a way to differentiate between memory ranges that
 * are part of physical devices that constitute
 * a complete removable unit or FRU.
 * i.e. do these ranges belong to the same physical device,
 * such that if I offline all of these sections I can then
 * remove the physical device?
 */
static ssize_t phys_device_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);

	return sysfs_emit(buf, "%d\n", mem->phys_device);
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static int print_allowed_zone(char *buf, int len, int nid,
			      unsigned long start_pfn, unsigned long nr_pages,
			      int online_type, struct zone *default_zone)
{
	struct zone *zone;

	zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
	if (zone == default_zone)
		return 0;
	return sysfs_emit_at(buf, len, " %s", zone->name);
}

static ssize_t valid_zones_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	struct zone *default_zone;
	int len = 0;
	int nid;

	/*
	 * Check the existing zone. Make sure that we do that only on
	 * online nodes, otherwise page_zone is not reliable.
	 */
	if (mem->state == MEM_ONLINE) {
		/*
		 * A block that contains more than one zone cannot be
		 * offlined. This can happen e.g. for ZONE_DMA and ZONE_DMA32.
		 */
		default_zone = test_pages_in_a_zone(start_pfn,
						    start_pfn + nr_pages);
		if (!default_zone)
			return sysfs_emit(buf, "%s\n", "none");
		len += sysfs_emit_at(buf, len, "%s", default_zone->name);
		goto out;
	}

	nid = mem->nid;
	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn,
					  nr_pages);

	len += sysfs_emit_at(buf, len, "%s", default_zone->name);
	len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
				  MMOP_ONLINE_KERNEL, default_zone);
	len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
				  MMOP_ONLINE_MOVABLE, default_zone);
out:
	len += sysfs_emit_at(buf, len, "%s", "\n");
	return len;
}
static DEVICE_ATTR_RO(valid_zones);
#endif

static DEVICE_ATTR_RO(phys_index);
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RO(phys_device);
static DEVICE_ATTR_RO(removable);

/*
 * Show the memory block size (shared by all memory blocks).
 */
static ssize_t block_size_bytes_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%lx\n", memory_block_size_bytes());
}

static DEVICE_ATTR_RO(block_size_bytes);
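
/*
 * Illustrative read (the value shown assumes a 128 MiB block size); the
 * "%lx" format prints hex without a "0x" prefix:
 *
 *	% cat /sys/devices/system/memory/block_size_bytes
 *	8000000
 */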

/*
 * Memory auto online policy.
 */

static ssize_t auto_online_blocks_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n",
			  online_type_to_str[memhp_default_online_type]);
}

static ssize_t auto_online_blocks_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	const int online_type = memhp_online_type_from_str(buf);

	if (online_type < 0)
		return -EINVAL;

	memhp_default_online_type = online_type;
	return count;
}

static DEVICE_ATTR_RW(auto_online_blocks);
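
/*
 * Illustrative usage: have newly added memory blocks come up online
 * automatically, instead of requiring a manual write to .../state:
 *
 *	% echo online_movable > /sys/devices/system/memory/auto_online_blocks
 */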

/*
 * Some architectures will have custom drivers to do this, and
 * will not need to do it from userspace.  The fake hot-add code
 * as well as ppc64 will do all of their discovery in userspace
 * and will require this interface.
 */
#ifdef CONFIG_ARCH_MEMORY_PROBE
static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	u64 phys_addr;
	int nid, ret;
	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;

	ret = kstrtoull(buf, 0, &phys_addr);
	if (ret)
		return ret;

	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	nid = memory_add_physaddr_to_nid(phys_addr);
	ret = __add_memory(nid, phys_addr,
			   MIN_MEMORY_BLOCK_SIZE * sections_per_block);

	if (ret)
		goto out;

	ret = count;
out:
	unlock_device_hotplug();
	return ret;
}

static DEVICE_ATTR_WO(probe);
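
/*
 * Illustrative usage (the address is made up and must be block-aligned):
 *
 *	% echo 0x100000000 > /sys/devices/system/memory/probe
 */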
#endif

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Support for offlining pages of memory
 */

/* Soft offline a page */
static ssize_t soft_offline_page_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	int ret;
	u64 pfn;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (kstrtoull(buf, 0, &pfn) < 0)
		return -EINVAL;
	pfn >>= PAGE_SHIFT;
	ret = soft_offline_page(pfn, 0);
	return ret == 0 ? count : ret;
}

/* Forcibly offline a page, including killing processes. */
static ssize_t hard_offline_page_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	int ret;
	u64 pfn;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (kstrtoull(buf, 0, &pfn) < 0)
		return -EINVAL;
	pfn >>= PAGE_SHIFT;
	ret = memory_failure(pfn, 0);
	return ret ? ret : count;
}

static DEVICE_ATTR_WO(soft_offline_page);
static DEVICE_ATTR_WO(hard_offline_page);
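
/*
 * Illustrative usage (the address is made up): both files take a physical
 * address, not a PFN; the containing page is then offlined:
 *
 *	% echo 0x32a00000 > /sys/devices/system/memory/soft_offline_page
 */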
#endif

/*
 * Note that phys_device is optional.  It is here to allow for
 * differentiation between which *physical* devices each
 * section belongs to...
 */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
	return 0;
}

/*
 * A reference for the returned memory block device is acquired.
 *
 * Called under device_hotplug_lock.
 */
static struct memory_block *find_memory_block_by_id(unsigned long block_id)
{
	struct memory_block *mem;

	mem = xa_load(&memory_blocks, block_id);
	if (mem)
		get_device(&mem->dev);
	return mem;
}

/*
 * Called under device_hotplug_lock.
 */
struct memory_block *find_memory_block(struct mem_section *section)
{
	unsigned long block_id = memory_block_id(__section_nr(section));

	return find_memory_block_by_id(block_id);
}

static struct attribute *memory_memblk_attrs[] = {
	&dev_attr_phys_index.attr,
	&dev_attr_state.attr,
	&dev_attr_phys_device.attr,
	&dev_attr_removable.attr,
#ifdef CONFIG_MEMORY_HOTREMOVE
	&dev_attr_valid_zones.attr,
#endif
	NULL
};

static struct attribute_group memory_memblk_attr_group = {
	.attrs = memory_memblk_attrs,
};

static const struct attribute_group *memory_memblk_attr_groups[] = {
	&memory_memblk_attr_group,
	NULL,
};

/*
 * register_memory - Setup a sysfs device for a memory block
 */
static
int register_memory(struct memory_block *memory)
{
	int ret;

	memory->dev.bus = &memory_subsys;
	memory->dev.id = memory->start_section_nr / sections_per_block;
	memory->dev.release = memory_block_release;
	memory->dev.groups = memory_memblk_attr_groups;
	memory->dev.offline = memory->state == MEM_OFFLINE;

	ret = device_register(&memory->dev);
	if (ret) {
		put_device(&memory->dev);
		return ret;
	}
	ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory,
			      GFP_KERNEL));
	if (ret) {
		put_device(&memory->dev);
		device_unregister(&memory->dev);
	}
	return ret;
}

static int init_memory_block(unsigned long block_id, unsigned long state)
{
	struct memory_block *mem;
	unsigned long start_pfn;
	int ret = 0;

	mem = find_memory_block_by_id(block_id);
	if (mem) {
		put_device(&mem->dev);
		return -EEXIST;
	}
	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem)
		return -ENOMEM;

	mem->start_section_nr = block_id * sections_per_block;
	mem->state = state;
	start_pfn = section_nr_to_pfn(mem->start_section_nr);
	mem->phys_device = arch_get_memory_phys_device(start_pfn);
	mem->nid = NUMA_NO_NODE;

	ret = register_memory(mem);

	return ret;
}

static int add_memory_block(unsigned long base_section_nr)
{
	int section_count = 0;
	unsigned long nr;

	for (nr = base_section_nr; nr < base_section_nr + sections_per_block;
	     nr++)
		if (present_section_nr(nr))
			section_count++;

	if (section_count == 0)
		return 0;
	return init_memory_block(memory_block_id(base_section_nr),
				 MEM_ONLINE);
}

static void unregister_memory(struct memory_block *memory)
{
	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
		return;

	WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);

	/* drop the ref. we got via find_memory_block() */
	put_device(&memory->dev);
	device_unregister(&memory->dev);
}

/*
 * Create memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * will be initialized as offline.
 *
 * Called under device_hotplug_lock.
 */
int create_memory_block_devices(unsigned long start, unsigned long size)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;
	int ret = 0;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return -EINVAL;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		ret = init_memory_block(block_id, MEM_OFFLINE);
		if (ret)
			break;
	}
	if (ret) {
		end_block_id = block_id;
		for (block_id = start_block_id; block_id != end_block_id;
		     block_id++) {
			mem = find_memory_block_by_id(block_id);
			if (WARN_ON_ONCE(!mem))
				continue;
			unregister_memory(mem);
		}
	}
	return ret;
}

/*
 * Remove memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * have to be offline.
 *
 * Called under device_hotplug_lock.
 */
void remove_memory_block_devices(unsigned long start, unsigned long size)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		mem = find_memory_block_by_id(block_id);
		if (WARN_ON_ONCE(!mem))
			continue;
		unregister_memory_block_under_nodes(mem);
		unregister_memory(mem);
	}
}

/* return true if the memory block is offlined, otherwise, return false */
bool is_memblock_offlined(struct memory_block *mem)
{
	return mem->state == MEM_OFFLINE;
}

static struct attribute *memory_root_attrs[] = {
#ifdef CONFIG_ARCH_MEMORY_PROBE
	&dev_attr_probe.attr,
#endif

#ifdef CONFIG_MEMORY_FAILURE
	&dev_attr_soft_offline_page.attr,
	&dev_attr_hard_offline_page.attr,
#endif

	&dev_attr_block_size_bytes.attr,
	&dev_attr_auto_online_blocks.attr,
	NULL
};

static struct attribute_group memory_root_attr_group = {
	.attrs = memory_root_attrs,
};

static const struct attribute_group *memory_root_attr_groups[] = {
	&memory_root_attr_group,
	NULL,
};

/*
 * Initialize the sysfs support for memory devices. At the time this function
 * is called, we cannot have concurrent creation/deletion of memory block
 * devices, so the device_hotplug_lock is not needed.
 */
void __init memory_dev_init(void)
{
	int ret;
	unsigned long block_sz, nr;

	/* Validate the configured memory block size */
	block_sz = memory_block_size_bytes();
	if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
		panic("Memory block size not suitable: 0x%lx\n", block_sz);
	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;

	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
	if (ret)
		panic("%s() failed to register subsystem: %d\n", __func__, ret);

	/*
	 * Create entries for memory sections that were found
	 * during boot and have been initialized
	 */
	for (nr = 0; nr <= __highest_present_section_nr;
	     nr += sections_per_block) {
		ret = add_memory_block(nr);
		if (ret)
			panic("%s() failed to add memory block: %d\n", __func__,
			      ret);
	}
}

/**
 * walk_memory_blocks - walk through all present memory blocks overlapped
 *			by the range [start, start + size)
 *
 * @start: start address of the memory range
 * @size: size of the memory range
 * @arg: argument passed to func
 * @func: callback for each memory section walked
 *
 * This function walks through all present memory blocks overlapped by the
 * range [start, start + size), calling func on each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 *
 * Called under device_hotplug_lock.
 */
int walk_memory_blocks(unsigned long start, unsigned long size,
		       void *arg, walk_memory_blocks_func_t func)
{
	const unsigned long start_block_id = phys_to_block_id(start);
	const unsigned long end_block_id = phys_to_block_id(start + size - 1);
	struct memory_block *mem;
	unsigned long block_id;
	int ret = 0;

	if (!size)
		return 0;

	for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
		mem = find_memory_block_by_id(block_id);
		if (!mem)
			continue;

		ret = func(mem, arg);
		put_device(&mem->dev);
		if (ret)
			break;
	}
	return ret;
}
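
/*
 * Illustrative sketch of a walker callback (names are made up): count
 * how many of the walked blocks are online.
 *
 *	static int count_online(struct memory_block *mem, void *arg)
 *	{
 *		unsigned int *online = arg;
 *
 *		if (mem->state == MEM_ONLINE)
 *			(*online)++;
 *		return 0;
 *	}
 *	...
 *	unsigned int online = 0;
 *	walk_memory_blocks(start, size, &online, count_online);
 */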

struct for_each_memory_block_cb_data {
	walk_memory_blocks_func_t func;
	void *arg;
};

static int for_each_memory_block_cb(struct device *dev, void *data)
{
	struct memory_block *mem = to_memory_block(dev);
	struct for_each_memory_block_cb_data *cb_data = data;

	return cb_data->func(mem, cb_data->arg);
}

/**
 * for_each_memory_block - walk through all present memory blocks
 *
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks, calling func on
 * each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 */
int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
{
	struct for_each_memory_block_cb_data cb_data = {
		.func = func,
		.arg = arg,
	};

	return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
				for_each_memory_block_cb);
}