memory_hotplug.c 7.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 *  linux/mm/memory_hotplug.c
 *
 *  Copyright (C)
 */

#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/pagevec.h>
16
#include <linux/writeback.h>
17 18 19 20 21 22 23
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
24
#include <linux/ioport.h>
25
#include <linux/cpuset.h>
26 27 28

#include <asm/tlbflush.h>

29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
/*
 * Add the range [start, start + size - 1] to the iomem resource tree as
 * "System RAM".
 *
 * Returns the newly allocated resource on success. Returns NULL when the
 * resource structure cannot be allocated, or when request_resource()
 * rejects the range (a message is printed in that case). A non-NULL
 * result is undone with release_memory_resource().
 */
static struct resource *register_memory_resource(u64 start, u64 size)
{
	struct resource *res;

	res = kzalloc(sizeof(*res), GFP_KERNEL);
	/*
	 * Allocation failure is recoverable: callers already treat NULL as
	 * "could not register", so report it instead of crashing the kernel.
	 */
	if (!res)
		return NULL;

	res->name = "System RAM";
	res->start = start;
	res->end = start + size - 1;
	res->flags = IORESOURCE_MEM;
	if (request_resource(&iomem_resource, res) < 0) {
		printk("System RAM resource %llx - %llx cannot be added\n",
		(unsigned long long)res->start, (unsigned long long)res->end);
		kfree(res);
		res = NULL;
	}
	return res;
}

/*
 * Release a resource from the resource tree and free it.
 * A NULL @res is accepted and ignored.
 */
static void release_memory_resource(struct resource *res)
{
	if (res) {
		release_resource(res);
		kfree(res);
	}
}


59
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
60
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
61 62 63 64 65 66 67
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	int nr_pages = PAGES_PER_SECTION;
	int nid = pgdat->node_id;
	int zone_type;

	zone_type = zone - pgdat->node_zones;
68
	if (!zone->wait_table) {
69
		int ret = 0;
D
Dave Hansen 已提交
70 71
		ret = init_currently_empty_zone(zone, phys_start_pfn,
						nr_pages, MEMMAP_HOTPLUG);
72 73 74
		if (ret < 0)
			return ret;
	}
D
Dave Hansen 已提交
75 76
	memmap_init_zone(nr_pages, nid, zone_type,
			 phys_start_pfn, MEMMAP_HOTPLUG);
77
	return 0;
78 79 80 81 82 83 84
}

static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
{
	int nr_pages = PAGES_PER_SECTION;
	int ret;

85 86 87
	if (pfn_valid(phys_start_pfn))
		return -EEXIST;

88
	ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
89 90 91 92

	if (ret < 0)
		return ret;

93 94 95 96 97
	ret = __add_zone(zone, phys_start_pfn);

	if (ret < 0)
		return ret;

98 99 100 101 102 103 104 105 106 107 108 109 110 111
	return register_new_memory(__pfn_to_section(phys_start_pfn));
}

/*
 * Reasonably generic function for adding memory.  It is
 * expected that archs that support memory hotplug will
 * call this function after deciding the zone to which to
 * add the new pages.
 */
int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
		 unsigned long nr_pages)
{
	unsigned long i;
	int err = 0;
112 113 114 115
	int start_sec, end_sec;
	/* during initialize mem_map, align hot-added range to section */
	start_sec = pfn_to_section_nr(phys_start_pfn);
	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
116

117 118
	for (i = start_sec; i <= end_sec; i++) {
		err = __add_section(zone, i << PFN_SECTION_SHIFT);
119

120 121 122 123
		/*
		 * EEXIST is finally dealed with by ioresource collision
		 * check. see add_memory() => register_memory_resource()
		 * Warning will be printed if there is collision.
124 125
		 */
		if (err && (err != -EEXIST))
126
			break;
127
		err = 0;
128 129 130 131
	}

	return err;
}
132
EXPORT_SYMBOL_GPL(__add_pages);
133 134 135 136 137 138 139 140 141 142 143 144

/*
 * Widen @zone's span so that it covers [start_pfn, end_pfn) in addition
 * to what it already spans. The zone start is only ever moved down and
 * the zone end only ever moved up. The update is done under the zone's
 * span write lock.
 */
static void grow_zone_span(struct zone *zone,
		unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long prev_end;
	unsigned long new_end;

	zone_span_writelock(zone);

	/* capture the old end before the start pfn is possibly lowered */
	prev_end = zone->zone_start_pfn + zone->spanned_pages;
	new_end = max(prev_end, end_pfn);

	if (zone->zone_start_pfn > start_pfn)
		zone->zone_start_pfn = start_pfn;

	zone->spanned_pages = new_end - zone->zone_start_pfn;

	zone_span_writeunlock(zone);
}

/*
 * Widen the node's span so that it covers [start_pfn, end_pfn) in
 * addition to what it already spans. The node start is only ever moved
 * down and the node end only ever moved up. No locking is done here.
 */
static void grow_pgdat_span(struct pglist_data *pgdat,
		unsigned long start_pfn, unsigned long end_pfn)
{
	/* capture the old end before the start pfn is possibly lowered */
	unsigned long prev_end =
		pgdat->node_start_pfn + pgdat->node_spanned_pages;
	unsigned long new_end = max(prev_end, end_pfn);

	if (pgdat->node_start_pfn > start_pfn)
		pgdat->node_start_pfn = start_pfn;

	pgdat->node_spanned_pages = new_end - pgdat->node_start_pfn;
}

164 165
/*
 * Callback handed to walk_memory_resource() by online_pages().
 *
 * If the first page of the range is marked reserved, every page in
 * [start_pfn, start_pfn + nr_pages) is passed to online_page(). @arg
 * points to an unsigned long running total, which is increased by the
 * number of pages onlined here. Always returns 0.
 */
static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
			void *arg)
{
	unsigned long *total = arg;
	unsigned long count = *total;
	unsigned long off;

	/* only the first page of the range is tested */
	if (PageReserved(pfn_to_page(start_pfn))) {
		for (off = 0; off < nr_pages; off++) {
			online_page(pfn_to_page(start_pfn + off));
			count++;
		}
	}

	*total = count;
	return 0;
}


int online_pages(unsigned long pfn, unsigned long nr_pages)
{
183 184 185
	unsigned long flags;
	unsigned long onlined_pages = 0;
	struct zone *zone;
186
	int need_zonelists_rebuild = 0;
187 188 189 190 191 192 193 194 195 196 197 198

	/*
	 * This doesn't need a lock to do pfn_to_page().
	 * The section can't be removed here because of the
	 * memory_block->state_sem.
	 */
	zone = page_zone(pfn_to_page(pfn));
	pgdat_resize_lock(zone->zone_pgdat, &flags);
	grow_zone_span(zone, pfn, pfn + nr_pages);
	grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
	pgdat_resize_unlock(zone->zone_pgdat, &flags);

199 200 201 202 203 204 205 206
	/*
	 * If this zone is not populated, then it is not in zonelist.
	 * This means the page allocator ignores this zone.
	 * So, zonelist must be updated after online.
	 */
	if (!populated_zone(zone))
		need_zonelists_rebuild = 1;

207 208
	walk_memory_resource(pfn, nr_pages, &onlined_pages,
		online_pages_range);
209
	zone->present_pages += onlined_pages;
210
	zone->zone_pgdat->node_present_pages += onlined_pages;
211

212
	setup_per_zone_pages_min();
213 214 215 216
	if (onlined_pages) {
		kswapd_run(zone_to_nid(zone));
		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
	}
217

218 219
	if (need_zonelists_rebuild)
		build_all_zonelists();
220
	vm_total_pages = nr_free_pagecache_pages();
221
	writeback_set_ratelimit();
222 223
	return 0;
}
224
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
225

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
/*
 * Allocate and initialize the node data for a node being hot-added.
 *
 * @nid:   id of the new node
 * @start: physical start address; shifted down by PAGE_SHIFT to get the
 *         node's start pfn
 *
 * Returns the new pgdat, or NULL if arch_alloc_nodedata() fails.
 */
static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
{
	unsigned long zones_size[MAX_NR_ZONES] = {0};
	unsigned long zholes_size[MAX_NR_ZONES] = {0};
	unsigned long start_pfn = start >> PAGE_SHIFT;
	struct pglist_data *pgdat;

	pgdat = arch_alloc_nodedata(nid);
	if (pgdat) {
		arch_refresh_nodedata(nid, pgdat);

		/* we can use NODE_DATA(nid) from here */

		/*
		 * All zone sizes are zero: the node's zones start out
		 * empty, since there are no present pages yet.
		 */
		free_area_init_node(nid, pgdat, zones_size, start_pfn,
				    zholes_size);
	}

	return pgdat;
}

/*
 * Undo hotadd_new_pgdat(): clear the node-data entry for @nid by
 * refreshing it with NULL, then free @pgdat.
 */
static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
{
	arch_refresh_nodedata(nid, NULL);
	arch_free_nodedata(pgdat);
}

254

255 256
/*
 * Hot-add the memory range [start, start + size) to node @nid.
 *
 * Registers the range as a "System RAM" resource, allocates node data
 * if the node is not online yet, calls the architecture's hot-add hook,
 * and finally marks the node online (registering it when it is new).
 *
 * Returns 0 on success, -EEXIST if the resource cannot be registered,
 * -ENOMEM if the new node's data cannot be allocated, or the negative
 * error returned by arch_add_memory(). On every failure after the
 * resource was registered, the resource is released again.
 */
int add_memory(int nid, u64 start, u64 size)
{
	pg_data_t *pgdat = NULL;
	int new_pgdat = 0;
	struct resource *res;
	int ret;

	res = register_memory_resource(start, size);
	if (!res)
		return -EEXIST;

	if (!node_online(nid)) {
		pgdat = hotadd_new_pgdat(nid, start);
		if (!pgdat) {
			/*
			 * The resource is already registered at this point,
			 * so unwind through the error path instead of
			 * returning directly and leaking it.
			 */
			ret = -ENOMEM;
			goto error;
		}
		new_pgdat = 1;
	}

	/* call arch's memory hotadd */
	ret = arch_add_memory(nid, start, size);

	if (ret < 0)
		goto error;

	/* we online node here. we can't roll back from here. */
	node_set_online(nid);

	cpuset_track_online_nodes();

	if (new_pgdat) {
		ret = register_one_node(nid);
		/*
		 * If the sysfs file of the new node can't be created, cpus
		 * on the node can't be hot-added. There is no way to roll
		 * back now, so reluctantly catch it with BUG_ON().
		 */
		BUG_ON(ret);
	}

	return ret;
error:
	/* rollback pgdat allocation and others */
	if (new_pgdat)
		rollback_node_hotadd(nid, pgdat);
	/* res is always non-NULL here; the helper tolerates NULL anyway */
	release_memory_resource(res);

	return ret;
}
EXPORT_SYMBOL_GPL(add_memory);