/*
 *  linux/mm/memory_hotplug.c
 *
 *  Copyright (C)
 */

#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/pagevec.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
23
#include <linux/ioport.h>
24
#include <linux/cpuset.h>
25 26 27 28 29

#include <asm/tlbflush.h>

extern void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
			  unsigned long size);
30
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
31 32 33 34 35 36 37
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	int nr_pages = PAGES_PER_SECTION;
	int nid = pgdat->node_id;
	int zone_type;

	zone_type = zone - pgdat->node_zones;
38 39 40 41 42 43
	if (!populated_zone(zone)) {
		int ret = 0;
		ret = init_currently_empty_zone(zone, phys_start_pfn, nr_pages);
		if (ret < 0)
			return ret;
	}
44 45
	memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn);
	zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages);
46
	return 0;
47 48
}

49 50
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
				  int nr_pages);
51 52 53 54 55
static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
{
	int nr_pages = PAGES_PER_SECTION;
	int ret;

56 57 58
	if (pfn_valid(phys_start_pfn))
		return -EEXIST;

59
	ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
60 61 62 63

	if (ret < 0)
		return ret;

64 65 66 67 68
	ret = __add_zone(zone, phys_start_pfn);

	if (ret < 0)
		return ret;

69 70 71 72 73 74 75 76 77 78 79 80 81 82
	return register_new_memory(__pfn_to_section(phys_start_pfn));
}

/*
 * Reasonably generic function for adding memory.  It is
 * expected that archs that support memory hotplug will
 * call this function after deciding the zone to which to
 * add the new pages.
 */
int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
		 unsigned long nr_pages)
{
	unsigned long i;
	int err = 0;
83 84 85 86
	int start_sec, end_sec;
	/* during initialize mem_map, align hot-added range to section */
	start_sec = pfn_to_section_nr(phys_start_pfn);
	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
87

88 89
	for (i = start_sec; i <= end_sec; i++) {
		err = __add_section(zone, i << PFN_SECTION_SHIFT);
90

91 92 93 94
		/*
		 * EEXIST is finally dealed with by ioresource collision
		 * check. see add_memory() => register_memory_resource()
		 * Warning will be printed if there is collision.
95 96
		 */
		if (err && (err != -EEXIST))
97
			break;
98
		err = 0;
99 100 101 102
	}

	return err;
}
103
EXPORT_SYMBOL_GPL(__add_pages);
104 105 106 107 108 109 110 111 112 113 114 115

/*
 * Extend @zone's span so that it covers [start_pfn, end_pfn).  The span
 * only ever grows: the start moves down and/or the end moves up.  The
 * update is done under the zone span write lock.
 */
static void grow_zone_span(struct zone *zone,
		unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long new_end_pfn;

	zone_span_writelock(zone);

	/* Work out the end from the old start, before the start can move. */
	new_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
	if (end_pfn > new_end_pfn)
		new_end_pfn = end_pfn;

	if (zone->zone_start_pfn > start_pfn)
		zone->zone_start_pfn = start_pfn;

	zone->spanned_pages = new_end_pfn - zone->zone_start_pfn;

	zone_span_writeunlock(zone);
}

/*
 * Extend the node's span so that it covers [start_pfn, end_pfn).  The
 * span only ever grows.  No lock is taken in this function itself.
 */
static void grow_pgdat_span(struct pglist_data *pgdat,
		unsigned long start_pfn, unsigned long end_pfn)
{
	/* Work out the end from the old start, before the start can move. */
	unsigned long new_end_pfn =
		pgdat->node_start_pfn + pgdat->node_spanned_pages;

	if (end_pfn > new_end_pfn)
		new_end_pfn = end_pfn;

	if (pgdat->node_start_pfn > start_pfn)
		pgdat->node_start_pfn = start_pfn;

	pgdat->node_spanned_pages = new_end_pfn - pgdat->node_start_pfn;
}

int online_pages(unsigned long pfn, unsigned long nr_pages)
{
	unsigned long i;
	unsigned long flags;
	unsigned long onlined_pages = 0;
140 141 142
	struct resource res;
	u64 section_end;
	unsigned long start_pfn;
143
	struct zone *zone;
144
	int need_zonelists_rebuild = 0;
145 146 147 148 149 150 151 152 153 154 155 156

	/*
	 * This doesn't need a lock to do pfn_to_page().
	 * The section can't be removed here because of the
	 * memory_block->state_sem.
	 */
	zone = page_zone(pfn_to_page(pfn));
	pgdat_resize_lock(zone->zone_pgdat, &flags);
	grow_zone_span(zone, pfn, pfn + nr_pages);
	grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
	pgdat_resize_unlock(zone->zone_pgdat, &flags);

157 158 159 160 161 162 163 164
	/*
	 * If this zone is not populated, then it is not in zonelist.
	 * This means the page allocator ignores this zone.
	 * So, zonelist must be updated after online.
	 */
	if (!populated_zone(zone))
		need_zonelists_rebuild = 1;

165 166 167 168 169
	res.start = (u64)pfn << PAGE_SHIFT;
	res.end = res.start + ((u64)nr_pages << PAGE_SHIFT) - 1;
	res.flags = IORESOURCE_MEM; /* we just need system ram */
	section_end = res.end;

170
	while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
		start_pfn = (unsigned long)(res.start >> PAGE_SHIFT);
		nr_pages = (unsigned long)
                           ((res.end + 1 - res.start) >> PAGE_SHIFT);

		if (PageReserved(pfn_to_page(start_pfn))) {
			/* this region's page is not onlined now */
			for (i = 0; i < nr_pages; i++) {
				struct page *page = pfn_to_page(start_pfn + i);
				online_page(page);
				onlined_pages++;
			}
		}

		res.start = res.end + 1;
		res.end = section_end;
186 187
	}
	zone->present_pages += onlined_pages;
188
	zone->zone_pgdat->node_present_pages += onlined_pages;
189

190 191
	setup_per_zone_pages_min();

192 193
	if (need_zonelists_rebuild)
		build_all_zonelists();
194
	vm_total_pages = nr_free_pagecache_pages();
195 196
	return 0;
}
197

198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
/*
 * Allocate and install the node data for node @nid, whose memory starts
 * at physical address @start, and initialise it with all zones empty.
 *
 * Returns the new pgdat, or NULL if arch_alloc_nodedata() fails.
 */
static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
{
	unsigned long zones_size[MAX_NR_ZONES] = {0};
	unsigned long zholes_size[MAX_NR_ZONES] = {0};
	unsigned long node_start_pfn = start >> PAGE_SHIFT;
	struct pglist_data *node_data;

	node_data = arch_alloc_nodedata(nid);
	if (node_data == NULL)
		return NULL;

	/* NODE_DATA(nid) can be used once this call has been made. */
	arch_refresh_nodedata(nid, node_data);

	/* All zone sizes are zero: the node has no present pages yet. */
	free_area_init_node(nid, node_data, zones_size, node_start_pfn,
			    zholes_size);

	return node_data;
}

/*
 * Undo hotadd_new_pgdat(): reset the node data registered for @nid to
 * NULL, then free @pgdat.
 */
static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
{
	arch_refresh_nodedata(nid, NULL);
	arch_free_nodedata(pgdat);
}

226
/* add this memory to iomem resource */
227
static struct resource *register_memory_resource(u64 start, u64 size)
228 229 230 231 232 233 234 235 236 237 238 239 240
{
	struct resource *res;
	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
	BUG_ON(!res);

	res->name = "System RAM";
	res->start = start;
	res->end = start + size - 1;
	res->flags = IORESOURCE_MEM;
	if (request_resource(&iomem_resource, res) < 0) {
		printk("System RAM resource %llx - %llx cannot be added\n",
		(unsigned long long)res->start, (unsigned long long)res->end);
		kfree(res);
241
		res = NULL;
242
	}
243 244 245 246 247 248 249 250 251 252
	return res;
}

/*
 * Release @res from the resource tree and free it.  A NULL @res is
 * accepted and ignored.
 */
static void release_memory_resource(struct resource *res)
{
	if (res) {
		release_resource(res);
		kfree(res);
	}
}



257 258
/*
 * Hot-add the physical memory range [start, start + size) to node @nid.
 *
 * The range is first registered as a "System RAM" resource.  If the node
 * is not online, node data is set up for it and kswapd_run() is called.
 * Then the architecture hook arch_add_memory() is invoked and, if it
 * succeeds, the node is marked online.  For a newly created node
 * register_one_node() is called as well; its failure is fatal (BUG_ON)
 * because there is no way to roll back at that point.
 *
 * Returns -EEXIST if the resource cannot be registered, -ENOMEM if the
 * node data for a new node cannot be allocated, or the error from
 * kswapd_run() / arch_add_memory().  On every one of these failures the
 * resource and any new node data are released again.  On success the
 * non-negative result of arch_add_memory() is returned, or 0 when a new
 * node was registered.
 */
int add_memory(int nid, u64 start, u64 size)
{
	pg_data_t *pgdat = NULL;
	int new_pgdat = 0;
	struct resource *res;
	int ret;

	res = register_memory_resource(start, size);
	if (!res)
		return -EEXIST;

	if (!node_online(nid)) {
		pgdat = hotadd_new_pgdat(nid, start);
		if (!pgdat) {
			/*
			 * The resource is already registered, so leave
			 * through the error path to release it instead of
			 * returning directly and leaking it.
			 */
			ret = -ENOMEM;
			goto error;
		}
		new_pgdat = 1;
		ret = kswapd_run(nid);
		if (ret)
			goto error;
	}

	/* call arch's memory hotadd */
	ret = arch_add_memory(nid, start, size);

	if (ret < 0)
		goto error;

	/* we online node here. we can't roll back from here. */
	node_set_online(nid);

	cpuset_track_online_nodes();

	if (new_pgdat) {
		ret = register_one_node(nid);
		/*
		 * If the sysfs file of the new node can't be created, cpus
		 * on the node can't be hot-added.  There is no way to roll
		 * back now, so reluctantly catch it with BUG_ON().
		 */
		BUG_ON(ret);
	}

	return ret;
error:
	/* rollback pgdat allocation and others */
	if (new_pgdat)
		rollback_node_hotadd(nid, pgdat);
	/* res is never NULL on this path */
	release_memory_resource(res);

	return ret;
}
EXPORT_SYMBOL_GPL(add_memory);