/*
 *  linux/mm/memory_hotplug.c
 *
 *  Copyright (C)
 */

#include <linux/config.h>
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/pagevec.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/ioport.h>

#include <asm/tlbflush.h>

extern void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
			  unsigned long size);
30
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
31 32 33 34 35 36 37
{
	struct pglist_data *pgdat = zone->zone_pgdat;
	int nr_pages = PAGES_PER_SECTION;
	int nid = pgdat->node_id;
	int zone_type;

	zone_type = zone - pgdat->node_zones;
38 39 40 41 42 43
	if (!populated_zone(zone)) {
		int ret = 0;
		ret = init_currently_empty_zone(zone, phys_start_pfn, nr_pages);
		if (ret < 0)
			return ret;
	}
44 45
	memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn);
	zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages);
46
	return 0;
47 48
}

49 50
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
				  int nr_pages);
51 52 53 54 55
static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
{
	int nr_pages = PAGES_PER_SECTION;
	int ret;

56
	ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages);
57 58 59 60

	if (ret < 0)
		return ret;

61 62 63 64 65
	ret = __add_zone(zone, phys_start_pfn);

	if (ret < 0)
		return ret;

66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
	return register_new_memory(__pfn_to_section(phys_start_pfn));
}

/*
 * Reasonably generic function for adding memory.  It is
 * expected that archs that support memory hotplug will
 * call this function after deciding the zone to which to
 * add the new pages.
 */
int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
		 unsigned long nr_pages)
{
	unsigned long i;
	int err = 0;

	for (i = 0; i < nr_pages; i += PAGES_PER_SECTION) {
		err = __add_section(zone, phys_start_pfn + i);

84 85 86 87
		/* We want to keep adding the rest of the
		 * sections if the first ones already exist
		 */
		if (err && (err != -EEXIST))
88 89 90 91 92
			break;
	}

	return err;
}
93
EXPORT_SYMBOL_GPL(__add_pages);
94 95 96 97 98 99 100 101 102 103 104 105

/*
 * Widen @zone's span, if necessary, so that it covers the pfn range
 * [@start_pfn, @end_pfn).  The span is only ever grown, never shrunk.
 * The update is done between zone_span_writelock() and
 * zone_span_writeunlock().
 */
static void grow_zone_span(struct zone *zone,
		unsigned long start_pfn, unsigned long end_pfn)
{
	unsigned long span_end;

	zone_span_writelock(zone);

	/* Take the current end before zone_start_pfn is possibly lowered. */
	span_end = zone->zone_start_pfn + zone->spanned_pages;
	if (span_end < end_pfn)
		span_end = end_pfn;

	if (start_pfn < zone->zone_start_pfn)
		zone->zone_start_pfn = start_pfn;

	zone->spanned_pages = span_end - zone->zone_start_pfn;

	zone_span_writeunlock(zone);
}

/*
 * Widen the node described by @pgdat, if necessary, so that it covers
 * the pfn range [@start_pfn, @end_pfn).  The span is only ever grown.
 *
 * No lock is taken here; online_pages() in this file calls it with
 * pgdat_resize_lock() held.
 */
static void grow_pgdat_span(struct pglist_data *pgdat,
		unsigned long start_pfn, unsigned long end_pfn)
{
	/* Take the current end before node_start_pfn is possibly lowered. */
	unsigned long span_end =
		pgdat->node_start_pfn + pgdat->node_spanned_pages;

	if (span_end < end_pfn)
		span_end = end_pfn;

	if (start_pfn < pgdat->node_start_pfn)
		pgdat->node_start_pfn = start_pfn;

	pgdat->node_spanned_pages = span_end - pgdat->node_start_pfn;
}

int online_pages(unsigned long pfn, unsigned long nr_pages)
{
	unsigned long i;
	unsigned long flags;
	unsigned long onlined_pages = 0;
	struct zone *zone;
131
	int need_zonelists_rebuild = 0;
132 133 134 135 136 137 138 139 140 141 142 143

	/*
	 * This doesn't need a lock to do pfn_to_page().
	 * The section can't be removed here because of the
	 * memory_block->state_sem.
	 */
	zone = page_zone(pfn_to_page(pfn));
	pgdat_resize_lock(zone->zone_pgdat, &flags);
	grow_zone_span(zone, pfn, pfn + nr_pages);
	grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
	pgdat_resize_unlock(zone->zone_pgdat, &flags);

144 145 146 147 148 149 150 151
	/*
	 * If this zone is not populated, then it is not in zonelist.
	 * This means the page allocator ignores this zone.
	 * So, zonelist must be updated after online.
	 */
	if (!populated_zone(zone))
		need_zonelists_rebuild = 1;

152 153 154 155 156 157
	for (i = 0; i < nr_pages; i++) {
		struct page *page = pfn_to_page(pfn + i);
		online_page(page);
		onlined_pages++;
	}
	zone->present_pages += onlined_pages;
158
	zone->zone_pgdat->node_present_pages += onlined_pages;
159

160 161
	setup_per_zone_pages_min();

162 163
	if (need_zonelists_rebuild)
		build_all_zonelists();
164
	vm_total_pages = nr_free_pagecache_pages();
165 166
	return 0;
}
167

168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
/*
 * Allocate and initialise the node data for node @nid, whose memory
 * starts at physical address @start.
 *
 * The node data comes from arch_alloc_nodedata() and is published with
 * arch_refresh_nodedata().  Its zones are then initialised through
 * free_area_init_node() with all-zero zone and hole sizes, i.e. as empty
 * zones without any present pages.
 *
 * Returns the new pgdat, or NULL if the allocation failed.
 */
static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
{
	unsigned long empty_zones[MAX_NR_ZONES] = {0};
	unsigned long empty_holes[MAX_NR_ZONES] = {0};
	struct pglist_data *node;

	node = arch_alloc_nodedata(nid);
	if (node) {
		arch_refresh_nodedata(nid, node);

		/* we can use NODE_DATA(nid) from here */

		/* init node's zones as empty zones, we don't have any present pages.*/
		free_area_init_node(nid, node, empty_zones,
				    start >> PAGE_SHIFT, empty_holes);
	}

	return node;
}

/*
 * Undo hotadd_new_pgdat(): reset the node data registered for @nid to
 * NULL via arch_refresh_nodedata(), then release @pgdat with
 * arch_free_nodedata().
 */
static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
{
	arch_refresh_nodedata(nid, NULL);
	arch_free_nodedata(pgdat);
}

/*
 * Add the range [@start, @start + @size) to the iomem resource tree as
 * a "System RAM" resource.
 *
 * Registration is best effort and the function reports nothing to its
 * caller: if the resource descriptor cannot be allocated, or if
 * request_resource() rejects the range, a message is printed and the
 * range is simply left unregistered.  An allocation failure is handled
 * the same way as a request_resource() failure instead of bringing the
 * whole machine down with BUG_ON().
 */
static void register_memory_resource(u64 start, u64 size)
{
	struct resource *res;

	res = kzalloc(sizeof(*res), GFP_KERNEL);
	if (!res) {
		printk(KERN_ERR "System RAM resource %llx - %llx cannot be added: out of memory\n",
		       (unsigned long long)start,
		       (unsigned long long)(start + size - 1));
		return;
	}

	res->name = "System RAM";
	res->start = start;
	res->end = start + size - 1;	/* end is inclusive */
	res->flags = IORESOURCE_MEM;
	if (request_resource(&iomem_resource, res) < 0) {
		printk("System RAM resource %llx - %llx cannot be added\n",
		(unsigned long long)res->start, (unsigned long long)res->end);
		/* not inserted into the tree, so it is still ours to free */
		kfree(res);
	}
}

/*
 * Hot-add the memory range of @size bytes at physical address @start to
 * node @nid.
 *
 * If the node is not online yet, node data is created for it with
 * hotadd_new_pgdat() and kswapd_run() is called for the node.  The
 * architecture hook arch_add_memory() then adds the memory; on success
 * the node is marked online and the range is registered as an iomem
 * resource.
 *
 * Returns -ENOMEM if the node data could not be allocated, the non-zero
 * result of kswapd_run() or the negative result of arch_add_memory() on
 * failure, and otherwise the (non-negative) result of arch_add_memory().
 * On failure, node data created by this call is rolled back.
 *
 * NOTE(review): when arch_add_memory() fails for a new node, the node
 * data is freed although kswapd_run() already succeeded for that node;
 * nothing here undoes kswapd_run() -- confirm that is safe.
 */
int add_memory(int nid, u64 start, u64 size)
{
	/* non-NULL only when this call created the node data */
	pg_data_t *fresh_pgdat = NULL;
	int rc;

	if (!node_online(nid)) {
		fresh_pgdat = hotadd_new_pgdat(nid, start);
		if (!fresh_pgdat)
			return -ENOMEM;

		rc = kswapd_run(nid);
		if (rc)
			goto rollback;
	}

	/* call arch's memory hotadd */
	rc = arch_add_memory(nid, start, size);
	if (rc < 0)
		goto rollback;

	/* we online node here. we have no error path from here. */
	node_set_online(nid);

	/* register this memory as resource */
	register_memory_resource(start, size);

	return rc;

rollback:
	/* rollback pgdat allocation and others */
	if (fresh_pgdat)
		rollback_node_hotadd(nid, fresh_pgdat);

	return rc;
}
EXPORT_SYMBOL_GPL(add_memory);