/*
 * linux/kernel/power/swsusp.c
 *
 * This file provides code to write suspend image to swap and read it back.
 *
 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
 *
 * This file is released under the GPLv2.
 *
 * I'd like to thank the following people for their work:
 *
 * Pavel Machek <pavel@ucw.cz>:
 * Modifications, defectiveness pointing, being with me at the very beginning,
 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
 *
 * Steve Doddi <dirk@loth.demon.co.uk>:
 * Support the possibility of hardware state restoring.
 *
 * Raph <grey.havens@earthling.net>:
 * Support for preserving states of network devices and virtual console
 * (including X and svgatextmode)
 *
 * Kurt Garloff <garloff@suse.de>:
 * Straightened the critical function in order to prevent compilers from
 * playing tricks with local variables.
 *
 * Andreas Mohr <a.mohr@mailto.de>
 *
 * Alex Badea <vampire@go.ro>:
 * Fixed runaway init
 *
 * Rafael J. Wysocki <rjw@sisk.pl>
 * Added the swap map data structure and reworked the handling of swap
 *
 * More state savers are welcome. Especially for the scsi layer...
 *
 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/smp_lock.h>
#include <linux/file.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/genhd.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/swap.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/buffer_head.h>
#include <linux/swapops.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/highmem.h>
#include <linux/bio.h>

#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#include "power.h"

72
/*
 * Preferred upper bound for the suspend image, in bytes (tunable via
 * /sys/power/image_size).  With the value set to N, swsusp does its best
 * to keep the image at or below N bytes; when that cannot be done it
 * tries to create the smallest image possible instead.
 */
unsigned long image_size = 500UL * 1024 * 1024;
79

80
/*
 * Highmem helpers.  With CONFIG_HIGHMEM they are only declared here; without
 * it they collapse to local stubs that report "nothing to do".
 */
#ifdef CONFIG_HIGHMEM
int save_highmem(void);
int restore_highmem(void);
unsigned int count_highmem_pages(void);
#else /* !CONFIG_HIGHMEM */
static unsigned int count_highmem_pages(void)
{
	return 0;
}

static int save_highmem(void)
{
	return 0;
}

static int restore_highmem(void)
{
	return 0;
}
#endif /* CONFIG_HIGHMEM */

L
Linus Torvalds 已提交
90 91 92 93 94
extern char resume_file[];

#define SWSUSP_SIG	"S1SUSPEND"

static struct swsusp_header {
95
	char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
96
	swp_entry_t image;
L
Linus Torvalds 已提交
97 98 99 100 101 102 103 104 105 106
	char	orig_sig[10];
	char	sig[10];
} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;

static struct swsusp_info swsusp_info;

/*
 * Saving part...
 */

107
/*
 * Index into swap_info[] of the swap area the image is written to.  It is
 * assigned by swsusp_swap_check(); 0xffff is only the initial value.
 */
static unsigned short root_swap = 0xffff;
L
Linus Torvalds 已提交
108

109
/**
 *	mark_swapfiles - stamp the resume swap area with the swsusp signature
 *	@start:	swap entry to record in the header as the start of the image
 *
 *	The first page of swap area root_swap is read into swsusp_header.  If
 *	it carries one of the regular swap signatures, that signature is saved
 *	in .orig_sig, replaced with SWSUSP_SIG, @start is stored in .image and
 *	the page is written back.
 *
 *	Returns the result of the write, or -ENODEV if no swap signature was
 *	found.  The result of the initial read is not examined.
 */
static int mark_swapfiles(swp_entry_t start)
{
	struct page *page = virt_to_page((unsigned long)&swsusp_header);
	swp_entry_t first = swp_entry(root_swap, 0);

	rw_swap_page_sync(READ, first, page);

	if (memcmp("SWAP-SPACE", swsusp_header.sig, 10) &&
	    memcmp("SWAPSPACE2", swsusp_header.sig, 10)) {
		pr_debug("swsusp: Partition is not swap space.\n");
		return -ENODEV;
	}

	memcpy(swsusp_header.orig_sig, swsusp_header.sig, 10);
	memcpy(swsusp_header.sig, SWSUSP_SIG, 10);
	swsusp_header.image = start;
	return rw_swap_page_sync(WRITE, first, page);
}

/*
 * Check whether the swap device is the specified resume
 * device, irrespective of whether they are specified by
 * identical names.
 *
 * (Thus, device inode aliasing is allowed.  You can say /dev/hda4
 * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
 * and they'll be considered the same device.  This is *necessary* for
 * devfs, since the resume code can only recognize the form /dev/hda4,
 * but the suspend code would see the long name.)
 */
143
static inline int is_resume_device(const struct swap_info_struct *swap_info)
L
Linus Torvalds 已提交
144 145 146 147 148 149 150 151 152 153 154 155
{
	struct file *file = swap_info->swap_file;
	struct inode *inode = file->f_dentry->d_inode;

	return S_ISBLK(inode->i_mode) &&
		swsusp_resume_device == MKDEV(imajor(inode), iminor(inode));
}

static int swsusp_swap_check(void) /* This is called before saving image */
{
	int i;

156 157
	if (!swsusp_resume_device)
		return -ENODEV;
158
	spin_lock(&swap_lock);
159 160 161
	for (i = 0; i < MAX_SWAPFILES; i++) {
		if (!(swap_info[i].flags & SWP_WRITEOK))
			continue;
162
		if (is_resume_device(swap_info + i)) {
163 164 165
			spin_unlock(&swap_lock);
			root_swap = i;
			return 0;
L
Linus Torvalds 已提交
166
		}
167
	}
168
	spin_unlock(&swap_lock);
169
	return -ENODEV;
L
Linus Torvalds 已提交
170 171 172
}

/**
P
Pavel Machek 已提交
173
 *	write_page - Write one page to a fresh swap location.
L
Linus Torvalds 已提交
174 175 176 177
 *	@addr:	Address we're writing.
 *	@loc:	Place to store the entry we used.
 *
 *	Allocate a new swap entry and 'sync' it. Note we discard -EIO
178
 *	errors. That is an artifact left over from swsusp. It did not
L
Linus Torvalds 已提交
179 180 181 182 183
 *	check the return of rw_swap_page_sync() at all, since most pages
 *	written back to swap would return -EIO.
 *	This is a partial improvement, since we will at least return other
 *	errors, though we need to eventually fix the damn code.
 */
P
Pavel Machek 已提交
184
static int write_page(unsigned long addr, swp_entry_t *loc)
L
Linus Torvalds 已提交
185 186
{
	swp_entry_t entry;
187
	int error = -ENOSPC;
L
Linus Torvalds 已提交
188

189 190 191 192
	entry = get_swap_page_of_type(root_swap);
	if (swp_offset(entry)) {
		error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
		if (!error || error == -EIO)
L
Linus Torvalds 已提交
193
			*loc = entry;
194
	}
L
Linus Torvalds 已提交
195 196 197 198
	return error;
}

/**
 *	Swap map-handling functions
 *
 *	The swap map is a data structure used for keeping track of each page
 *	written to the swap.  It consists of many swap_map_page structures
 *	that contain each an array of MAP_PAGE_SIZE swap entries.
 *	These structures are linked together with the help of either the
 *	.next (in memory) or the .next_swap (in swap) member.
 *
 *	The swap map is created during suspend.  At that time we need to keep
 *	it in memory, because we have to free all of the allocated swap
 *	entries if an error occurs.  The memory needed is preallocated
 *	so that we know in advance if there's enough of it.
 *
 *	The first swap_map_page structure is filled with the swap entries that
 *	correspond to the first MAP_PAGE_SIZE data pages written to swap and
 *	so on.  After all of the data pages have been written, the order
 *	of the swap_map_page structures in the map is reversed so that they
 *	can be read from swap in the original order.  This causes the data
 *	pages to be loaded in exactly the same order in which they have been
 *	saved.
 *
 *	During resume we only need to use one swap_map_page structure
 *	at a time, which means that we only need to use two memory pages for
 *	reading the image - one for reading the swap_map_page structures
 *	and the second for reading the data pages from swap.
 */
225 226 227 228 229 230 231 232 233 234 235

/*
 * Number of swap entries held by one swap_map_page: what is left of a page
 * after reserving room for one swp_entry_t (.next_swap) and one pointer
 * (.next), divided by the size of an entry.
 */
#define MAP_PAGE_SIZE	((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \
			/ sizeof(swp_entry_t))

/*
 * One node of the swap map.
 *   entries   - swap entries of the pages tracked by this node
 *   next_swap - swap entry of the next node as stored in swap (0 = none)
 *   next      - next node while the map is kept in memory
 */
struct swap_map_page {
	swp_entry_t		entries[MAP_PAGE_SIZE];
	swp_entry_t		next_swap;
	struct swap_map_page	*next;
};

static inline void free_swap_map(struct swap_map_page *swap_map)
L
Linus Torvalds 已提交
236
{
237
	struct swap_map_page *swp;
L
Linus Torvalds 已提交
238

239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
	while (swap_map) {
		swp = swap_map->next;
		free_page((unsigned long)swap_map);
		swap_map = swp;
	}
}

/**
 *	alloc_swap_map - allocate an in-memory swap map
 *	@nr_pages:	number of pages the map has to be able to track
 *
 *	Allocates a chain of zeroed swap_map_page structures, one per
 *	MAP_PAGE_SIZE tracked pages.
 *
 *	Returns the head of the chain, or NULL if @nr_pages is 0 or any
 *	allocation fails (in which case everything allocated so far is
 *	freed again).
 */
static struct swap_map_page *alloc_swap_map(unsigned int nr_pages)
{
	struct swap_map_page *swap_map, *swp;
	unsigned int n;

	if (!nr_pages)
		return NULL;

	pr_debug("alloc_swap_map(): nr_pages = %u\n", nr_pages);
	swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
	/* The head must exist before the loop starts hanging pages off it */
	if (!swap_map)
		return NULL;

	swp = swap_map;
	for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) {
		swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
		if (!swp->next) {
			free_swap_map(swap_map);
			return NULL;
		}
		swp = swp->next;
	}
	return swap_map;
}

/**
269 270
 *	reverse_swap_map - reverse the order of pages in the swap map
 *	@swap_map
L
Linus Torvalds 已提交
271
 */
272 273

static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map)
L
Linus Torvalds 已提交
274
{
275 276 277 278 279 280 281 282 283 284 285
	struct swap_map_page *prev, *next;

	prev = NULL;
	while (swap_map) {
		next = swap_map->next;
		swap_map->next = prev;
		prev = swap_map;
		swap_map = next;
	}
	return prev;
}
L
Linus Torvalds 已提交
286

287 288 289 290 291 292 293 294 295 296 297 298
/**
 *	free_swap_map_entries - release the swap entries that hold the pages
 *	of the swap map @swap_map itself (error path only).  Nodes whose
 *	.next_swap is zero are skipped.
 */
static inline void free_swap_map_entries(struct swap_map_page *swap_map)
{
	struct swap_map_page *swp;

	for (swp = swap_map; swp; swp = swp->next) {
		if (!swp->next_swap.val)
			continue;
		swap_free(swp->next_swap);
	}
}
L
Linus Torvalds 已提交
299

300 301 302 303 304 305 306 307 308 309 310 311 312
/**
 *	save_swap_map - write the swap map pages themselves to swap
 *	@swap_map:	head of the in-memory map
 *	@start:		receives the swap entry of the last map page written
 *
 *	Each map page gets the swap entry of the page written before it in
 *	.next_swap (zero for the first one) and is then written out.
 *	Returns 0 on success or the first error from write_page(); @start
 *	is only set on success.
 */

static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start)
{
	swp_entry_t prev = (swp_entry_t){0};
	struct swap_map_page *swp;
	int error;

	for (swp = swap_map; swp; swp = swp->next) {
		swp->next_swap = prev;
		error = write_page((unsigned long)swp, &prev);
		if (error)
			return error;
	}
	*start = prev;
	return 0;
}

/**
 *	free_image_entries - release every non-zero swap entry recorded in
 *	the map starting at @swp, i.e. the entries used for the image data
 *	pages (error path only)
 */

static inline void free_image_entries(struct swap_map_page *swp)
{
	for (; swp; swp = swp->next) {
		unsigned int i;

		for (i = 0; i < MAP_PAGE_SIZE; i++) {
			if (!swp->entries[i].val)
				continue;
			swap_free(swp->entries[i]);
		}
	}
}

/**
 *	The swap_map_handle structure is used for handling the swap map in
 *	a file-alike way
 *
 *	@cur:	swap_map_page currently being filled (suspend) or read (resume)
 *	@k:	index of the next slot to use in @cur->entries[]
 */

struct swap_map_handle {
	struct swap_map_page *cur;
	unsigned int k;
};

/* Point @handle at the first slot of the first page of @map. */
static inline void init_swap_map_handle(struct swap_map_handle *handle,
					struct swap_map_page *map)
{
	handle->k = 0;
	handle->cur = map;
}

static inline int swap_map_write_page(struct swap_map_handle *handle,
                                      unsigned long addr)
{
	int error;

	error = write_page(addr, handle->cur->entries + handle->k);
	if (error)
		return error;
	if (++handle->k >= MAP_PAGE_SIZE) {
		handle->cur = handle->cur->next;
		handle->k = 0;
	}
	return 0;
}

/**
 *	save_image_data - write the data pages referenced by the PBEs of the
 *	list @pblist through the swap map handle @handle, printing a
 *	percentage as it goes (@nr_pages is the expected number of pages and
 *	is only used for the progress output)
 *
 *	Returns 0 on success or the first error from swap_map_write_page().
 */

static int save_image_data(struct pbe *pblist,
			   struct swap_map_handle *handle,
			   unsigned int nr_pages)
{
	unsigned int step = nr_pages / 100;
	unsigned int written = 0;
	struct pbe *pbe;
	int error;

	printk("Saving image data pages (%u pages) ...     ", nr_pages);
	if (!step)
		step = 1;
	for_each_pbe (pbe, pblist) {
		error = swap_map_write_page(handle, pbe->address);
		if (error)
			return error;
		if (written % step == 0)
			printk("\b\b\b\b%3d%%", written / step);
		written++;
	}
	printk("\b\b\b\bdone\n");
	return 0;
}

/* Print every field of the image header swsusp_info via pr_debug(). */
static void dump_info(void)
{
	/* kernel and memory identification */
	pr_debug(" swsusp: Version: %u\n", swsusp_info.version_code);
	pr_debug(" swsusp: Num Pages: %ld\n", swsusp_info.num_physpages);

	/* utsname copy taken by init_header() */
	pr_debug(" swsusp: UTS Sys: %s\n", swsusp_info.uts.sysname);
	pr_debug(" swsusp: UTS Node: %s\n", swsusp_info.uts.nodename);
	pr_debug(" swsusp: UTS Release: %s\n", swsusp_info.uts.release);
	pr_debug(" swsusp: UTS Version: %s\n", swsusp_info.uts.version);
	pr_debug(" swsusp: UTS Machine: %s\n", swsusp_info.uts.machine);
	pr_debug(" swsusp: UTS Domain: %s\n", swsusp_info.uts.domainname);

	/* image geometry */
	pr_debug(" swsusp: CPUs: %d\n", swsusp_info.cpus);
	pr_debug(" swsusp: Image: %ld Pages\n", swsusp_info.image_pages);
	pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages);
}

416
static void init_header(unsigned int nr_pages)
L
Linus Torvalds 已提交
417 418 419 420 421 422 423
{
	memset(&swsusp_info, 0, sizeof(swsusp_info));
	swsusp_info.version_code = LINUX_VERSION_CODE;
	swsusp_info.num_physpages = num_physpages;
	memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));

	swsusp_info.cpus = num_online_cpus();
424 425
	swsusp_info.image_pages = nr_pages;
	swsusp_info.pages = nr_pages +
426
		((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
L
Linus Torvalds 已提交
427 428 429
}

/**
430 431
 *	pack_orig_addresses - the .orig_address fields of the PBEs from the
 *	list starting at @pbe are stored in the array @buf[] (1 page)
L
Linus Torvalds 已提交
432 433
 */

434 435
static inline struct pbe *pack_orig_addresses(unsigned long *buf,
                                              struct pbe *pbe)
L
Linus Torvalds 已提交
436
{
437
	int j;
L
Linus Torvalds 已提交
438

439 440 441 442 443 444 445 446
	for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
		buf[j] = pbe->orig_address;
		pbe = pbe->next;
	}
	if (!pbe)
		for (; j < PAGE_SIZE / sizeof(long); j++)
			buf[j] = 0;
	return pbe;
L
Linus Torvalds 已提交
447 448 449
}

/**
450 451
 *	save_image_metadata - save the .orig_address fields of the PBEs
 *	from the list @pblist using the swap map handle @handle
L
Linus Torvalds 已提交
452 453
 */

454 455
static int save_image_metadata(struct pbe *pblist,
                               struct swap_map_handle *handle)
L
Linus Torvalds 已提交
456
{
457
	unsigned long *buf;
P
Pavel Machek 已提交
458
	unsigned int n = 0;
459 460
	struct pbe *p;
	int error = 0;
L
Linus Torvalds 已提交
461

462 463 464 465 466 467 468 469 470 471 472
	printk("Saving image metadata ... ");
	buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
	if (!buf)
		return -ENOMEM;
	p = pblist;
	while (p) {
		p = pack_orig_addresses(buf, p);
		error = swap_map_write_page(handle, (unsigned long)buf);
		if (error)
			break;
		n++;
L
Linus Torvalds 已提交
473
	}
474 475 476
	free_page((unsigned long)buf);
	if (!error)
		printk("done (%u pages saved)\n", n);
L
Linus Torvalds 已提交
477 478 479
	return error;
}

480 481 482 483
/**
 *	enough_swap - Make sure we have enough swap to save the image.
 *
 *	Returns TRUE or FALSE after checking the total amount of swap
484
 *	space avaiable from the resume partition.
485 486 487 488
 */

static int enough_swap(unsigned int nr_pages)
{
489 490
	unsigned int free_swap = swap_info[root_swap].pages -
		swap_info[root_swap].inuse_pages;
491

492 493
	pr_debug("swsusp: free swap pages: %u\n", free_swap);
	return free_swap > (nr_pages + PAGES_FOR_IO +
494 495 496
		(nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
}

L
Linus Torvalds 已提交
497
/**
498 499 500 501 502 503
 *	swsusp_write - Write entire image and metadata.
 *
 *	It is important _NOT_ to umount filesystems at this point. We want
 *	them synced (in case something goes wrong) but we DO not want to mark
 *	filesystem clean: it is not. (And it does not matter, if we resume
 *	correctly, we'll mark system clean, anyway.)
L
Linus Torvalds 已提交
504
 */
505 506

int swsusp_write(struct pbe *pblist, unsigned int nr_pages)
L
Linus Torvalds 已提交
507
{
508 509
	struct swap_map_page *swap_map;
	struct swap_map_handle handle;
510
	swp_entry_t start;
L
Linus Torvalds 已提交
511 512
	int error;

513 514 515 516
	if ((error = swsusp_swap_check())) {
		printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
		return error;
	}
517
	if (!enough_swap(nr_pages)) {
518 519 520 521
		printk(KERN_ERR "swsusp: Not enough free swap\n");
		return -ENOSPC;
	}

522 523 524 525 526
	init_header(nr_pages);
	swap_map = alloc_swap_map(swsusp_info.pages);
	if (!swap_map)
		return -ENOMEM;
	init_swap_map_handle(&handle, swap_map);
L
Linus Torvalds 已提交
527

528 529 530
	error = swap_map_write_page(&handle, (unsigned long)&swsusp_info);
	if (!error)
		error = save_image_metadata(pblist, &handle);
531 532 533 534
	if (!error)
		error = save_image_data(pblist, &handle, nr_pages);
	if (error)
		goto Free_image_entries;
L
Linus Torvalds 已提交
535

536
	swap_map = reverse_swap_map(swap_map);
537
	error = save_swap_map(swap_map, &start);
538 539 540
	if (error)
		goto Free_map_entries;

541 542 543 544
	dump_info();
	printk( "S" );
	error = mark_swapfiles(start);
	printk( "|\n" );
545 546 547 548 549
	if (error)
		goto Free_map_entries;

Free_swap_map:
	free_swap_map(swap_map);
L
Linus Torvalds 已提交
550
	return error;
551 552 553 554 555 556

Free_map_entries:
	free_swap_map_entries(swap_map);
Free_image_entries:
	free_image_entries(swap_map);
	goto Free_swap_map;
L
Linus Torvalds 已提交
557 558
}

559 560 561 562 563 564 565 566 567 568 569 570 571
/**
 *	swsusp_shrink_memory -  Try to free as much memory as needed
 *
 *	... but do not OOM-kill anyone
 *
 *	Notice: all userland should be stopped before it is called, or
 *	livelock is possible.
 */

#define SHRINK_BITE	10000

int swsusp_shrink_memory(void)
{
572
	long size, tmp;
573 574 575 576 577 578 579
	struct zone *zone;
	unsigned long pages = 0;
	unsigned int i = 0;
	char *p = "-\\|/";

	printk("Shrinking memory...  ");
	do {
580 581 582
		size = 2 * count_highmem_pages();
		size += size / 50 + count_data_pages();
		size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
583
			PAGES_FOR_IO;
584
		tmp = size;
585 586 587 588 589 590 591 592
		for_each_zone (zone)
			if (!is_highmem(zone))
				tmp -= zone->free_pages;
		if (tmp > 0) {
			tmp = shrink_all_memory(SHRINK_BITE);
			if (!tmp)
				return -ENOMEM;
			pages += tmp;
593
		} else if (size > image_size / PAGE_SIZE) {
594 595
			tmp = shrink_all_memory(SHRINK_BITE);
			pages += tmp;
596 597 598 599 600 601 602 603
		}
		printk("\b%c", p[i++%4]);
	} while (tmp > 0);
	printk("\bdone (%lu pages freed)\n", pages);

	return 0;
}

L
Linus Torvalds 已提交
604 605 606
int swsusp_suspend(void)
{
	int error;
607

L
Linus Torvalds 已提交
608 609 610 611 612 613 614 615 616 617
	if ((error = arch_prepare_suspend()))
		return error;
	local_irq_disable();
	/* At this point, device_suspend() has been called, but *not*
	 * device_power_down(). We *must* device_power_down() now.
	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
	 * become desynchronized with the actual state of the hardware
	 * at resume time, and evil weirdness ensues.
	 */
	if ((error = device_power_down(PMSG_FREEZE))) {
618
		printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
619
		goto Enable_irqs;
L
Linus Torvalds 已提交
620
	}
621

622 623 624
	if ((error = save_highmem())) {
		printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
		goto Restore_highmem;
625 626
	}

L
Linus Torvalds 已提交
627 628
	save_processor_state();
	if ((error = swsusp_arch_suspend()))
629
		printk(KERN_ERR "Error %d suspending\n", error);
L
Linus Torvalds 已提交
630 631
	/* Restore control flow magically appears here */
	restore_processor_state();
632
Restore_highmem:
L
Linus Torvalds 已提交
633 634
	restore_highmem();
	device_power_up();
635
Enable_irqs:
L
Linus Torvalds 已提交
636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652
	local_irq_enable();
	return error;
}

int swsusp_resume(void)
{
	int error;
	local_irq_disable();
	if (device_power_down(PMSG_FREEZE))
		printk(KERN_ERR "Some devices failed to power down, very bad\n");
	/* We'll ignore saved state, but this gets preempt count (etc) right */
	save_processor_state();
	error = swsusp_arch_resume();
	/* Code below is only ever reached in case of failure. Otherwise
	 * execution continues at place where swsusp_arch_suspend was called
         */
	BUG_ON(!error);
653 654 655 656 657
	/* The only reason why swsusp_arch_resume() can fail is memory being
	 * very tight, so we have to free it as soon as we can to avoid
	 * subsequent failures
	 */
	swsusp_free();
L
Linus Torvalds 已提交
658 659
	restore_processor_state();
	restore_highmem();
I
Ingo Molnar 已提交
660
	touch_softlockup_watchdog();
L
Linus Torvalds 已提交
661 662 663 664 665 666
	device_power_up();
	local_irq_enable();
	return error;
}

/**
667 668 669
 *	mark_unsafe_pages - mark the pages that cannot be used for storing
 *	the image during resume, because they conflict with the pages that
 *	had been used before suspend
L
Linus Torvalds 已提交
670 671
 */

672
static void mark_unsafe_pages(struct pbe *pblist)
L
Linus Torvalds 已提交
673 674 675
{
	struct zone *zone;
	unsigned long zone_pfn;
676
	struct pbe *p;
L
Linus Torvalds 已提交
677 678

	if (!pblist) /* a sanity check */
679
		return;
L
Linus Torvalds 已提交
680

681
	/* Clear page flags */
682
	for_each_zone (zone) {
683 684 685
		for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
			if (pfn_valid(zone_pfn + zone->zone_start_pfn))
				ClearPageNosaveFree(pfn_to_page(zone_pfn +
L
Linus Torvalds 已提交
686 687 688
					zone->zone_start_pfn));
	}

689
	/* Mark orig addresses */
L
Linus Torvalds 已提交
690
	for_each_pbe (p, pblist)
691
		SetPageNosaveFree(virt_to_page(p->orig_address));
L
Linus Torvalds 已提交
692

693
}
L
Linus Torvalds 已提交
694

695 696 697 698 699 700 701
/*
 * Copy the .orig_address of every element of @src to the element at the same
 * position in @dst.  Both lists are assumed to contain the same number of
 * elements; @dst is not checked.
 */
static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
{
	for (; src; src = src->next, dst = dst->next)
		dst->orig_address = src->orig_address;
}

705
/*
L
Linus Torvalds 已提交
706 707 708 709 710 711 712 713 714 715
 *	Using bio to read from swap.
 *	This code requires a bit more work than just using buffer heads
 *	but, it is the recommended way for 2.5/2.6.
 *	The following are to signal the beginning and end of I/O. Bios
 *	finish asynchronously, while we want them to happen synchronously.
 *	A simple atomic_t, and a wait loop take care of this problem.
 */

static atomic_t io_done = ATOMIC_INIT(0);

P
Pavel Machek 已提交
716
static int end_io(struct bio *bio, unsigned int num, int err)
L
Linus Torvalds 已提交
717 718 719 720 721 722 723
{
	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		panic("I/O error reading memory image");
	atomic_set(&io_done, 0);
	return 0;
}

P
Pavel Machek 已提交
724
/*
 * Block device the image is read from.  Assigned by swsusp_check() from
 * open_by_devnum(), so it may hold an ERR_PTR value; users test it with
 * IS_ERR() before touching it.
 */
static struct block_device *resume_bdev;
L
Linus Torvalds 已提交
725 726 727 728 729 730 731 732 733 734 735 736

/**
 *	submit - submit BIO request.
 *	@rw:	READ or WRITE.
 *	@off	physical offset of page.
 *	@page:	page we're reading or writing.
 *
 *	Straight from the textbook - allocate and initialize the bio.
 *	If we're writing, make sure the page is marked as dirty.
 *	Then submit it and wait.
 */

P
Pavel Machek 已提交
737
static int submit(int rw, pgoff_t page_off, void *page)
L
Linus Torvalds 已提交
738 739
{
	int error = 0;
P
Pavel Machek 已提交
740
	struct bio *bio;
L
Linus Torvalds 已提交
741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;
	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
	bio->bi_bdev = resume_bdev;
	bio->bi_end_io = end_io;

	if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
		printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
		error = -EFAULT;
		goto Done;
	}


	atomic_set(&io_done, 1);
	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
	while (atomic_read(&io_done))
		yield();
760 761
	if (rw == READ)
		bio_set_pages_dirty(bio);
L
Linus Torvalds 已提交
762 763 764 765 766
 Done:
	bio_put(bio);
	return error;
}

P
Pavel Machek 已提交
767
/* Synchronously read page number @page_off of resume_bdev into @page. */
static int bio_read_page(pgoff_t page_off, void *page)
{
	int error = submit(READ, page_off, page);

	return error;
}

P
Pavel Machek 已提交
772
/* Synchronously write @page to page number @page_off of resume_bdev. */
static int bio_write_page(pgoff_t page_off, void *page)
{
	int error = submit(WRITE, page_off, page);

	return error;
}

777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831
/**
 *	The following functions allow us to read data using a swap map
 *	in a file-alike way
 */

/* Free the map page held by @handle (if any) and mark the handle empty. */
static inline void release_swap_map_reader(struct swap_map_handle *handle)
{
	if (!handle->cur)
		return;
	free_page((unsigned long)handle->cur);
	handle->cur = NULL;
}

/**
 *	get_swap_map_reader - set up @handle for reading an image
 *	@handle:	handle to initialise
 *	@start:		swap entry of the first swap map page
 *
 *	Allocates one page and reads the first swap map page into it.
 *
 *	Returns 0 on success, -EINVAL if @start has a zero offset, -ENOMEM
 *	if the page cannot be allocated, or the error from bio_read_page().
 *	On every failure @handle->cur is left NULL, so it is always safe to
 *	pass the handle to release_swap_map_reader() afterwards.
 */
static inline int get_swap_map_reader(struct swap_map_handle *handle,
				      swp_entry_t start)
{
	int error;

	/* Put the handle in a defined state before any early return */
	handle->cur = NULL;
	handle->k = 0;

	if (!swp_offset(start))
		return -EINVAL;
	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
	if (!handle->cur)
		return -ENOMEM;
	error = bio_read_page(swp_offset(start), handle->cur);
	if (error) {
		release_swap_map_reader(handle);
		return error;
	}
	return 0;
}

static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf)
{
	unsigned long offset;
	int error;

	if (!handle->cur)
		return -EINVAL;
	offset = swp_offset(handle->cur->entries[handle->k]);
	if (!offset)
		return -EINVAL;
	error = bio_read_page(offset, buf);
	if (error)
		return error;
	if (++handle->k >= MAP_PAGE_SIZE) {
		handle->k = 0;
		offset = swp_offset(handle->cur->next_swap);
		if (!offset)
			release_swap_map_reader(handle);
		else
			error = bio_read_page(offset, handle->cur);
	}
	return error;
}

832
static int check_header(void)
L
Linus Torvalds 已提交
833
{
834 835
	char *reason = NULL;

L
Linus Torvalds 已提交
836
	dump_info();
837
	if (swsusp_info.version_code != LINUX_VERSION_CODE)
838
		reason = "kernel version";
839
	if (swsusp_info.num_physpages != num_physpages)
840
		reason = "memory size";
L
Linus Torvalds 已提交
841
	if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname))
842
		reason = "system type";
L
Linus Torvalds 已提交
843
	if (strcmp(swsusp_info.uts.release,system_utsname.release))
844
		reason = "kernel release";
L
Linus Torvalds 已提交
845
	if (strcmp(swsusp_info.uts.version,system_utsname.version))
846
		reason = "version";
L
Linus Torvalds 已提交
847
	if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
848 849 850
		reason = "machine";
	if (reason) {
		printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
L
Linus Torvalds 已提交
851 852
		return -EPERM;
	}
853
	return 0;
L
Linus Torvalds 已提交
854 855 856
}

/**
857 858 859
 *	load_image_data - load the image data using the swap map handle
 *	@handle and store them using the page backup list @pblist
 *	(assume there are @nr_pages pages to load)
L
Linus Torvalds 已提交
860 861
 */

862 863 864
static int load_image_data(struct pbe *pblist,
                           struct swap_map_handle *handle,
                           unsigned int nr_pages)
L
Linus Torvalds 已提交
865
{
866 867
	int error;
	unsigned int m;
P
Pavel Machek 已提交
868
	struct pbe *p;
L
Linus Torvalds 已提交
869

870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885
	if (!pblist)
		return -EINVAL;
	printk("Loading image data pages (%u pages) ...     ", nr_pages);
	m = nr_pages / 100;
	if (!m)
		m = 1;
	nr_pages = 0;
	p = pblist;
	while (p) {
		error = swap_map_read_page(handle, (void *)p->address);
		if (error)
			break;
		p = p->next;
		if (!(nr_pages % m))
			printk("\b\b\b\b%3d%%", nr_pages / m);
		nr_pages++;
L
Linus Torvalds 已提交
886
	}
887 888
	if (!error)
		printk("\b\b\b\bdone\n");
L
Linus Torvalds 已提交
889 890 891 892
	return error;
}

/**
893 894
 *	unpack_orig_addresses - copy the elements of @buf[] (1 page) to
 *	the PBEs in the list starting at @pbe
L
Linus Torvalds 已提交
895 896
 */

897 898
static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
                                                struct pbe *pbe)
L
Linus Torvalds 已提交
899
{
900
	int j;
L
Linus Torvalds 已提交
901

902 903 904 905 906 907
	for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
		pbe->orig_address = buf[j];
		pbe = pbe->next;
	}
	return pbe;
}
L
Linus Torvalds 已提交
908

909 910 911 912
/**
 *	load_image_metadata - load the image metadata using the swap map
 *	handle @handle and put them into the PBEs in the list @pblist
 */
L
Linus Torvalds 已提交
913

914 915 916 917 918 919
static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle)
{
	struct pbe *p;
	unsigned long *buf;
	unsigned int n = 0;
	int error = 0;
L
Linus Torvalds 已提交
920

921 922 923 924 925 926 927
	printk("Loading image metadata ... ");
	buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
	if (!buf)
		return -ENOMEM;
	p = pblist;
	while (p) {
		error = swap_map_read_page(handle, buf);
L
Linus Torvalds 已提交
928 929
		if (error)
			break;
930 931
		p = unpack_orig_addresses(buf, p);
		n++;
L
Linus Torvalds 已提交
932
	}
933
	free_page((unsigned long)buf);
934
	if (!error)
935
		printk("done (%u pages loaded)\n", n);
L
Linus Torvalds 已提交
936 937 938
	return error;
}

939
int swsusp_read(struct pbe **pblist_ptr)
L
Linus Torvalds 已提交
940
{
941
	int error;
942 943
	struct pbe *p, *pblist;
	struct swap_map_handle handle;
944
	unsigned int nr_pages;
L
Linus Torvalds 已提交
945

946 947 948 949 950 951 952 953 954 955 956 957 958
	if (IS_ERR(resume_bdev)) {
		pr_debug("swsusp: block device not initialised\n");
		return PTR_ERR(resume_bdev);
	}

	error = get_swap_map_reader(&handle, swsusp_header.image);
	if (!error)
		error = swap_map_read_page(&handle, &swsusp_info);
	if (!error)
		error = check_header();
	if (error)
		return error;
	nr_pages = swsusp_info.image_pages;
959 960
	p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0);
	if (!p)
L
Linus Torvalds 已提交
961
		return -ENOMEM;
962 963 964 965 966 967 968 969 970 971 972 973 974
	error = load_image_metadata(p, &handle);
	if (!error) {
		mark_unsafe_pages(p);
		pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
		if (pblist)
			copy_page_backup_list(pblist, p);
		free_pagedir(p);
		if (!pblist)
			error = -ENOMEM;

		/* Allocate memory for the image and read the data from swap */
		if (!error)
			error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
975 976
		if (!error) {
			release_eaten_pages();
977
			error = load_image_data(pblist, &handle, nr_pages);
978
		}
979 980
		if (!error)
			*pblist_ptr = pblist;
981
	}
982
	release_swap_map_reader(&handle);
983 984 985 986 987 988 989

	blkdev_put(resume_bdev);

	if (!error)
		pr_debug("swsusp: Reading resume file was successful\n");
	else
		pr_debug("swsusp: Error %d resuming\n", error);
L
Linus Torvalds 已提交
990 991 992 993
	return error;
}

/**
994
 *      swsusp_check - Check for swsusp signature in the resume device
L
Linus Torvalds 已提交
995 996 997 998 999 1000 1001 1002 1003
 */

int swsusp_check(void)
{
	int error;

	resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
	if (!IS_ERR(resume_bdev)) {
		set_blocksize(resume_bdev, PAGE_SIZE);
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013
		memset(&swsusp_header, 0, sizeof(swsusp_header));
		if ((error = bio_read_page(0, &swsusp_header)))
			return error;
		if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
			memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
			/* Reset swap signature now */
			error = bio_write_page(0, &swsusp_header);
		} else {
			return -EINVAL;
		}
L
Linus Torvalds 已提交
1014
		if (error)
1015 1016 1017 1018
			blkdev_put(resume_bdev);
		else
			pr_debug("swsusp: Signature found, resuming\n");
	} else {
L
Linus Torvalds 已提交
1019 1020 1021
		error = PTR_ERR(resume_bdev);
	}

1022 1023
	if (error)
		pr_debug("swsusp: Error %d check for resume file\n", error);
L
Linus Torvalds 已提交
1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040

	return error;
}

/**
 *	swsusp_close - close swap device.
 *
 *	Puts resume_bdev, unless it holds an error value, in which case only
 *	a debug message is emitted.
 */

void swsusp_close(void)
{
	if (!IS_ERR(resume_bdev)) {
		blkdev_put(resume_bdev);
		return;
	}
	pr_debug("swsusp: block device not initialised\n");
}