kexec.c 6.6 KB
Newer Older
1
/*
2
 * kexec.c - kexec_load system call
3 4 5 6 7 8
 * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

9
#include <linux/capability.h>
10 11 12
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/kexec.h>
13
#include <linux/mutex.h>
14 15
#include <linux/list.h>
#include <linux/syscalls.h>
16
#include <linux/vmalloc.h>
17
#include <linux/slab.h>
18

19 20
#include "kexec_internal.h"

21 22 23
/*
 * Copy the user-supplied array of segment descriptors into the kimage.
 *
 * Returns 0 on success, or -EFAULT if the user buffer could not be read
 * in full.
 */
static int copy_user_segment_list(struct kimage *image,
				  unsigned long nr_segments,
				  struct kexec_segment __user *segments)
{
	size_t nbytes = nr_segments * sizeof(*segments);

	image->nr_segments = nr_segments;
	if (copy_from_user(image->segment, segments, nbytes))
		return -EFAULT;

	return 0;
}

38 39 40 41
/*
 * Allocate and fully initialize a kimage for the kexec_load syscall.
 *
 * On success *rimage points at the new image and 0 is returned; on
 * failure a negative errno is returned and nothing is allocated.
 */
static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
			     unsigned long nr_segments,
			     struct kexec_segment __user *segments,
			     unsigned long flags)
{
	bool on_crash = flags & KEXEC_ON_CRASH;
	struct kimage *image;
	int rc;

	/* A crash kernel must enter inside the reserved crash region. */
	if (on_crash &&
	    (entry < crashk_res.start || entry > crashk_res.end))
		return -EADDRNOTAVAIL;

	/* Allocate and initialize a controlling structure */
	image = do_kimage_alloc_init();
	if (!image)
		return -ENOMEM;

	image->start = entry;

	rc = copy_user_segment_list(image, nr_segments, segments);
	if (rc)
		goto free_image;

	rc = sanity_check_segment_list(image);
	if (rc)
		goto free_image;

	/* Enable the special crash kernel control page allocation policy. */
	if (on_crash) {
		image->control_page = crashk_res.start;
		image->type = KEXEC_TYPE_CRASH;
	}

	/*
	 * Find a location for the control code buffer, and add it
	 * the vector of segments so that it's pages will also be
	 * counted as destination pages.
	 */
	rc = -ENOMEM;
	image->control_code_page = kimage_alloc_control_pages(image,
					   get_order(KEXEC_CONTROL_PAGE_SIZE));
	if (!image->control_code_page) {
		pr_err("Could not allocate control_code_buffer\n");
		goto free_image;
	}

	/* The swap page is only needed for the non-crash (reboot) case. */
	if (!on_crash) {
		image->swap_page = kimage_alloc_control_pages(image, 0);
		if (!image->swap_page) {
			pr_err("Could not allocate swap buffer\n");
			goto free_control_pages;
		}
	}

	*rimage = image;
	return 0;

free_control_pages:
	kimage_free_page_list(&image->control_pages);
free_image:
	kfree(image);
	return rc;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down.  Preventing on-going dmas, and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination.  And
 *   jumps into the image at entry.
 *
 * kexec does not sync, or unmount filesystems so if you need
 * that to happen you need to do that yourself.
 */

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
		struct kexec_segment __user *, segments, unsigned long, flags)
{
	struct kimage **dest_image, *image;
	int result;

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
		return -EPERM;

	/*
	 * Verify we have a legal set of flags
	 * This leaves us room for future extensions.
	 */
	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
		return -EINVAL;

	/* Verify we are on the appropriate architecture */
	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
		return -EINVAL;

	/* Put an artificial cap on the number
	 * of segments passed to kexec_load.
	 */
	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	image = NULL;
	result = 0;

	/* Because we write directly to the reserved memory
	 * region when loading crash kernels we need a mutex here to
	 * prevent multiple crash  kernels from attempting to load
	 * simultaneously, and to prevent a crash kernel from loading
	 * over the top of a in use crash kernel.
	 *
	 * KISS: always take the mutex.
	 */
	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;

	dest_image = &kexec_image;
	if (flags & KEXEC_ON_CRASH)
		dest_image = &kexec_crash_image;
	if (nr_segments > 0) {
		unsigned long i;
		bool crash_mapped = false;

		if (flags & KEXEC_ON_CRASH) {
			/*
			 * Loading another kernel to switch to if this one
			 * crashes.  Free any current crash dump kernel before
			 * we corrupt it.
			 */
			kimage_free(xchg(&kexec_crash_image, NULL));
			result = kimage_alloc_init(&image, entry, nr_segments,
						   segments, flags);
			crash_map_reserved_pages();
			crash_mapped = true;
		} else {
			/* Loading another kernel to reboot into. */

			result = kimage_alloc_init(&image, entry, nr_segments,
						   segments, flags);
		}
		if (result)
			goto out_unmap;

		if (flags & KEXEC_PRESERVE_CONTEXT)
			image->preserve_context = 1;
		result = machine_kexec_prepare(image);
		if (result)
			goto out_unmap;

		for (i = 0; i < nr_segments; i++) {
			result = kimage_load_segment(image, &image->segment[i]);
			if (result)
				goto out_unmap;
		}
		kimage_terminate(image);
out_unmap:
		/*
		 * Balance crash_map_reserved_pages(): previously the error
		 * paths above jumped straight to 'out' and leaked the
		 * reserved-region mapping.  Unmap on every path, success or
		 * failure, before deciding whether to install the image.
		 */
		if (crash_mapped)
			crash_unmap_reserved_pages();
		if (result)
			goto out;
	}
	/* Install the new kernel, and  Uninstall the old */
	image = xchg(dest_image, image);

out:
	mutex_unlock(&kexec_mutex);
	kimage_free(image);

	return result;
}

#ifdef CONFIG_COMPAT
219 220 221 222
/*
 * 32-bit compat entry point: widen each compat_kexec_segment into the
 * native kexec_segment layout on the compat user stack, then forward to
 * the native syscall.
 */
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
		       compat_ulong_t, nr_segments,
		       struct compat_kexec_segment __user *, segments,
		       compat_ulong_t, flags)
{
	struct compat_kexec_segment cseg;
	struct kexec_segment seg, __user *kseg;
	unsigned long i;

	/* Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	kseg = compat_alloc_user_space(nr_segments * sizeof(seg));
	for (i = 0; i < nr_segments; i++) {
		if (copy_from_user(&cseg, &segments[i], sizeof(cseg)))
			return -EFAULT;

		/* Translate 32-bit fields to their native widths. */
		seg.buf   = compat_ptr(cseg.buf);
		seg.bufsz = cseg.bufsz;
		seg.mem   = cseg.mem;
		seg.memsz = cseg.memsz;

		if (copy_to_user(&kseg[i], &seg, sizeof(seg)))
			return -EFAULT;
	}

	return sys_kexec_load(entry, nr_segments, kseg, flags);
}
#endif