/*
 * This implements the various checks for CONFIG_HARDENED_USERCOPY*,
 * which are designed to protect kernel memory from needless exposure
 * and overwrite under many unintended conditions. This code is based
 * on PAX_USERCOPY, which is:
 *
 * Copyright (C) 2001-2016 PaX Team, Bradley Spengler, Open Source
 * Security Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/thread_info.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include <asm/sections.h>

/*
 * Checks whether a given pointer and length are contained by the current
 * stack frame (when possible).
 *
 * Returns:
 *	NOT_STACK: not at all on the stack
 *	GOOD_FRAME: fully within a valid stack frame
 *	GOOD_STACK: fully on the stack (when frame checking is unavailable)
 *	BAD_STACK: error condition (invalid stack position or bad stack frame)
 */
static noinline int check_stack_object(const void *obj, unsigned long len)
{
	const void * const stack = task_stack_page(current);
	const void * const stackend = stack + THREAD_SIZE;
	int ret;

	/* Object is not on the stack at all. */
	if (obj + len <= stack || stackend <= obj)
		return NOT_STACK;

	/*
	 * Reject: object partially overlaps the stack (passing the
	 * check above means at least one end is within the stack,
	 * so if this check fails, the other end is outside the stack).
	 */
	if (obj < stack || stackend < obj + len)
		return BAD_STACK;

	/* Check if object is safely within a valid frame. */
	ret = arch_within_stack_frames(stack, stackend, obj, len);
	if (ret)
		return ret;

	return GOOD_STACK;
}

/*
 * If these functions are reached, then CONFIG_HARDENED_USERCOPY has found
 * an unexpected state during a copy_from_user() or copy_to_user() call.
 * There are several checks being performed on the buffer by the
 * __check_object_size() function. Normal stack buffer usage should never
 * trip the checks, and copies that involve kernel text will always trip
 * the check.
 * For cache objects, it is checking that only the whitelisted range of
 * bytes for a given cache is being accessed (via the cache's usersize and
 * useroffset fields). To adjust a cache whitelist, use the usercopy-aware
 * kmem_cache_create_usercopy() function to create the cache (and
 * carefully audit the whitelist range).
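 *
 * For example (an illustrative sketch, not code used in this file), a
 * hypothetical "foo" cache exposing only its 'data' field to user space
 * could be created as:
 *
 *	foo_cache = kmem_cache_create_usercopy("foo", sizeof(struct foo),
 *					       0, SLAB_HWCACHE_ALIGN,
 *					       offsetof(struct foo, data),
 *					       sizeof_field(struct foo, data),
 *					       NULL);
 *
 * after which copies touching a foo object outside the whitelisted
 * [useroffset, useroffset + usersize) byte range are rejected.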
 */
void usercopy_warn(const char *name, const char *detail, bool to_user,
		   unsigned long offset, unsigned long len)
{
	WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n",
		 to_user ? "exposure" : "overwrite",
		 to_user ? "from" : "to",
		 name ? : "unknown?!",
		 detail ? " '" : "", detail ? : "", detail ? "'" : "",
		 offset, len);
}

void __noreturn usercopy_abort(const char *name, const char *detail,
			       bool to_user, unsigned long offset,
			       unsigned long len)
{
	pr_emerg("Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n",
		 to_user ? "exposure" : "overwrite",
		 to_user ? "from" : "to",
		 name ? : "unknown?!",
		 detail ? " '" : "", detail ? : "", detail ? "'" : "",
		 offset, len);

	/*
	 * For greater effect, it would be nice to do do_group_exit(),
	 * but BUG() actually hooks all the lock-breaking and per-arch
	 * Oops code, so that is used here instead.
	 */
	BUG();
}

/* Returns true if any portion of [ptr,ptr+n) overlaps with [low,high). */
static bool overlaps(const unsigned long ptr, unsigned long n,
		     unsigned long low, unsigned long high)
{
	const unsigned long check_low = ptr;
	unsigned long check_high = check_low + n;

	/* Does not overlap if entirely above or entirely below. */
	if (check_low >= high || check_high <= low)
		return false;

	return true;
}

/* Is this address range in the kernel text area? */
static inline void check_kernel_text_object(const unsigned long ptr,
					    unsigned long n, bool to_user)
{
	unsigned long textlow = (unsigned long)_stext;
	unsigned long texthigh = (unsigned long)_etext;
	unsigned long textlow_linear, texthigh_linear;

	if (overlaps(ptr, n, textlow, texthigh))
		usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n);

	/*
	 * Some architectures have virtual memory mappings with a secondary
	 * mapping of the kernel text, i.e. there is more than one virtual
	 * kernel address that points to the kernel image. This usually
	 * happens when there is a separate linear physical memory mapping,
	 * where __pa() is not just the reverse of __va(). This can be detected
	 * and checked:
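	 *
	 * (lm_alias() resolves a kernel-image address to its linear-map
	 * alias; on architectures with no such secondary mapping it returns
	 * the address unchanged, which the "no different mapping" test
	 * below relies on.)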
	 */
	textlow_linear = (unsigned long)lm_alias(textlow);
	/* No different mapping: we're done. */
	if (textlow_linear == textlow)
		return;

	/* Check the secondary mapping... */
	texthigh_linear = (unsigned long)lm_alias(texthigh);
	if (overlaps(ptr, n, textlow_linear, texthigh_linear))
		usercopy_abort("linear kernel text", NULL, to_user,
			       ptr - textlow_linear, n);
}

static inline void check_bogus_address(const unsigned long ptr, unsigned long n,
				       bool to_user)
{
	/* Reject if object wraps past end of memory. */
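	/*
	 * (An n-byte copy touches addresses [ptr, ptr + n - 1]; checking
	 * with n - 1 rather than n avoids falsely rejecting a copy that
	 * ends exactly at the top of the address space.)
	 */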
	if (ptr + (n - 1) < ptr)
		usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n);

	/* Reject if NULL or ZERO-allocation. */
	if (ZERO_OR_NULL_PTR(ptr))
		usercopy_abort("null address", NULL, to_user, ptr, n);
}

/* Checks for allocs that are marked in some way as spanning multiple pages. */
static inline void check_page_span(const void *ptr, unsigned long n,
				   struct page *page, bool to_user)
{
#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
	const void *end = ptr + n - 1;
	struct page *endpage;
	bool is_reserved, is_cma;

	/*
	 * Sometimes the kernel data regions are not marked Reserved (see
	 * the check below), and sometimes [_sdata,_edata) does not cover
	 * rodata and/or bss, so check each range explicitly.
	 */

	/* Allow reads of kernel rodata region (if not marked as Reserved). */
	if (ptr >= (const void *)__start_rodata &&
	    end <= (const void *)__end_rodata) {
		if (!to_user)
			usercopy_abort("rodata", NULL, to_user, 0, n);
		return;
	}

	/* Allow kernel data region (if not marked as Reserved). */
	if (ptr >= (const void *)_sdata && end <= (const void *)_edata)
		return;

	/* Allow kernel bss region (if not marked as Reserved). */
	if (ptr >= (const void *)__bss_start &&
	    end <= (const void *)__bss_stop)
		return;

	/* Is the object wholly within one base page? */
	if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) ==
		   ((unsigned long)end & (unsigned long)PAGE_MASK)))
		return;

	/* Allow if fully inside the same compound (__GFP_COMP) page. */
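	/*
	 * (For example, a higher-order allocation made with __GFP_COMP,
	 * such as a large kmalloc() that fell through to the page
	 * allocator, shares a single head page across all of its pages.)
	 */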
	endpage = virt_to_head_page(end);
	if (likely(endpage == page))
		return;

	/*
	 * Reject if range is entirely either Reserved (i.e. special or
	 * device memory), or CMA. Otherwise, reject since the object spans
	 * several independently allocated pages.
	 */
	is_reserved = PageReserved(page);
	is_cma = is_migrate_cma_page(page);
	if (!is_reserved && !is_cma)
		usercopy_abort("spans multiple pages", NULL, to_user, 0, n);

	for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
		page = virt_to_head_page(ptr);
		if (is_reserved && !PageReserved(page))
			usercopy_abort("spans Reserved and non-Reserved pages",
				       NULL, to_user, 0, n);
		if (is_cma && !is_migrate_cma_page(page))
			usercopy_abort("spans CMA and non-CMA pages", NULL,
				       to_user, 0, n);
	}
#endif
}

static inline void check_heap_object(const void *ptr, unsigned long n,
				     bool to_user)
{
	struct page *page;

	if (!virt_addr_valid(ptr))
		return;

	page = virt_to_head_page(ptr);

	if (PageSlab(page)) {
		/* Check slab allocator for flags and size. */
		__check_heap_object(ptr, n, page, to_user);
	} else {
		/* Verify object does not incorrectly span multiple pages. */
		check_page_span(ptr, n, page, to_user);
	}
}

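/*
 * Enabled at boot by set_hardened_usercopy() below when the kernel is
 * booted with "hardened_usercopy=off"; read-only after init.
 */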
static DEFINE_STATIC_KEY_FALSE_RO(bypass_usercopy_checks);

/*
 * Validates that the given object is:
 * - not a bogus address
 * - fully contained by stack (or stack frame, when available)
 * - fully within SLAB object (or object whitelist area, when available)
 * - not in kernel text
 */
void __check_object_size(const void *ptr, unsigned long n, bool to_user)
{
	if (static_branch_unlikely(&bypass_usercopy_checks))
		return;

	/* Skip all tests if size is zero. */
	if (!n)
		return;

	/* Check for invalid addresses. */
	check_bogus_address((const unsigned long)ptr, n, to_user);

	/* Check for bad stack object. */
	switch (check_stack_object(ptr, n)) {
	case NOT_STACK:
		/* Object is not touching the current process stack. */
		break;
	case GOOD_FRAME:
	case GOOD_STACK:
		/*
		 * Object is either in the correct frame (when it
		 * is possible to check) or just generally on the
		 * process stack (when frame checking not available).
		 */
		return;
	default:
		usercopy_abort("process stack", NULL, to_user, 0, n);
	}

	/* Check for bad heap object. */
	check_heap_object(ptr, n, to_user);

	/* Check for object in kernel to avoid text exposure. */
	check_kernel_text_object((const unsigned long)ptr, n, to_user);
}
EXPORT_SYMBOL(__check_object_size);
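
/*
 * Nothing calls __check_object_size() directly: the uaccess helpers reach
 * it through check_object_size() when CONFIG_HARDENED_USERCOPY is enabled,
 * so an ordinary driver copy such as:
 *
 *	if (copy_to_user(ubuf, kobj, len))
 *		return -EFAULT;
 *
 * passes through the checks above before any bytes move. (The exact call
 * chain varies by kernel version and architecture.)
 */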

static bool enable_checks __initdata = true;

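/*
 * The checks default to enabled and can be disabled at boot by passing
 * "hardened_usercopy=off" on the kernel command line.
 */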
static int __init parse_hardened_usercopy(char *str)
{
	return strtobool(str, &enable_checks);
}

__setup("hardened_usercopy=", parse_hardened_usercopy);

static int __init set_hardened_usercopy(void)
{
	if (!enable_checks)
		static_branch_enable(&bypass_usercopy_checks);
	return 1;
}

late_initcall(set_hardened_usercopy);