提交 e754aedc 编写于 作者: D Dave Hansen 提交者: Ingo Molnar

x86/mpx, selftests: Add MPX self test

I've had this code for a while, but never submitted it upstream.  Now
that Skylake hardware is out in the wild, folks can actually run this
for real.  It tests the following:

	1. The MPX hardware is enabled by the kernel and doing what it
	   is supposed to
	2. The MPX management code is present and enabled in the kernel
	3. MPX Signal handling
	4. The MPX bounds table population code (on-demand population)
	5. The MPX bounds table unmapping code (kernel-initiated freeing
	   when unused)

This has also caught bugs in the XSAVE code because MPX state is
saved/restored with XSAVE.

I'm submitting it now because it would have caught the recent issues
with the compat_siginfo code not being properly augmented when new
siginfo state is added.
Signed-off-by: NDave Hansen <dave.hansen@linux.intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20160608172535.5B40B0EE@viggo.jf.intel.comSigned-off-by: NIngo Molnar <mingo@kernel.org>
上级 02e8fda2
......@@ -5,7 +5,7 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
check_initial_reg_state sigreturn ldt_gdt iopl
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
......
#ifndef _MPX_DEBUG_H
#define _MPX_DEBUG_H
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 0
#endif
#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0)
#define dprintf1(args...) dprintf_level(1, args)
#define dprintf2(args...) dprintf_level(2, args)
#define dprintf3(args...) dprintf_level(3, args)
#define dprintf4(args...) dprintf_level(4, args)
#define dprintf5(args...) dprintf_level(5, args)
#endif /* _MPX_DEBUG_H */
/*
* Written by Dave Hansen <dave.hansen@intel.com>
*/
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <fcntl.h>
#include "mpx-debug.h"
#include "mpx-mm.h"
#include "mpx-hw.h"
unsigned long bounds_dir_global;
#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__)
static void inline __mpx_dig_abort(const char *file, const char *func, int line)
{
fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
abort();
}
/*
* run like this (BDIR finds the probably bounds directory):
*
* BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
* | head -1 | awk -F- '{print $1}')";
* ./mpx-dig $pid 0x$BDIR
*
* NOTE:
* assumes that the only 2097152-kb VMA is the bounds dir
*/
long nr_incore(void *ptr, unsigned long size_bytes)
{
int i;
long ret = 0;
long vec_len = size_bytes / PAGE_SIZE;
unsigned char *vec = malloc(vec_len);
int incore_ret;
if (!vec)
mpx_dig_abort();
incore_ret = mincore(ptr, size_bytes, vec);
if (incore_ret) {
printf("mincore ret: %d\n", incore_ret);
perror("mincore");
mpx_dig_abort();
}
for (i = 0; i < vec_len; i++)
ret += vec[i];
free(vec);
return ret;
}
int open_proc(int pid, char *file)
{
static char buf[100];
int fd;
snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file);
fd = open(&buf[0], O_RDONLY);
if (fd < 0)
perror(buf);
return fd;
}
struct vaddr_range {
unsigned long start;
unsigned long end;
};
struct vaddr_range *ranges;
int nr_ranges_allocated;
int nr_ranges_populated;
int last_range = -1;
int __pid_load_vaddrs(int pid)
{
int ret = 0;
int proc_maps_fd = open_proc(pid, "maps");
char linebuf[10000];
unsigned long start;
unsigned long end;
char rest[1000];
FILE *f = fdopen(proc_maps_fd, "r");
if (!f)
mpx_dig_abort();
nr_ranges_populated = 0;
while (!feof(f)) {
char *readret = fgets(linebuf, sizeof(linebuf), f);
int parsed;
if (readret == NULL) {
if (feof(f))
break;
mpx_dig_abort();
}
parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
if (parsed != 3)
mpx_dig_abort();
dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
if (nr_ranges_populated >= nr_ranges_allocated) {
ret = -E2BIG;
break;
}
ranges[nr_ranges_populated].start = start;
ranges[nr_ranges_populated].end = end;
nr_ranges_populated++;
}
last_range = -1;
fclose(f);
close(proc_maps_fd);
return ret;
}
int pid_load_vaddrs(int pid)
{
int ret;
dprintf2("%s(%d)\n", __func__, pid);
if (!ranges) {
nr_ranges_allocated = 4;
ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
nr_ranges_allocated, ranges);
assert(ranges != NULL);
}
do {
ret = __pid_load_vaddrs(pid);
if (!ret)
break;
if (ret == -E2BIG) {
dprintf2("%s(%d) need to realloc\n", __func__, pid);
nr_ranges_allocated *= 2;
ranges = realloc(ranges,
nr_ranges_allocated * sizeof(ranges[0]));
dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
pid, nr_ranges_allocated, ranges);
assert(ranges != NULL);
dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
}
} while (1);
dprintf2("%s(%d) done\n", __func__, pid);
return ret;
}
static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
{
if (vaddr < r->start)
return 0;
if (vaddr >= r->end)
return 0;
return 1;
}
static inline int vaddr_mapped_by_range(unsigned long vaddr)
{
int i;
if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range]))
return 1;
for (i = 0; i < nr_ranges_populated; i++) {
struct vaddr_range *r = &ranges[i];
if (vaddr_in_range(vaddr, r))
continue;
last_range = i;
return 1;
}
return 0;
}
const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
void *read_bounds_table_into_buf(unsigned long table_vaddr)
{
#ifdef MPX_DIG_STANDALONE
static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
if (seek_ret != table_vaddr)
mpx_dig_abort();
int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
if (read_ret != sizeof(bt_buf))
mpx_dig_abort();
return &bt_buf;
#else
return (void *)table_vaddr;
#endif
}
int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
unsigned long bde_vaddr)
{
unsigned long offset_inside_bt;
int nr_entries = 0;
int do_abort = 0;
char *bt_buf;
dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
__func__, base_controlled_vaddr, bde_vaddr);
bt_buf = read_bounds_table_into_buf(table_vaddr);
dprintf4("%s() read done\n", __func__);
for (offset_inside_bt = 0;
offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
offset_inside_bt += bt_entry_size_bytes) {
unsigned long bt_entry_index;
unsigned long bt_entry_controls;
unsigned long this_bt_entry_for_vaddr;
unsigned long *bt_entry_buf;
int i;
dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
if (!bt_buf) {
printf("null bt_buf\n");
mpx_dig_abort();
}
if (!bt_entry_buf) {
printf("null bt_entry_buf\n");
mpx_dig_abort();
}
dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
bt_entry_buf);
if (!bt_entry_buf[0] &&
!bt_entry_buf[1] &&
!bt_entry_buf[2] &&
!bt_entry_buf[3])
continue;
nr_entries++;
bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
bt_entry_controls = sizeof(void *);
this_bt_entry_for_vaddr =
base_controlled_vaddr + bt_entry_index*bt_entry_controls;
/*
* We sign extend vaddr bits 48->63 which effectively
* creates a hole in the virtual address space.
* This calculation corrects for the hole.
*/
if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
this_bt_entry_for_vaddr |= 0xffff800000000000;
if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
printf("bt_entry_buf: %p\n", bt_entry_buf);
printf("there is a bte for %lx but no mapping\n",
this_bt_entry_for_vaddr);
printf(" bde vaddr: %016lx\n", bde_vaddr);
printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
printf(" table_vaddr: %016lx\n", table_vaddr);
printf(" entry vaddr: %016lx @ offset %lx\n",
table_vaddr + offset_inside_bt, offset_inside_bt);
do_abort = 1;
mpx_dig_abort();
}
if (DEBUG_LEVEL < 4)
continue;
printf("table entry[%lx]: ", offset_inside_bt);
for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
printf("0x%016lx ", bt_entry_buf[i]);
printf("\n");
}
if (do_abort)
mpx_dig_abort();
dprintf4("%s() done\n", __func__);
return nr_entries;
}
int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
int *nr_populated_bdes)
{
unsigned long i;
int total_entries = 0;
dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
len_bytes, bd_offset_bytes, buf + len_bytes);
for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
unsigned long bounds_dir_entry;
unsigned long bd_for_vaddr;
unsigned long bt_start;
unsigned long bt_tail;
int nr_entries;
dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
bounds_dir_entry_ptr);
bounds_dir_entry = *bounds_dir_entry_ptr;
if (!bounds_dir_entry) {
dprintf4("no bounds dir at index 0x%lx / 0x%lx "
"start at offset:%lx %lx\n", bd_index, bd_index,
bd_offset_bytes, i);
continue;
}
dprintf3("found bounds_dir_entry: 0x%lx @ "
"index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
&buf[i]);
/* mask off the enable bit: */
bounds_dir_entry &= ~0x1;
(*nr_populated_bdes)++;
dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
bt_start = bounds_dir_entry;
bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
if (!vaddr_mapped_by_range(bt_start)) {
printf("bounds directory 0x%lx points to nowhere\n",
bounds_dir_entry);
mpx_dig_abort();
}
if (!vaddr_mapped_by_range(bt_tail)) {
printf("bounds directory end 0x%lx points to nowhere\n",
bt_tail);
mpx_dig_abort();
}
/*
* Each bounds directory entry controls 1MB of virtual address
* space. This variable is the virtual address in the process
* of the beginning of the area controlled by this bounds_dir.
*/
bd_for_vaddr = bd_index * (1UL<<20);
nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
bounds_dir_global+bd_offset_bytes+i);
total_entries += nr_entries;
dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
"total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
bd_index, buf+i,
bounds_dir_entry, nr_entries, total_entries,
bd_for_vaddr, bd_for_vaddr + (1UL<<20));
}
dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
bd_offset_bytes);
return total_entries;
}
int proc_pid_mem_fd = -1;
void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
long buffer_size_bytes, void *buffer)
{
unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
int read_ret;
off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
if (seek_ret != seekto)
mpx_dig_abort();
read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
/* there shouldn't practically be short reads of /proc/$pid/mem */
if (read_ret != buffer_size_bytes)
mpx_dig_abort();
return buffer;
}
void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
long buffer_size_bytes, void *buffer)
{
unsigned char vec[buffer_size_bytes / PAGE_SIZE];
char *dig_bounds_dir_ptr =
(void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
/*
* use mincore() to quickly find the areas of the bounds directory
* that have memory and thus will be worth scanning.
*/
int incore_ret;
int incore = 0;
int i;
dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
if (incore_ret) {
printf("mincore ret: %d\n", incore_ret);
perror("mincore");
mpx_dig_abort();
}
for (i = 0; i < sizeof(vec); i++)
incore += vec[i];
dprintf4("%s() total incore: %d\n", __func__, incore);
if (!incore)
return NULL;
dprintf3("%s() total incore: %d\n", __func__, incore);
return dig_bounds_dir_ptr;
}
int inspect_pid(int pid)
{
static int dig_nr;
long offset_inside_bounds_dir;
char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
char *dig_bounds_dir_ptr;
int total_entries = 0;
int nr_populated_bdes = 0;
int inspect_self;
if (getpid() == pid) {
dprintf4("inspecting self\n");
inspect_self = 1;
} else {
dprintf4("inspecting pid %d\n", pid);
mpx_dig_abort();
}
for (offset_inside_bounds_dir = 0;
offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
static int bufs_skipped;
int this_entries;
if (inspect_self) {
dig_bounds_dir_ptr =
fill_bounds_dir_buf_self(offset_inside_bounds_dir,
sizeof(bounds_dir_buf),
&bounds_dir_buf[0]);
} else {
dig_bounds_dir_ptr =
fill_bounds_dir_buf_other(offset_inside_bounds_dir,
sizeof(bounds_dir_buf),
&bounds_dir_buf[0]);
}
if (!dig_bounds_dir_ptr) {
bufs_skipped++;
continue;
}
this_entries = search_bd_buf(dig_bounds_dir_ptr,
sizeof(bounds_dir_buf),
offset_inside_bounds_dir,
&nr_populated_bdes);
total_entries += this_entries;
}
printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
total_entries, nr_populated_bdes);
return total_entries + nr_populated_bdes;
}
#ifdef MPX_DIG_REMOTE
int main(int argc, char **argv)
{
int err;
char *c;
unsigned long bounds_dir_entry;
int pid;
printf("mpx-dig starting...\n");
err = sscanf(argv[1], "%d", &pid);
printf("parsing: '%s', err: %d\n", argv[1], err);
if (err != 1)
mpx_dig_abort();
err = sscanf(argv[2], "%lx", &bounds_dir_global);
printf("parsing: '%s': %d\n", argv[2], err);
if (err != 1)
mpx_dig_abort();
proc_pid_mem_fd = open_proc(pid, "mem");
if (proc_pid_mem_fd < 0)
mpx_dig_abort();
inspect_pid(pid);
return 0;
}
#endif
long inspect_me(struct mpx_bounds_dir *bounds_dir)
{
int pid = getpid();
pid_load_vaddrs(pid);
bounds_dir_global = (unsigned long)bounds_dir;
dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
return inspect_pid(pid);
}
#ifndef _MPX_HW_H
#define _MPX_HW_H
#include <assert.h>
/* Describe the MPX Hardware Layout in here */
#define NR_MPX_BOUNDS_REGISTERS 4
#ifdef __i386__
#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */
#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */
#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4
#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */
#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2
#define MPX_BOUNDS_TABLE_TOP_BIT 11
#define MPX_BOUNDS_DIR_BOTTOM_BIT 12
#define MPX_BOUNDS_DIR_TOP_BIT 31
#else
/*
* Linear Address of "pointer" (LAp)
* 0 -> 2: ignored
* 3 -> 19: index in to bounds table
* 20 -> 47: index in to bounds directory
* 48 -> 63: ignored
*/
#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32
#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */
#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8
#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */
#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3
#define MPX_BOUNDS_TABLE_TOP_BIT 19
#define MPX_BOUNDS_DIR_BOTTOM_BIT 20
#define MPX_BOUNDS_DIR_TOP_BIT 47
#endif
#define MPX_BOUNDS_DIR_NR_ENTRIES \
(MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
#define MPX_BOUNDS_TABLE_NR_ENTRIES \
(MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1
struct mpx_bd_entry {
union {
char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
void *contents[1];
};
} __attribute__((packed));
struct mpx_bt_entry {
union {
char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
unsigned long contents[1];
};
} __attribute__((packed));
struct mpx_bounds_dir {
struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
} __attribute__((packed));
struct mpx_bounds_table {
struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
} __attribute__((packed));
static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
{
int total_nr_bits = topbit - bottombit;
unsigned long mask = (1UL << total_nr_bits)-1;
return (val >> bottombit) & mask;
}
static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
{
return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
MPX_BOUNDS_TABLE_TOP_BIT);
}
static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
{
return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
MPX_BOUNDS_DIR_TOP_BIT);
}
static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
struct mpx_bounds_dir *bounds_dir)
{
unsigned long index = __vaddr_bounds_directory_index(vaddr);
return &bounds_dir->entries[index];
}
static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
{
unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
}
static inline struct mpx_bounds_table *
__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
{
unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
__bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
return (struct mpx_bounds_table *)__bd_entry;
}
static inline struct mpx_bt_entry *
mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
{
struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde);
unsigned long index = __vaddr_bounds_table_index(vaddr);
return &bt->entries[index];
}
#endif /* _MPX_HW_H */
此差异已折叠。
#ifndef _MPX_MM_H
#define _MPX_MM_H
#define PAGE_SIZE 4096
#define MB (1UL<<20)
extern long nr_incore(void *ptr, unsigned long size_bytes);
#endif /* _MPX_MM_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册