提交 37868fe1 编写于 作者: A Andy Lutomirski 提交者: Ingo Molnar

x86/ldt: Make modify_ldt synchronous

modify_ldt() has questionable locking and does not synchronize
threads.  Improve it: redesign the locking and synchronize all
threads' LDTs using an IPI on all modifications.

This will dramatically slow down modify_ldt in multithreaded
programs, but there shouldn't be any multithreaded programs that
care about modify_ldt's performance in the first place.

This fixes some fallout from the CVE-2015-5157 fixes.
Signed-off-by: NAndy Lutomirski <luto@kernel.org>
Reviewed-by: NBorislav Petkov <bp@suse.de>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: security@kernel.org <security@kernel.org>
Cc: <stable@vger.kernel.org>
Cc: xen-devel <xen-devel@lists.xen.org>
Link: http://lkml.kernel.org/r/4c6978476782160600471bd865b318db34c7b628.1438291540.git.luto@kernel.orgSigned-off-by: NIngo Molnar <mingo@kernel.org>
上级 aa1acff3
......@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
set_ldt(NULL, 0);
}
/*
* load one particular LDT into the current CPU
*/
static inline void load_LDT_nolock(mm_context_t *pc)
{
set_ldt(pc->ldt, pc->size);
}
static inline void load_LDT(mm_context_t *pc)
{
preempt_disable();
load_LDT_nolock(pc);
preempt_enable();
}
static inline unsigned long get_desc_base(const struct desc_struct *desc)
{
return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
......
......@@ -9,8 +9,7 @@
* we put the segment information here.
*/
typedef struct {
void *ldt;
int size;
struct ldt_struct *ldt;
#ifdef CONFIG_X86_64
/* True if mm supports a task running in 32 bit compatibility mode. */
......
......@@ -33,6 +33,50 @@ static inline void load_mm_cr4(struct mm_struct *mm)
static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif
/*
* ldt_structs can be allocated, used, and freed, but they are never
* modified while live.
*/
struct ldt_struct {
/*
* Xen requires page-aligned LDTs with special permissions. This is
* needed to prevent us from installing evil descriptors such as
* call gates. On native, we could merge the ldt_struct and LDT
* allocations, but it's not worth trying to optimize.
*/
struct desc_struct *entries;
int size;
};
static inline void load_mm_ldt(struct mm_struct *mm)
{
struct ldt_struct *ldt;
/* lockless_dereference synchronizes with smp_store_release */
ldt = lockless_dereference(mm->context.ldt);
/*
* Any change to mm->context.ldt is followed by an IPI to all
* CPUs with the mm active. The LDT will not be freed until
* after the IPI is handled by all such CPUs. This means that,
* if the ldt_struct changes before we return, the values we see
* will be safe, and the new values will be loaded before we run
* any user code.
*
* NB: don't try to convert this to use RCU without extreme care.
* We would still need IRQs off, because we don't want to change
* the local LDT after an IPI loaded a newer value than the one
* that we can see.
*/
if (unlikely(ldt))
set_ldt(ldt->entries, ldt->size);
else
clear_LDT();
DEBUG_LOCKS_WARN_ON(preemptible());
}
/*
* Used for LDT copy/destruction.
*/
......@@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* was called and then modify_ldt changed
* prev->context.ldt but suppressed an IPI to this CPU.
* In this case, prev->context.ldt != NULL, because we
* never free an LDT while the mm still exists. That
* means that next->context.ldt != prev->context.ldt,
* because mms never share an LDT.
* never set context.ldt to NULL while the mm still
* exists. That means that next->context.ldt !=
* prev->context.ldt, because mms never share an LDT.
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
load_mm_ldt(next);
}
#ifdef CONFIG_SMP
else {
......@@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next);
load_LDT_nolock(&next->context);
load_mm_ldt(next);
}
}
#endif
......
......@@ -1410,7 +1410,7 @@ void cpu_init(void)
load_sp0(t, &current->thread);
set_tss_desc(cpu, t);
load_TR_desc();
load_LDT(&init_mm.context);
load_mm_ldt(&init_mm);
clear_all_debug_regs();
dbg_restore_debug_regs();
......@@ -1459,7 +1459,7 @@ void cpu_init(void)
load_sp0(t, thread);
set_tss_desc(cpu, t);
load_TR_desc();
load_LDT(&init_mm.context);
load_mm_ldt(&init_mm);
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
......
......@@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment)
int idx = segment >> 3;
if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
struct ldt_struct *ldt;
if (idx > LDT_ENTRIES)
return 0;
if (idx > current->active_mm->context.size)
/* IRQs are off, so this synchronizes with smp_store_release */
ldt = lockless_dereference(current->active_mm->context.ldt);
if (!ldt || idx > ldt->size)
return 0;
desc = current->active_mm->context.ldt;
desc = &ldt->entries[idx];
} else {
if (idx > GDT_ENTRIES)
return 0;
desc = raw_cpu_ptr(gdt_page.gdt);
desc = raw_cpu_ptr(gdt_page.gdt) + idx;
}
return get_desc_base(desc + idx);
return get_desc_base(desc);
}
#ifdef CONFIG_COMPAT
......
......@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
......@@ -20,82 +21,82 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
#ifdef CONFIG_SMP
/* context.lock is held for us, so we don't need any locking. */
static void flush_ldt(void *current_mm)
{
if (current->active_mm == current_mm)
load_LDT(&current->active_mm->context);
mm_context_t *pc;
if (current->active_mm != current_mm)
return;
pc = &current->active_mm->context;
set_ldt(pc->ldt->entries, pc->ldt->size);
}
#endif
static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(int size)
{
void *oldldt, *newldt;
int oldsize;
if (mincount <= pc->size)
return 0;
oldsize = pc->size;
mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
struct ldt_struct *new_ldt;
int alloc_size;
if (size > LDT_ENTRIES)
return NULL;
new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
if (!new_ldt)
return NULL;
BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
alloc_size = size * LDT_ENTRY_SIZE;
/*
* Xen is very picky: it requires a page-aligned LDT that has no
* trailing nonzero bytes in any page that contains LDT descriptors.
* Keep it simple: zero the whole allocation and never allocate less
* than PAGE_SIZE.
*/
if (alloc_size > PAGE_SIZE)
new_ldt->entries = vzalloc(alloc_size);
else
newldt = (void *)__get_free_page(GFP_KERNEL);
if (!newldt)
return -ENOMEM;
new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (oldsize)
memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
oldldt = pc->ldt;
memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
(mincount - oldsize) * LDT_ENTRY_SIZE);
if (!new_ldt->entries) {
kfree(new_ldt);
return NULL;
}
paravirt_alloc_ldt(newldt, mincount);
new_ldt->size = size;
return new_ldt;
}
#ifdef CONFIG_X86_64
/* CHECKME: Do we really need this ? */
wmb();
#endif
pc->ldt = newldt;
wmb();
pc->size = mincount;
wmb();
if (reload) {
#ifdef CONFIG_SMP
preempt_disable();
load_LDT(pc);
if (!cpumask_equal(mm_cpumask(current->mm),
cpumask_of(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
load_LDT(pc);
#endif
}
if (oldsize) {
paravirt_free_ldt(oldldt, oldsize);
if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
put_page(virt_to_page(oldldt));
}
return 0;
/* After calling this, the LDT is immutable. */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
paravirt_alloc_ldt(ldt->entries, ldt->size);
}
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
/* context.lock is held */
static void install_ldt(struct mm_struct *current_mm,
struct ldt_struct *ldt)
{
int err = alloc_ldt(new, old->size, 0);
int i;
/* Synchronizes with lockless_dereference in load_mm_ldt. */
smp_store_release(&current_mm->context.ldt, ldt);
/* Activate the LDT for all CPUs using current_mm. */
on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
}
if (err < 0)
return err;
static void free_ldt_struct(struct ldt_struct *ldt)
{
if (likely(!ldt))
return;
for (i = 0; i < old->size; i++)
write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
return 0;
paravirt_free_ldt(ldt->entries, ldt->size);
if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(ldt->entries);
else
kfree(ldt->entries);
kfree(ldt);
}
/*
......@@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
struct ldt_struct *new_ldt;
struct mm_struct *old_mm;
int retval = 0;
mutex_init(&mm->context.lock);
mm->context.size = 0;
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
mutex_lock(&old_mm->context.lock);
retval = copy_ldt(&mm->context, &old_mm->context);
mutex_unlock(&old_mm->context.lock);
if (!old_mm) {
mm->context.ldt = NULL;
return 0;
}
mutex_lock(&old_mm->context.lock);
if (!old_mm->context.ldt) {
mm->context.ldt = NULL;
goto out_unlock;
}
new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
if (!new_ldt) {
retval = -ENOMEM;
goto out_unlock;
}
memcpy(new_ldt->entries, old_mm->context.ldt->entries,
new_ldt->size * LDT_ENTRY_SIZE);
finalize_ldt_struct(new_ldt);
mm->context.ldt = new_ldt;
out_unlock:
mutex_unlock(&old_mm->context.lock);
return retval;
}
......@@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
*/
void destroy_context(struct mm_struct *mm)
{
if (mm->context.size) {
#ifdef CONFIG_X86_32
/* CHECKME: Can this ever happen ? */
if (mm == current->active_mm)
clear_LDT();
#endif
paravirt_free_ldt(mm->context.ldt, mm->context.size);
if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
put_page(virt_to_page(mm->context.ldt));
mm->context.size = 0;
}
free_ldt_struct(mm->context.ldt);
mm->context.ldt = NULL;
}
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
int err;
int retval;
unsigned long size;
struct mm_struct *mm = current->mm;
if (!mm->context.size)
return 0;
mutex_lock(&mm->context.lock);
if (!mm->context.ldt) {
retval = 0;
goto out_unlock;
}
if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
mutex_lock(&mm->context.lock);
size = mm->context.size * LDT_ENTRY_SIZE;
size = mm->context.ldt->size * LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;
err = 0;
if (copy_to_user(ptr, mm->context.ldt, size))
err = -EFAULT;
mutex_unlock(&mm->context.lock);
if (err < 0)
goto error_return;
if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
retval = -EFAULT;
goto out_unlock;
}
if (size != bytecount) {
/* zero-fill the rest */
if (clear_user(ptr + size, bytecount - size) != 0) {
err = -EFAULT;
goto error_return;
/* Zero-fill the rest and pretend we read bytecount bytes. */
if (clear_user(ptr + size, bytecount - size)) {
retval = -EFAULT;
goto out_unlock;
}
}
return bytecount;
error_return:
return err;
retval = bytecount;
out_unlock:
mutex_unlock(&mm->context.lock);
return retval;
}
static int read_default_ldt(void __user *ptr, unsigned long bytecount)
......@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
struct desc_struct ldt;
int error;
struct user_desc ldt_info;
int oldsize, newsize;
struct ldt_struct *new_ldt, *old_ldt;
error = -EINVAL;
if (bytecount != sizeof(ldt_info))
......@@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
goto out;
}
mutex_lock(&mm->context.lock);
if (ldt_info.entry_number >= mm->context.size) {
error = alloc_ldt(&current->mm->context,
ldt_info.entry_number + 1, 1);
if (error < 0)
goto out_unlock;
}
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
if (oldmode || LDT_empty(&ldt_info)) {
memset(&ldt, 0, sizeof(ldt));
goto install;
if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
LDT_empty(&ldt_info)) {
/* The user wants to clear the entry. */
memset(&ldt, 0, sizeof(ldt));
} else {
if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
error = -EINVAL;
goto out;
}
fill_ldt(&ldt, &ldt_info);
if (oldmode)
ldt.avl = 0;
}
if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
error = -EINVAL;
mutex_lock(&mm->context.lock);
old_ldt = mm->context.ldt;
oldsize = old_ldt ? old_ldt->size : 0;
newsize = max((int)(ldt_info.entry_number + 1), oldsize);
error = -ENOMEM;
new_ldt = alloc_ldt_struct(newsize);
if (!new_ldt)
goto out_unlock;
}
fill_ldt(&ldt, &ldt_info);
if (oldmode)
ldt.avl = 0;
if (old_ldt)
memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
new_ldt->entries[ldt_info.entry_number] = ldt;
finalize_ldt_struct(new_ldt);
/* Install the new entry ... */
install:
write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
install_ldt(mm, new_ldt);
free_ldt_struct(old_ldt);
error = 0;
out_unlock:
......
......@@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all)
void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
if (dead_task->mm->context.size) {
if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
dead_task->mm->context.ldt,
dead_task->mm->context.size);
dead_task->mm->context.ldt->size);
BUG();
}
}
......
......@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/ptrace.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
{
......@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
seg &= ~7UL;
mutex_lock(&child->mm->context.lock);
if (unlikely((seg >> 3) >= child->mm->context.size))
if (unlikely(!child->mm->context.ldt ||
(seg >> 3) >= child->mm->context.ldt->size))
addr = -1L; /* bogus selector, access would fault */
else {
desc = child->mm->context.ldt + seg;
desc = &child->mm->context.ldt->entries[seg];
base = get_desc_base(desc);
/* 16-bit code segment? */
......
......@@ -22,6 +22,7 @@
#include <asm/fpu/internal.h>
#include <asm/debugreg.h>
#include <asm/cpu.h>
#include <asm/mmu_context.h>
#ifdef CONFIG_X86_32
__visible unsigned long saved_context_ebx;
......@@ -153,7 +154,7 @@ static void fix_processor_context(void)
syscall_init(); /* This sets MSR_*STAR and related */
#endif
load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */
load_mm_ldt(current->active_mm); /* This does lldt */
fpu__resume_cpu();
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册