提交 536e89ee 编写于 作者: L Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "Misc fixes (mainly Andy's TLS fixes), plus a cleanup"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tls: Disallow unusual TLS segments
  x86/tls: Validate TLS entries to protect espfix
  MAINTAINERS: Add me as x86 VDSO submaintainer
  x86/asm: Unify segment selector defines
  x86/asm: Guard against building the 32/64-bit versions of the asm-offsets*.c file directly
  x86_64, switch_to(): Load TLS descriptors before switching DS and ES
  x86/mm: Use min() instead of min_t() in the e820 printout code
  x86/mm: Fix zone ranges boot printout
  x86/doc: Update documentation after file shuffling
...@@ -7,9 +7,12 @@ http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu> ...@@ -7,9 +7,12 @@ http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu>
The x86 architecture has quite a few different ways to jump into The x86 architecture has quite a few different ways to jump into
kernel code. Most of these entry points are registered in kernel code. Most of these entry points are registered in
arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S
and arch/x86/ia32/ia32entry.S. for 64-bit, arch/x86/kernel/entry_32.S for 32-bit and finally
arch/x86/ia32/ia32entry.S which implements the 32-bit compatibility
syscall entry points and thus provides for 32-bit processes the
ability to execute syscalls when running on 64-bit kernels.
The IDT vector assignments are listed in arch/x86/include/irq_vectors.h. The IDT vector assignments are listed in arch/x86/include/asm/irq_vectors.h.
Some of these entries are: Some of these entries are:
......
...@@ -10485,6 +10485,13 @@ L: linux-edac@vger.kernel.org ...@@ -10485,6 +10485,13 @@ L: linux-edac@vger.kernel.org
S: Maintained S: Maintained
F: arch/x86/kernel/cpu/mcheck/* F: arch/x86/kernel/cpu/mcheck/*
X86 VDSO
M: Andy Lutomirski <luto@amacapital.net>
L: linux-kernel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vdso
S: Maintained
F: arch/x86/vdso/
XC2028/3028 TUNER DRIVER XC2028/3028 TUNER DRIVER
M: Mauro Carvalho Chehab <mchehab@osg.samsung.com> M: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
L: linux-media@vger.kernel.org L: linux-media@vger.kernel.org
......
...@@ -70,7 +70,7 @@ ...@@ -70,7 +70,7 @@
#define MAX_DMA_CHANNELS 8 #define MAX_DMA_CHANNELS 8
/* 16MB ISA DMA zone */ /* 16MB ISA DMA zone */
#define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT) #define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT)
/* 4GB broken PCI/AGP hardware bus master zone */ /* 4GB broken PCI/AGP hardware bus master zone */
#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
......
...@@ -23,6 +23,15 @@ ...@@ -23,6 +23,15 @@
#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) #define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2)
#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) #define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8)
#define SEGMENT_RPL_MASK 0x3 /*
* Bottom two bits of selector give the ring
* privilege level
*/
#define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */
#define USER_RPL 0x3 /* User mode is privilege level 3 */
#define SEGMENT_LDT 0x4 /* LDT segment has TI set... */
#define SEGMENT_GDT 0x0 /* ... GDT has it cleared */
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* /*
* The layout of the per-CPU GDT under Linux: * The layout of the per-CPU GDT under Linux:
...@@ -125,16 +134,6 @@ ...@@ -125,16 +134,6 @@
#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ #define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ #define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
/* Bottom two bits of selector give the ring privilege level */
#define SEGMENT_RPL_MASK 0x3
/* Bit 2 is table indicator (LDT/GDT) */
#define SEGMENT_TI_MASK 0x4
/* User mode is privilege level 3 */
#define USER_RPL 0x3
/* LDT segment has TI set, GDT has it cleared */
#define SEGMENT_LDT 0x4
#define SEGMENT_GDT 0x0
/* /*
* Matching rules for certain types of segments. * Matching rules for certain types of segments.
...@@ -192,17 +191,6 @@ ...@@ -192,17 +191,6 @@
#define get_kernel_rpl() 0 #define get_kernel_rpl() 0
#endif #endif
/* User mode is privilege level 3 */
#define USER_RPL 0x3
/* LDT segment has TI set, GDT has it cleared */
#define SEGMENT_LDT 0x4
#define SEGMENT_GDT 0x0
/* Bottom two bits of selector give the ring privilege level */
#define SEGMENT_RPL_MASK 0x3
/* Bit 2 is table indicator (LDT/GDT) */
#define SEGMENT_TI_MASK 0x4
#define IDT_ENTRIES 256 #define IDT_ENTRIES 256
#define NUM_EXCEPTION_VECTORS 32 #define NUM_EXCEPTION_VECTORS 32
/* Bitmask of exception vectors which push an error code on the stack */ /* Bitmask of exception vectors which push an error code on the stack */
......
#ifndef __LINUX_KBUILD_H
# error "Please do not build this file directly, build asm-offsets.c instead"
#endif
#include <asm/ucontext.h> #include <asm/ucontext.h>
#include <linux/lguest.h> #include <linux/lguest.h>
......
#ifndef __LINUX_KBUILD_H
# error "Please do not build this file directly, build asm-offsets.c instead"
#endif
#include <asm/ia32.h> #include <asm/ia32.h>
#define __SYSCALL_64(nr, sym, compat) [nr] = 1, #define __SYSCALL_64(nr, sym, compat) [nr] = 1,
......
...@@ -1114,8 +1114,8 @@ void __init memblock_find_dma_reserve(void) ...@@ -1114,8 +1114,8 @@ void __init memblock_find_dma_reserve(void)
* at first, and assume boot_mem will not take below MAX_DMA_PFN * at first, and assume boot_mem will not take below MAX_DMA_PFN
*/ */
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN); start_pfn = min(start_pfn, MAX_DMA_PFN);
end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN); end_pfn = min(end_pfn, MAX_DMA_PFN);
nr_pages += end_pfn - start_pfn; nr_pages += end_pfn - start_pfn;
} }
......
...@@ -283,24 +283,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -283,24 +283,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
fpu = switch_fpu_prepare(prev_p, next_p, cpu); fpu = switch_fpu_prepare(prev_p, next_p, cpu);
/* /* Reload esp0 and ss1. */
* Reload esp0, LDT and the page table pointer:
*/
load_sp0(tss, next); load_sp0(tss, next);
/*
* Switch DS and ES.
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
if (unlikely(next->es | prev->es))
loadsegment(es, next->es);
savesegment(ds, prev->ds);
if (unlikely(next->ds | prev->ds))
loadsegment(ds, next->ds);
/* We must save %fs and %gs before load_TLS() because /* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS(). * %fs and %gs may be cleared by load_TLS().
* *
...@@ -309,41 +294,101 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -309,41 +294,101 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
savesegment(fs, fsindex); savesegment(fs, fsindex);
savesegment(gs, gsindex); savesegment(gs, gsindex);
/*
* Load TLS before restoring any segments so that segment loads
* reference the correct GDT entries.
*/
load_TLS(next, cpu); load_TLS(next, cpu);
/* /*
* Leave lazy mode, flushing any hypercalls made here. * Leave lazy mode, flushing any hypercalls made here. This
* This must be done before restoring TLS segments so * must be done after loading TLS entries in the GDT but before
* the GDT and LDT are properly updated, and must be * loading segments that might reference them, and it must
* done before math_state_restore, so the TS bit is up * be done before math_state_restore, so the TS bit is up to
* to date. * date.
*/ */
arch_end_context_switch(next_p); arch_end_context_switch(next_p);
/* Switch DS and ES.
*
* Reading them only returns the selectors, but writing them (if
* nonzero) loads the full descriptor from the GDT or LDT. The
* LDT for next is loaded in switch_mm, and the GDT is loaded
* above.
*
* We therefore need to write new values to the segment
* registers on every context switch unless both the new and old
* values are zero.
*
* Note that we don't need to do anything for CS and SS, as
* those are saved and restored as part of pt_regs.
*/
savesegment(es, prev->es);
if (unlikely(next->es | prev->es))
loadsegment(es, next->es);
savesegment(ds, prev->ds);
if (unlikely(next->ds | prev->ds))
loadsegment(ds, next->ds);
/* /*
* Switch FS and GS. * Switch FS and GS.
* *
* Segment register != 0 always requires a reload. Also * These are even more complicated than DS and ES: they have
* reload when it has changed. When prev process used 64bit * 64-bit bases that are controlled by arch_prctl. Those bases
* base always reload to avoid an information leak. * only differ from the values in the GDT or LDT if the selector
* is 0.
*
* Loading the segment register resets the hidden base part of
* the register to 0 or the value from the GDT / LDT. If the
* next base address is zero, writing 0 to the segment register is
* much faster than using wrmsr to explicitly zero the base.
*
* The thread_struct.fs and thread_struct.gs values are 0
* if the fs and gs bases respectively are not overridden
* from the values implied by fsindex and gsindex. They
* are nonzero, and store the nonzero base addresses, if
* the bases are overridden.
*
* (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
* be impossible.
*
* Therefore we need to reload the segment registers if either
* the old or new selector is nonzero, and we need to override
* the base address if next thread expects it to be overridden.
*
* This code is unnecessarily slow in the case where the old and
* new indexes are zero and the new base is nonzero -- it will
* unnecessarily write 0 to the selector before writing the new
* base address.
*
* Note: This all depends on arch_prctl being the only way that
* user code can override the segment base. Once wrfsbase and
* wrgsbase are enabled, most of this code will need to change.
*/ */
if (unlikely(fsindex | next->fsindex | prev->fs)) { if (unlikely(fsindex | next->fsindex | prev->fs)) {
loadsegment(fs, next->fsindex); loadsegment(fs, next->fsindex);
/* /*
* Check if the user used a selector != 0; if yes * If user code wrote a nonzero value to FS, then it also
* clear 64bit base, since overloaded base is always * cleared the overridden base address.
* mapped to the Null selector *
* XXX: if user code wrote 0 to FS and cleared the base
* address itself, we won't notice and we'll incorrectly
* restore the prior base address next time we reschedule
* the process.
*/ */
if (fsindex) if (fsindex)
prev->fs = 0; prev->fs = 0;
} }
/* when next process has a 64bit base use it */
if (next->fs) if (next->fs)
wrmsrl(MSR_FS_BASE, next->fs); wrmsrl(MSR_FS_BASE, next->fs);
prev->fsindex = fsindex; prev->fsindex = fsindex;
if (unlikely(gsindex | next->gsindex | prev->gs)) { if (unlikely(gsindex | next->gsindex | prev->gs)) {
load_gs_index(next->gsindex); load_gs_index(next->gsindex);
/* This works (and fails) the same way as fsindex above. */
if (gsindex) if (gsindex)
prev->gs = 0; prev->gs = 0;
} }
......
...@@ -27,6 +27,43 @@ static int get_free_idx(void) ...@@ -27,6 +27,43 @@ static int get_free_idx(void)
return -ESRCH; return -ESRCH;
} }
/*
* Decide whether a user-supplied TLS descriptor may be installed.
*
* Returns true when @info passes every check below and false as soon
* as one of them fails.  The visible callers turn a false result into
* -EINVAL before any descriptor is written.
*/
static bool tls_desc_okay(const struct user_desc *info)
{
/*
* An empty descriptor is accepted without further checks.
* NOTE(review): LDT_empty() is defined elsewhere; presumably an
* empty descriptor is how userspace deletes a TLS entry -- confirm.
*/
if (LDT_empty(info))
return true;
/*
* espfix is required for 16-bit data segments, but espfix
* only works for LDT segments.
*/
if (!info->seg_32bit)
return false;
/*
* Only allow data segments in the TLS array.
* NOTE(review): assumes contents values 0 and 1 are the two data
* segment kinds and larger values denote code -- confirm against
* the struct user_desc definition.
*/
if (info->contents > 1)
return false;
/*
* Non-present segments with DPL 3 present an interesting attack
* surface. The kernel should handle such segments correctly,
* but TLS is very difficult to protect in a sandbox, so prevent
* such segments from being created.
*
* If userspace needs to remove a TLS entry, it can still delete
* it outright.
*/
if (info->seg_not_present)
return false;
#ifdef CONFIG_X86_64
/* The L bit makes no sense for data. */
if (info->lm)
return false;
#endif
return true;
}
static void set_tls_desc(struct task_struct *p, int idx, static void set_tls_desc(struct task_struct *p, int idx,
const struct user_desc *info, int n) const struct user_desc *info, int n)
{ {
...@@ -66,6 +103,9 @@ int do_set_thread_area(struct task_struct *p, int idx, ...@@ -66,6 +103,9 @@ int do_set_thread_area(struct task_struct *p, int idx,
if (copy_from_user(&info, u_info, sizeof(info))) if (copy_from_user(&info, u_info, sizeof(info)))
return -EFAULT; return -EFAULT;
if (!tls_desc_okay(&info))
return -EINVAL;
if (idx == -1) if (idx == -1)
idx = info.entry_number; idx = info.entry_number;
...@@ -192,6 +232,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, ...@@ -192,6 +232,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
{ {
struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
const struct user_desc *info; const struct user_desc *info;
int i;
if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
(pos % sizeof(struct user_desc)) != 0 || (pos % sizeof(struct user_desc)) != 0 ||
...@@ -205,6 +246,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, ...@@ -205,6 +246,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
else else
info = infobuf; info = infobuf;
for (i = 0; i < count / sizeof(struct user_desc); i++)
if (!tls_desc_okay(info + i))
return -EINVAL;
set_tls_desc(target, set_tls_desc(target,
GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)), GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)),
info, count / sizeof(struct user_desc)); info, count / sizeof(struct user_desc));
......
...@@ -703,10 +703,10 @@ void __init zone_sizes_init(void) ...@@ -703,10 +703,10 @@ void __init zone_sizes_init(void)
memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
#ifdef CONFIG_ZONE_DMA #ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; max_zone_pfns[ZONE_DMA] = min(MAX_DMA_PFN, max_low_pfn);
#endif #endif
#ifdef CONFIG_ZONE_DMA32 #ifdef CONFIG_ZONE_DMA32
max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; max_zone_pfns[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn);
#endif #endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn; max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册