diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 93d568dfa762168c2baf2b391e7fe255beff77e1..99b522052d16d9cae2f3c07f11afd9c3cdfb59e9 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -312,6 +312,10 @@ MODULE_AUTHOR("Eric Youngdale, Andi Kleen"); static void elf32_init(struct pt_regs *); +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +#define arch_setup_additional_pages syscall32_setup_pages +extern int syscall32_setup_pages(struct linux_binprm *, int exstack); + #include "../../../fs/binfmt_elf.c" static void elf32_init(struct pt_regs *regs) diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 399ff498509978c834420c82bc539ed4bfe6b268..01d8db1a1c09c7bff982a740bf5222735e996893 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -30,51 +31,64 @@ extern int sysctl_vsyscall32; char *syscall32_page; static int use_sysenter = -1; -/* - * Map the 32bit vsyscall page on demand. - * - * RED-PEN: This knows too much about high level VM. - * - * Alternative would be to generate a vma with appropriate backing options - * and let it be handled by generic VM. - */ -int __map_syscall32(struct mm_struct *mm, unsigned long address) -{ - pgd_t *pgd; - pud_t *pud; - pte_t *pte; - pmd_t *pmd; - int err = -ENOMEM; - - spin_lock(&mm->page_table_lock); - pgd = pgd_offset(mm, address); - pud = pud_alloc(mm, pgd, address); - if (pud) { - pmd = pmd_alloc(mm, pud, address); - if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) { - if (pte_none(*pte)) { - set_pte(pte, - mk_pte(virt_to_page(syscall32_page), - PAGE_KERNEL_VSYSCALL32)); - } - /* Flush only the local CPU. Other CPUs taking a fault - will just end up here again - This probably not needed and just paranoia. 
*/ - __flush_tlb_one(address); - err = 0; - } - } - spin_unlock(&mm->page_table_lock); - return err; +static struct page * +syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) +{ + struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page); /* page of syscall32_page at the faulting offset into the VMA */ + get_page(p); + return p; } -int map_syscall32(struct mm_struct *mm, unsigned long address) +/* Prevent VMA merging */ +static void syscall32_vma_close(struct vm_area_struct *vma) { - int err; - down_read(&mm->mmap_sem); - err = __map_syscall32(mm, address); - up_read(&mm->mmap_sem); - return err; +} + +static struct vm_operations_struct syscall32_vm_ops = { + .close = syscall32_vma_close, + .nopage = syscall32_nopage, +}; + +struct linux_binprm; + +/* Setup a VMA at program startup for the vsyscall page. Returns 0 on success, -ENOMEM if allocation or the memory check fails, or the error from insert_vm_struct(). */ +int syscall32_setup_pages(struct linux_binprm *bprm, int exstack) +{ + int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + int ret; + + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!vma) + return -ENOMEM; + if (security_vm_enough_memory(npages)) { + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } + + memset(vma, 0, sizeof(struct vm_area_struct)); + /* Could randomize here */ + vma->vm_start = VSYSCALL32_BASE; + vma->vm_end = VSYSCALL32_END; + /* MAYWRITE to allow gdb to COW and set breakpoints */ + vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + vma->vm_ops = &syscall32_vm_ops; + vma->vm_mm = mm; + + down_write(&mm->mmap_sem); + ret = insert_vm_struct(mm, vma); + if (ret) { + /* Insertion failed: free the VMA instead of leaking it and leave total_vm untouched. NOTE(review): the security_vm_enough_memory() charge above is not undone on this path - confirm whether it needs to be. */ + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return ret; + } + mm->total_vm += npages; + up_write(&mm->mmap_sem); + return 0; } static int __init init_syscall32(void) @@ -82,7 +96,6 @@ static int __init init_syscall32(void) syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); if (!syscall32_page) panic("Cannot allocate syscall32 page"); - SetPageReserved(virt_to_page(syscall32_page)); 
if (use_sysenter > 0) { memcpy(syscall32_page, syscall32_sysenter, syscall32_sysenter_end - syscall32_sysenter); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 5724370475cc10816730aeb7375e4193206f36fc..d4676ca091449bd87c075956057716f57decd406 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -458,17 +458,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) up_read(&mm->mmap_sem); bad_area_nosemaphore: - -#ifdef CONFIG_IA32_EMULATION - /* 32bit vsyscall. map on demand. */ - if (test_thread_flag(TIF_IA32) && - address >= VSYSCALL32_BASE && address < VSYSCALL32_END) { - if (map_syscall32(mm, address) < 0) - goto out_of_memory2; - return; - } -#endif - /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { if (is_prefetch(regs, address, error_code)) @@ -550,7 +539,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) */ out_of_memory: up_read(&mm->mmap_sem); -out_of_memory2: if (current->pid == 1) { yield(); goto again; diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index b0d604551d862ad474d6e49670cbc11a371a7f96..dbe53b4c7e66c0c0f75fbccfb8d404961d7eb10d 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -583,9 +583,9 @@ static __init int x8664_sysctl_init(void) __initcall(x8664_sysctl_init); #endif -/* Pseudo VMAs to allow ptrace access for the vsyscall pages. x86-64 has two - different ones: one for 32bit and one for 64bit. Use the appropiate - for the target task. */ +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only + covers the 64bit vsyscall page now. 32bit has a real VMA now and does + not need special handling anymore. 
*/ static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, @@ -593,22 +593,11 @@ static struct vm_area_struct gate_vma = { .vm_page_prot = PAGE_READONLY }; -static struct vm_area_struct gate32_vma = { - .vm_start = VSYSCALL32_BASE, - .vm_end = VSYSCALL32_END, - .vm_page_prot = PAGE_READONLY -}; - struct vm_area_struct *get_gate_vma(struct task_struct *tsk) { #ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(tsk, TIF_IA32)) { - /* lookup code assumes the pages are present. set them up - now */ - if (__map_syscall32(tsk->mm, VSYSCALL32_BASE) < 0) - return NULL; - return &gate32_vma; - } + if (test_tsk_thread_flag(tsk, TIF_IA32)) + return NULL; /* no pseudo gate VMA for 32bit tasks; callers must handle NULL */ #endif return &gate_vma; } @@ -616,6 +605,8 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) { struct vm_area_struct *vma = get_gate_vma(task); + if (!vma) + return 0; /* task has no gate VMA, so no address lies inside it */ return (addr >= vma->vm_start) && (addr < vma->vm_end); } @@ -625,6 +616,5 @@ int in_gate_area(struct task_struct *task, unsigned long addr) */ int in_gate_area_no_task(unsigned long addr) { - return (((addr >= VSYSCALL_START) && (addr < VSYSCALL_END)) || - ((addr >= VSYSCALL32_BASE) && (addr < VSYSCALL32_END))); + return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); /* only the 64bit vsyscall range is checked */ } diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index c59526ee54da668b388597892c03c914abe13968..217bd9ace69b7b56a83ca0f556f043df29c6b7f1 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -69,8 +69,6 @@ extern void __die(const char * str, struct pt_regs * regs, long err); extern void __show_regs(struct pt_regs * regs); extern void show_regs(struct pt_regs * regs); -extern int map_syscall32(struct mm_struct *mm, unsigned long address); -extern int __map_syscall32(struct mm_struct *mm, unsigned long address); extern char *syscall32_page; extern void syscall32_cpu_init(void);