提交 8e4ef638 编写于 作者: L Linus Torvalds

Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 vdso updates from Ingo Molnar:
 "The main changes in this cycle centered around adding support for
  32-bit compatible C/R of the vDSO on 64-bit kernels, by Dmitry
  Safonov"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso: Use CONFIG_X86_X32_ABI to enable vdso prctl
  x86/vdso: Only define map_vdso_randomized() if CONFIG_X86_64
  x86/vdso: Only define prctl_map_vdso() if CONFIG_CHECKPOINT_RESTORE
  x86/signal: Add SA_{X32,IA32}_ABI sa_flags
  x86/ptrace: Down with test_thread_flag(TIF_IA32)
  x86/coredump: Use pr_reg size, rather that TIF_IA32 flag
  x86/arch_prctl/vdso: Add ARCH_MAP_VDSO_*
  x86/vdso: Replace calculate_addr in map_vdso() with addr
  x86/vdso: Unmap vdso blob on vvar mapping failure
......@@ -37,54 +37,6 @@ void __init init_vdso_image(const struct vdso_image *image)
struct linux_binprm;
/*
* Put the vdso above the (randomized) stack with another randomized
* offset. This way there is no hole in the middle of address space.
* To save memory make sure it is still in the same PTE as the stack
* top. This doesn't give that many random bits.
*
* Note that this algorithm is imperfect: the distribution of the vdso
* start address within a PMD is biased toward the end.
*
* Only used for the 64-bit and x32 vdsos.
*/
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
#ifdef CONFIG_X86_32
return 0;
#else
unsigned long addr, end;
unsigned offset;
/*
* Round up the start address. It can start out unaligned as a result
* of stack start randomization.
*/
start = PAGE_ALIGN(start);
/* Round the lowest possible end address up to a PMD boundary. */
end = (start + len + PMD_SIZE - 1) & PMD_MASK;
if (end >= TASK_SIZE_MAX)
end = TASK_SIZE_MAX;
end -= len;
if (end > start) {
offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
addr = start + (offset << PAGE_SHIFT);
} else {
addr = start;
}
/*
* Forcibly align the final address in case we have a hardware
* issue that requires alignment for performance reasons.
*/
addr = align_vdso_addr(addr);
return addr;
#endif
}
static int vdso_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
......@@ -176,30 +128,28 @@ static int vvar_fault(const struct vm_special_mapping *sm,
return VM_FAULT_SIGBUS;
}
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
static const struct vm_special_mapping vdso_mapping = {
.name = "[vdso]",
.fault = vdso_fault,
.mremap = vdso_mremap,
};
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
};
/*
* Add vdso and vvar mappings to current process.
* @image - blob to map
* @addr - request a specific address (zero to map at free addr)
*/
static int map_vdso(const struct vdso_image *image, unsigned long addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long addr, text_start;
unsigned long text_start;
int ret = 0;
static const struct vm_special_mapping vdso_mapping = {
.name = "[vdso]",
.fault = vdso_fault,
.mremap = vdso_mremap,
};
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
};
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
image->size - image->sym_vvar_start);
} else {
addr = 0;
}
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
......@@ -238,24 +188,104 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto up_fail;
do_munmap(mm, text_start, image->size);
}
up_fail:
if (ret)
if (ret) {
current->mm->context.vdso = NULL;
current->mm->context.vdso_image = NULL;
}
up_write(&mm->mmap_sem);
return ret;
}
#ifdef CONFIG_X86_64
/*
* Put the vdso above the (randomized) stack with another randomized
* offset. This way there is no hole in the middle of address space.
* To save memory make sure it is still in the same PTE as the stack
* top. This doesn't give that many random bits.
*
* Note that this algorithm is imperfect: the distribution of the vdso
* start address within a PMD is biased toward the end.
*
* Only used for the 64-bit and x32 vdsos.
*/
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
unsigned long addr, end;
unsigned offset;
/*
* Round up the start address. It can start out unaligned as a result
* of stack start randomization.
*/
start = PAGE_ALIGN(start);
/* Round the lowest possible end address up to a PMD boundary. */
end = (start + len + PMD_SIZE - 1) & PMD_MASK;
if (end >= TASK_SIZE_MAX)
end = TASK_SIZE_MAX;
end -= len;
if (end > start) {
offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
addr = start + (offset << PAGE_SHIFT);
} else {
addr = start;
}
/*
* Forcibly align the final address in case we have a hardware
* issue that requires alignment for performance reasons.
*/
addr = align_vdso_addr(addr);
return addr;
}
static int map_vdso_randomized(const struct vdso_image *image)
{
unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start);
return map_vdso(image, addr);
}
#endif
int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
down_write(&mm->mmap_sem);
/*
* Check if we have already mapped vdso blob - fail to prevent
* abusing from userspace install_speciall_mapping, which may
* not do accounting and rlimit right.
* We could search vma near context.vdso, but it's a slowpath,
* so let's explicitely check all VMAs to be completely sure.
*/
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (vma_is_special_mapping(vma, &vdso_mapping) ||
vma_is_special_mapping(vma, &vvar_mapping)) {
up_write(&mm->mmap_sem);
return -EEXIST;
}
}
up_write(&mm->mmap_sem);
return map_vdso(image, addr);
}
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
if (vdso32_enabled != 1) /* Other values all mean "disabled" */
return 0;
return map_vdso(&vdso_image_32, false);
return map_vdso(&vdso_image_32, 0);
}
#endif
......@@ -265,7 +295,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (!vdso64_enabled)
return 0;
return map_vdso(&vdso_image_64, true);
return map_vdso_randomized(&vdso_image_64);
}
#ifdef CONFIG_COMPAT
......@@ -276,8 +306,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
if (test_thread_flag(TIF_X32)) {
if (!vdso64_enabled)
return 0;
return map_vdso(&vdso_image_x32, true);
return map_vdso_randomized(&vdso_image_x32);
}
#endif
#ifdef CONFIG_IA32_EMULATION
......
......@@ -378,7 +378,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
} put_user_catch(err);
err |= copy_siginfo_to_user32(&frame->info, &ksig->info);
err |= __copy_siginfo_to_user32(&frame->info, &ksig->info, false);
err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
......
......@@ -275,10 +275,10 @@ struct compat_shmid64_ds {
#ifdef CONFIG_X86_X32_ABI
typedef struct user_regs_struct compat_elf_gregset_t;
#define PR_REG_SIZE(S) (test_thread_flag(TIF_IA32) ? 68 : 216)
#define PRSTATUS_SIZE(S) (test_thread_flag(TIF_IA32) ? 144 : 296)
#define SET_PR_FPVALID(S,V) \
do { *(int *) (((void *) &((S)->pr_reg)) + PR_REG_SIZE(0)) = (V); } \
/* Full regset -- prstatus on x32, otherwise on ia32 */
#define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 144 : 296)
#define SET_PR_FPVALID(S, V, R) \
do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \
while (0)
#define COMPAT_USE_64BIT_TIME \
......
......@@ -19,6 +19,12 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
# define ia32_setup_rt_frame __setup_rt_frame
#endif
#ifdef CONFIG_COMPAT
int __copy_siginfo_to_user32(compat_siginfo_t __user *to,
const siginfo_t *from, bool x32_ABI);
#endif
extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
struct task_struct *tsk);
extern void convert_to_fxsr(struct task_struct *tsk,
......
......@@ -23,6 +23,10 @@ typedef struct {
unsigned long sig[_NSIG_WORDS];
} sigset_t;
/* non-uapi in-kernel SA_FLAGS for those indicates ABI for a signal frame */
#define SA_IA32_ABI 0x02000000u
#define SA_X32_ABI 0x01000000u
#ifndef CONFIG_COMPAT
typedef sigset_t compat_sigset_t;
#endif
......
......@@ -41,6 +41,8 @@ extern const struct vdso_image vdso_image_32;
extern void __init init_vdso_image(const struct vdso_image *image);
extern int map_vdso_once(const struct vdso_image *image, unsigned long addr);
#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_VDSO_H */
......@@ -6,4 +6,10 @@
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
#ifdef CONFIG_CHECKPOINT_RESTORE
# define ARCH_MAP_VDSO_X32 0x2001
# define ARCH_MAP_VDSO_32 0x2002
# define ARCH_MAP_VDSO_64 0x2003
#endif
#endif /* _ASM_X86_PRCTL_H */
......@@ -49,6 +49,7 @@
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
......@@ -523,6 +524,19 @@ void set_personality_ia32(bool x32)
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
int ret;
ret = map_vdso_once(image, addr);
if (ret)
return ret;
return (long)image->size;
}
#endif
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
int ret = 0;
......@@ -576,6 +590,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
break;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
case ARCH_MAP_VDSO_X32:
return prctl_map_vdso(&vdso_image_x32, addr);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
case ARCH_MAP_VDSO_32:
return prctl_map_vdso(&vdso_image_32, addr);
# endif
case ARCH_MAP_VDSO_64:
return prctl_map_vdso(&vdso_image_64, addr);
#endif
default:
ret = -EINVAL;
break;
......
......@@ -1358,7 +1358,7 @@ void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
if (!user_64bit_mode(task_pt_regs(task)))
#endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
return &user_x86_32_view;
......
......@@ -42,6 +42,7 @@
#include <asm/syscalls.h>
#include <asm/sigframe.h>
#include <asm/signal.h>
#define COPY(x) do { \
get_user_ex(regs->x, &sc->x); \
......@@ -547,7 +548,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
return -EFAULT;
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user32(&frame->info, &ksig->info))
if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
return -EFAULT;
}
......@@ -660,20 +661,21 @@ asmlinkage long sys_rt_sigreturn(void)
return 0;
}
static inline int is_ia32_compat_frame(void)
static inline int is_ia32_compat_frame(struct ksignal *ksig)
{
return IS_ENABLED(CONFIG_IA32_EMULATION) &&
test_thread_flag(TIF_IA32);
ksig->ka.sa.sa_flags & SA_IA32_ABI;
}
static inline int is_ia32_frame(void)
static inline int is_ia32_frame(struct ksignal *ksig)
{
return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame();
return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig);
}
static inline int is_x32_frame(void)
static inline int is_x32_frame(struct ksignal *ksig)
{
return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
return IS_ENABLED(CONFIG_X86_X32_ABI) &&
ksig->ka.sa.sa_flags & SA_X32_ABI;
}
static int
......@@ -684,12 +686,12 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
compat_sigset_t *cset = (compat_sigset_t *) set;
/* Set up the stack frame */
if (is_ia32_frame()) {
if (is_ia32_frame(ksig)) {
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
return ia32_setup_rt_frame(usig, ksig, cset, regs);
else
return ia32_setup_frame(usig, ksig, cset, regs);
} else if (is_x32_frame()) {
} else if (is_x32_frame(ksig)) {
return x32_setup_rt_frame(ksig, cset, regs);
} else {
return __setup_rt_frame(ksig->sig, ksig, set, regs);
......
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
/*
* The compat_siginfo_t structure and handing code is very easy
......@@ -92,10 +93,31 @@ static inline void signal_compat_build_tests(void)
/* any new si_fields should be added here */
}
int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
{
/* Don't leak in-kernel non-uapi flags to user-space */
if (oact)
oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
if (!act)
return;
/* Don't let flags to be set from userspace */
act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
if (user_64bit_mode(current_pt_regs()))
return;
if (in_ia32_syscall())
act->sa.sa_flags |= SA_IA32_ABI;
if (in_x32_syscall())
act->sa.sa_flags |= SA_X32_ABI;
}
int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from,
bool x32_ABI)
{
int err = 0;
bool ia32 = test_thread_flag(TIF_IA32);
signal_compat_build_tests();
......@@ -146,7 +168,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
put_user_ex(from->si_arch, &to->si_arch);
break;
case __SI_CHLD >> 16:
if (ia32) {
if (!x32_ABI) {
put_user_ex(from->si_utime, &to->si_utime);
put_user_ex(from->si_stime, &to->si_stime);
} else {
......@@ -180,6 +202,12 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
return err;
}
/* from syscall's path, where we know the ABI */
int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
{
return __copy_siginfo_to_user32(to, from, in_x32_syscall());
}
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
{
int err = 0;
......
......@@ -1624,20 +1624,12 @@ static void do_thread_regset_writeback(struct task_struct *task,
regset->writeback(task, regset, 1);
}
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif
#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#define PRSTATUS_SIZE(S, R) sizeof(S)
#endif
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
#endif
static int fill_thread_core_info(struct elf_thread_core_info *t,
......@@ -1645,6 +1637,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
long signr, size_t *total)
{
unsigned int i;
unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
/*
* NT_PRSTATUS is the one special case, because the regset data
......@@ -1653,12 +1646,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
* We assume that regset 0 is NT_PRSTATUS.
*/
fill_prstatus(&t->prstatus, t->task, signr);
(void) view->regsets[0].get(t->task, &view->regsets[0],
0, PR_REG_SIZE(t->prstatus.pr_reg),
PR_REG_PTR(&t->prstatus), NULL);
(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
&t->prstatus.pr_reg, NULL);
fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
PRSTATUS_SIZE(t->prstatus), &t->prstatus);
PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
*total += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
......@@ -1688,7 +1680,8 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
regset->core_note_type,
size, data);
else {
SET_PR_FPVALID(&t->prstatus, 1);
SET_PR_FPVALID(&t->prstatus,
1, regset_size);
fill_note(&t->notes[i], "CORE",
NT_PRFPREG, size, data);
}
......
......@@ -2019,6 +2019,8 @@ extern struct file *get_task_exe_file(struct task_struct *task);
extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages);
extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
extern bool vma_is_special_mapping(const struct vm_area_struct *vma,
const struct vm_special_mapping *sm);
extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long flags,
......
......@@ -3044,6 +3044,11 @@ void kernel_sigaction(int sig, __sighandler_t action)
}
EXPORT_SYMBOL(kernel_sigaction);
void __weak sigaction_compat_abi(struct k_sigaction *act,
struct k_sigaction *oact)
{
}
int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
{
struct task_struct *p = current, *t;
......@@ -3059,6 +3064,8 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
if (oact)
*oact = *k;
sigaction_compat_abi(act, oact);
if (act) {
sigdelsetmask(&act->sa.sa_mask,
sigmask(SIGKILL) | sigmask(SIGSTOP));
......
......@@ -3068,6 +3068,14 @@ static struct vm_area_struct *__install_special_mapping(
return ERR_PTR(ret);
}
bool vma_is_special_mapping(const struct vm_area_struct *vma,
const struct vm_special_mapping *sm)
{
return vma->vm_private_data == sm &&
(vma->vm_ops == &special_mapping_vmops ||
vma->vm_ops == &legacy_special_mapping_vmops);
}
/*
* Called with mm->mmap_sem held for writing.
* Insert a new vma covering the given region, with the given flags.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册