提交 87d59863 编写于 作者: H Heiko Carstens

s390/mm: remove set_fs / rework address space handling

Remove set_fs support from s390. With doing this rework address space
handling and simplify it. As a result address spaces are now setup
like this:

CPU running in              | %cr1 ASCE | %cr7 ASCE | %cr13 ASCE
----------------------------|-----------|-----------|-----------
user space                  |  user     |  user     |  kernel
kernel, normal execution    |  kernel   |  user     |  kernel
kernel, kvm guest execution |  gmap     |  user     |  kernel

To achieve this the getcpu vdso syscall is removed in order to avoid
secondary address mode and a separate vdso address space in for user
space. The getcpu vdso syscall will be implemented differently with a
subsequent patch.

The kernel accesses user space always via secondary address space.
This happens in different ways:
- with mvcos in home space mode and directly read/write to secondary
  address space
- with mvcs/mvcp in primary space mode and copy from primary space to
  secondary space or vice versa
- with e.g. cs in secondary space mode and access secondary space

Switching translation modes happens with sacf before and after
instructions which access user space, like before.

Lazy handling of control register reloading is removed in the hope to
make everything simpler, but at the cost of making kernel entry and
exit a bit slower. That is: on kernel entry the primary asce is always
changed to contain the kernel asce, and on kernel exit the primary
asce is changed again so it contains the user asce.

In kernel mode there is only one exception to the primary asce: when
kvm guests are executed the primary asce contains the gmap asce (which
describes the guest address space). The primary asce is reset to
kernel asce whenever kvm guest execution is interrupted, so that this
doesn't has to be taken into account for any user space accesses.
Reviewed-by: NSven Schnelle <svens@linux.ibm.com>
Signed-off-by: NHeiko Carstens <hca@linux.ibm.com>
上级 77663819
......@@ -190,7 +190,6 @@ config S390
select PCI_DOMAINS if PCI
select PCI_MSI if PCI
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
select SET_FS
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
......
......@@ -26,9 +26,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr)
{
int oldval = 0, newval, ret;
mm_segment_t old_fs;
old_fs = enable_sacf_uaccess();
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("lr %2,%5\n",
......@@ -53,7 +51,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
default:
ret = -ENOSYS;
}
disable_sacf_uaccess(old_fs);
if (!ret)
*oval = oldval;
......@@ -64,10 +61,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
mm_segment_t old_fs;
int ret;
old_fs = enable_sacf_uaccess();
asm volatile(
" sacf 256\n"
"0: cs %1,%4,0(%5)\n"
......@@ -77,7 +72,6 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
: "cc", "memory");
disable_sacf_uaccess(old_fs);
*uval = oldval;
return ret;
}
......
......@@ -116,7 +116,7 @@ struct lowcore {
/* Address space pointer. */
__u64 kernel_asce; /* 0x0380 */
__u64 user_asce; /* 0x0388 */
__u64 vdso_asce; /* 0x0390 */
__u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */
/*
* The lpp and current_pid fields form a
......@@ -134,7 +134,7 @@ struct lowcore {
__u32 spinlock_index; /* 0x03b0 */
__u32 fpu_flags; /* 0x03b4 */
__u64 percpu_offset; /* 0x03b8 */
__u64 vdso_per_cpu_data; /* 0x03c0 */
__u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */
__u64 machine_flags; /* 0x03c8 */
__u64 gmap; /* 0x03d0 */
__u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */
......
......@@ -71,16 +71,6 @@ static inline int init_new_context(struct task_struct *tsk,
#define destroy_context(mm) do { } while (0)
static inline void set_user_asce(struct mm_struct *mm)
{
S390_lowcore.user_asce = mm->context.asce;
__ctl_load(S390_lowcore.user_asce, 1, 1);
clear_cpu_flag(CIF_ASCE_PRIMARY);
}
mm_segment_t enable_sacf_uaccess(void);
void disable_sacf_uaccess(mm_segment_t old_fs);
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
......@@ -88,15 +78,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
S390_lowcore.user_asce = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
/* Clear previous user-ASCE from CR1 and CR7 */
if (!test_cpu_flag(CIF_ASCE_PRIMARY)) {
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
set_cpu_flag(CIF_ASCE_PRIMARY);
}
if (test_cpu_flag(CIF_ASCE_SECONDARY)) {
__ctl_load(S390_lowcore.vdso_asce, 7, 7);
clear_cpu_flag(CIF_ASCE_SECONDARY);
}
/* Clear previous user-ASCE from CR7 */
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
if (prev != next)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
}
......@@ -115,7 +98,7 @@ static inline void finish_arch_post_lock_switch(void)
__tlb_flush_mm_lazy(mm);
preempt_enable();
}
set_fs(current->thread.mm_segment);
__ctl_load(S390_lowcore.user_asce, 7, 7);
}
#define enter_lazy_tlb(mm,tsk) do { } while (0)
......@@ -126,7 +109,7 @@ static inline void activate_mm(struct mm_struct *prev,
{
switch_mm(prev, next, current);
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
set_user_asce(next);
__ctl_load(S390_lowcore.user_asce, 7, 7);
}
#endif /* __S390_MMU_CONTEXT_H */
......@@ -14,8 +14,6 @@
#include <linux/bits.h>
#define CIF_ASCE_PRIMARY 0 /* primary asce needs fixup / uaccess */
#define CIF_ASCE_SECONDARY 1 /* secondary asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_FPU 3 /* restore FPU registers */
#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
......@@ -23,8 +21,6 @@
#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
#define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY)
#define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY)
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
#define _CIF_FPU BIT(CIF_FPU)
#define _CIF_IGNORE_IRQ BIT(CIF_IGNORE_IRQ)
......@@ -102,8 +98,6 @@ extern void __bpon(void);
#define HAVE_ARCH_PICK_MMAP_LAYOUT
typedef unsigned int mm_segment_t;
/*
* Thread structure
*/
......@@ -116,7 +110,6 @@ struct thread_struct {
unsigned long hardirq_timer; /* task cputime in hardirq context */
unsigned long softirq_timer; /* task cputime in softirq context */
unsigned long sys_call_table; /* system call table address */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_write_flag; /* gmap fault write indication */
unsigned int gmap_int_code; /* int code of last gmap fault */
......
......@@ -87,6 +87,7 @@ struct pt_regs
unsigned int int_parm;
unsigned long int_parm_long;
unsigned long flags;
unsigned long cr1;
};
/*
......
......@@ -18,24 +18,6 @@
#include <asm/extable.h>
#include <asm/facility.h>
/*
* The fs value determines whether argument validity checking should be
* performed or not. If get_fs() == USER_DS, checking is performed, with
* get_fs() == KERNEL_DS, checking is bypassed.
*
* For historical reasons, these macros are grossly misnamed.
*/
#define KERNEL_DS (0)
#define KERNEL_DS_SACF (1)
#define USER_DS (2)
#define USER_DS_SACF (3)
#define get_fs() (current->thread.mm_segment)
#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS)
void set_fs(mm_segment_t fs);
static inline int __range_ok(unsigned long addr, unsigned long size)
{
return 1;
......@@ -88,7 +70,7 @@ int __get_user_bad(void) __attribute__((noreturn));
static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
{
unsigned long spec = 0x010000UL;
unsigned long spec = 0x810000UL;
int rc;
switch (size) {
......@@ -121,7 +103,7 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
{
unsigned long spec = 0x01UL;
unsigned long spec = 0x81UL;
int rc;
switch (size) {
......
......@@ -12,32 +12,7 @@
#ifndef __ASSEMBLY__
/*
* Note about the vdso_data and vdso_per_cpu_data structures:
*
* NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
* structure is supposed to be known only to the function in the vdso
* itself and may change without notice.
*/
struct vdso_per_cpu_data {
/*
* Note: node_id and cpu_nr must be at adjacent memory locations.
* VDSO userspace must read both values with a single instruction.
*/
union {
__u64 getcpu_val;
struct {
__u32 node_id;
__u32 cpu_nr;
};
};
};
extern struct vdso_data *vdso_data;
int vdso_alloc_per_cpu(struct lowcore *lowcore);
void vdso_free_per_cpu(struct lowcore *lowcore);
#endif /* __ASSEMBLY__ */
#endif /* __S390_VDSO_H__ */
......@@ -13,7 +13,6 @@
#include <linux/purgatory.h>
#include <linux/pgtable.h>
#include <asm/idle.h>
#include <asm/vdso.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/stacktrace.h>
......@@ -48,6 +47,7 @@ int main(void)
OFFSET(__PT_INT_PARM, pt_regs, int_parm);
OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long);
OFFSET(__PT_FLAGS, pt_regs, flags);
OFFSET(__PT_CR1, pt_regs, cr1);
DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
/* stack_frame offsets */
......@@ -59,8 +59,6 @@ int main(void)
OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]);
OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]);
BLANK();
OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
BLANK();
/* idle data offsets */
OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit);
......@@ -138,12 +136,11 @@ int main(void)
OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source);
OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce);
OFFSET(__LC_USER_ASCE, lowcore, user_asce);
OFFSET(__LC_VDSO_ASCE, lowcore, vdso_asce);
OFFSET(__LC_LPP, lowcore, lpp);
OFFSET(__LC_CURRENT_PID, lowcore, current_pid);
OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset);
OFFSET(__LC_VDSO_PER_CPU, lowcore, vdso_per_cpu_data);
OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
OFFSET(__LC_GMAP, lowcore, gmap);
......
......@@ -55,7 +55,7 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU)
_CIF_WORK = (_CIF_FPU)
_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
_LPP_OFFSET = __LC_LPP
......@@ -327,7 +327,7 @@ ENTRY(sie64a)
BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_skip:
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
.Lsie_done:
# some program checks are suppressing. C code (e.g. do_protection_exception)
# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
......@@ -380,6 +380,7 @@ ENTRY(system_call)
lg %r12,__LC_CURRENT
lghi %r14,_PIF_SYSCALL
.Lsysc_per:
lctlg %c1,%c1,__LC_KERNEL_ASCE
lghi %r13,__TASK_thread
lg %r15,__LC_KERNEL_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
......@@ -427,8 +428,7 @@ ENTRY(system_call)
jnz .Lsysc_work
TSTMSK __TI_flags(%r12),_TIF_WORK
jnz .Lsysc_work # check for work
TSTMSK __LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU)
jnz .Lsysc_work
lctlg %c1,%c1,__LC_USER_ASCE
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
TSTMSK __LC_CPU_FLAGS, _CIF_FPU
jz .Lsysc_skip_fpu
......@@ -467,8 +467,6 @@ ENTRY(system_call)
jo .Lsysc_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
jo .Lsysc_notify_resume
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
jnz .Lsysc_asce
j .Lsysc_return
#
......@@ -478,26 +476,6 @@ ENTRY(system_call)
larl %r14,.Lsysc_return
jg schedule
#
# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
#
.Lsysc_asce:
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY
lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
jz .Lsysc_return
#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ?
jnz .Lsysc_set_fs_fixup
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
j .Lsysc_return
.Lsysc_set_fs_fixup:
#endif
larl %r14,.Lsysc_return
jg set_fs_fixup
#
# _TIF_SIGPENDING is set, call do_signal
#
......@@ -634,8 +612,11 @@ ENTRY(pgm_check_handler)
0: lg %r12,__LC_CURRENT
lghi %r11,0
lmg %r8,%r9,__LC_PGM_OLD_PSW
tmhh %r8,0x0001 # test problem state bit
jnz 3f # -> fault in user space
tmhh %r8,0x0001 # coming from user space?
jno .Lpgm_skip_asce
lctlg %c1,%c1,__LC_KERNEL_ASCE
j 3f
.Lpgm_skip_asce:
#if IS_ENABLED(CONFIG_KVM)
# cleanup critical section for program checks in sie64a
lgr %r14,%r9
......@@ -646,7 +627,7 @@ ENTRY(pgm_check_handler)
jhe 1f
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
lghi %r11,_PIF_GUEST_FAULT
#endif
......@@ -767,6 +748,10 @@ ENTRY(io_int_handler)
xgr %r10,%r10
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
tm __PT_PSW+1(%r11),0x01 # coming from user space?
jno .Lio_skip_asce
lctlg %c1,%c1,__LC_KERNEL_ASCE
.Lio_skip_asce:
mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
......@@ -808,6 +793,7 @@ ENTRY(io_int_handler)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jno .Lio_exit_kernel
lctlg %c1,%c1,__LC_USER_ASCE
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
.Lio_exit_kernel:
......@@ -873,29 +859,8 @@ ENTRY(io_int_handler)
jo .Lio_guarded_storage
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lio_vxrs
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
jnz .Lio_asce
j .Lio_return
#
# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce
#
.Lio_asce:
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY
lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce
TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY
jz .Lio_return
#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES
tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ?
jnz .Lio_set_fs_fixup
ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
j .Lio_return
.Lio_set_fs_fixup:
#endif
larl %r14,.Lio_return
jg set_fs_fixup
#
# CIF_FPU is set, restore floating-point controls and floating-point registers.
#
......@@ -977,6 +942,10 @@ ENTRY(ext_int_handler)
xgr %r10,%r10
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
tm __PT_PSW+1(%r11),0x01 # coming from user space?
jno .Lext_skip_asce
lctlg %c1,%c1,__LC_KERNEL_ASCE
.Lext_skip_asce:
lghi %r1,__LC_EXT_PARAMS2
mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
......@@ -1206,6 +1175,9 @@ ENTRY(mcck_int_handler)
xgr %r10,%r10
mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
la %r14,4095
mvc __PT_CR1(8,%r11),__LC_CREGS_SAVE_AREA-4095+8(%r14)
lctlg %c1,%c1,__LC_KERNEL_ASCE
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
......@@ -1221,6 +1193,7 @@ ENTRY(mcck_int_handler)
brasl %r14,s390_handle_mcck
TRACE_IRQS_ON
.Lmcck_return:
lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
......@@ -1297,7 +1270,7 @@ ENDPROC(stack_overflow)
1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
lctlg %c1,%c1,__LC_KERNEL_ASCE
larl %r9,sie_exit # skip forward to sie_exit
BR_EX %r14,%r11
......
......@@ -83,7 +83,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user
DECLARE_PER_CPU(u64, mt_cycles[8]);
void gs_load_bc_cb(struct pt_regs *regs);
void set_fs_fixup(void);
unsigned long stack_alloc(void);
void stack_free(unsigned long stack);
......
......@@ -94,7 +94,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
/* Save access registers to new thread structure. */
save_access_regs(&p->thread.acrs[0]);
/* start new process with ar4 pointing to the correct address space */
p->thread.mm_segment = get_fs();
/* Don't copy debug registers */
memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
......@@ -208,16 +207,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
ret = PAGE_ALIGN(mm->brk + brk_rnd());
return (ret > mm->brk) ? ret : mm->brk;
}
void set_fs_fixup(void)
{
struct pt_regs *regs = current_pt_regs();
static bool warned;
set_fs(USER_DS);
if (warned)
return;
WARN(1, "Unbalanced set_fs - int code: 0x%x\n", regs->int_code);
show_registers(regs);
warned = true;
}
......@@ -47,7 +47,6 @@
#include <asm/vtimer.h>
#include <asm/lowcore.h>
#include <asm/sclp.h>
#include <asm/vdso.h>
#include <asm/debug.h>
#include <asm/os_info.h>
#include <asm/sigp.h>
......@@ -217,14 +216,10 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
if (nmi_alloc_per_cpu(lc))
goto out_async;
if (vdso_alloc_per_cpu(lc))
goto out_mcesa;
lowcore_ptr[cpu] = lc;
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
return 0;
out_mcesa:
nmi_free_per_cpu(lc);
out_async:
stack_free(async_stack);
out:
......@@ -245,7 +240,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[pcpu - pcpu_devices] = NULL;
vdso_free_per_cpu(pcpu->lowcore);
nmi_free_per_cpu(pcpu->lowcore);
stack_free(async_stack);
if (pcpu == &pcpu_devices[0])
......@@ -271,7 +265,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
lc->steal_timer = lc->avg_steal_timer = 0;
__ctl_store(lc->cregs_save_area, 0, 15);
lc->cregs_save_area[1] = lc->kernel_asce;
lc->cregs_save_area[7] = lc->vdso_asce;
lc->cregs_save_area[7] = lc->user_asce;
save_access_regs((unsigned int *) lc->access_regs_save_area);
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
sizeof(lc->stfle_fac_list));
......@@ -859,8 +853,6 @@ static void smp_init_secondary(void)
S390_lowcore.last_update_clock = get_tod_clock();
restore_access_regs(S390_lowcore.access_regs_save_area);
set_cpu_flag(CIF_ASCE_PRIMARY);
set_cpu_flag(CIF_ASCE_SECONDARY);
cpu_init();
rcu_cpu_starting(cpu);
preempt_disable();
......
......@@ -99,61 +99,6 @@ static union {
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data;
/*
* Allocate/free per cpu vdso data.
*/
#define SEGMENT_ORDER 2
int vdso_alloc_per_cpu(struct lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
struct vdso_per_cpu_data *vd;
segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
page_table = get_zeroed_page(GFP_KERNEL);
page_frame = get_zeroed_page(GFP_KERNEL);
if (!segment_table || !page_table || !page_frame)
goto out;
arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER);
arch_set_page_dat(virt_to_page(page_table), 0);
/* Initialize per-cpu vdso data page */
vd = (struct vdso_per_cpu_data *) page_frame;
vd->cpu_nr = lowcore->cpu_nr;
vd->node_id = cpu_to_node(vd->cpu_nr);
/* Set up page table for the vdso address space */
memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES);
memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE);
*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
*(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
lowcore->vdso_asce = segment_table +
_ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
lowcore->vdso_per_cpu_data = page_frame;
return 0;
out:
free_page(page_frame);
free_page(page_table);
free_pages(segment_table, SEGMENT_ORDER);
return -ENOMEM;
}
void vdso_free_per_cpu(struct lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
segment_table = lowcore->vdso_asce & PAGE_MASK;
page_table = *(unsigned long *) segment_table;
page_frame = *(unsigned long *) page_table;
free_page(page_frame);
free_page(page_table);
free_pages(segment_table, SEGMENT_ORDER);
}
/*
* This is called from binfmt_elf, we create the special vma for the
......@@ -240,8 +185,6 @@ static int __init vdso_init(void)
}
vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
vdso64_pagelist[vdso64_pages] = NULL;
if (vdso_alloc_per_cpu(&S390_lowcore))
BUG();
get_page(virt_to_page(vdso_data));
......
......@@ -6,7 +6,7 @@ ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
include $(srctree)/lib/vdso/Makefile
obj-vdso64 = vdso_user_wrapper.o note.o getcpu.o
obj-vdso64 = vdso_user_wrapper.o note.o
obj-cvdso64 = vdso64_generic.o
CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Userland implementation of getcpu() for 64 bits processes in a
* s390 kernel for use in the vDSO
*
* Copyright IBM Corp. 2016
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
#include <asm/dwarf.h>
.text
.align 4
.globl __kernel_getcpu
.type __kernel_getcpu,@function
__kernel_getcpu:
CFI_STARTPROC
sacf 256
lm %r4,%r5,__VDSO_GETCPU_VAL(%r0)
sacf 0
ltgr %r2,%r2
jz 2f
st %r5,0(%r2)
2: ltgr %r3,%r3
jz 3f
st %r4,0(%r3)
3: lghi %r2,0
br %r14
CFI_ENDPROC
.size __kernel_getcpu,.-__kernel_getcpu
......@@ -135,7 +135,6 @@ VERSION
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
__kernel_getcpu;
local: *;
};
......
......@@ -40,71 +40,10 @@ static inline int copy_with_mvcos(void)
}
#endif
void set_fs(mm_segment_t fs)
{
current->thread.mm_segment = fs;
if (fs == USER_DS) {
__ctl_load(S390_lowcore.user_asce, 1, 1);
clear_cpu_flag(CIF_ASCE_PRIMARY);
} else {
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
set_cpu_flag(CIF_ASCE_PRIMARY);
}
if (fs & 1) {
if (fs == USER_DS_SACF)
__ctl_load(S390_lowcore.user_asce, 7, 7);
else
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
set_cpu_flag(CIF_ASCE_SECONDARY);
}
}
EXPORT_SYMBOL(set_fs);
mm_segment_t enable_sacf_uaccess(void)
{
mm_segment_t old_fs;
unsigned long asce, cr;
unsigned long flags;
old_fs = current->thread.mm_segment;
if (old_fs & 1)
return old_fs;
/* protect against a concurrent page table upgrade */
local_irq_save(flags);
current->thread.mm_segment |= 1;
asce = S390_lowcore.kernel_asce;
if (likely(old_fs == USER_DS)) {
__ctl_store(cr, 1, 1);
if (cr != S390_lowcore.kernel_asce) {
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
set_cpu_flag(CIF_ASCE_PRIMARY);
}
asce = S390_lowcore.user_asce;
}
__ctl_store(cr, 7, 7);
if (cr != asce) {
__ctl_load(asce, 7, 7);
set_cpu_flag(CIF_ASCE_SECONDARY);
}
local_irq_restore(flags);
return old_fs;
}
EXPORT_SYMBOL(enable_sacf_uaccess);
void disable_sacf_uaccess(mm_segment_t old_fs)
{
current->thread.mm_segment = old_fs;
if (old_fs == USER_DS && test_facility(27)) {
__ctl_load(S390_lowcore.user_asce, 1, 1);
clear_cpu_flag(CIF_ASCE_PRIMARY);
}
}
EXPORT_SYMBOL(disable_sacf_uaccess);
static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
unsigned long size)
{
register unsigned long reg0 asm("0") = 0x01UL;
register unsigned long reg0 asm("0") = 0x81UL;
unsigned long tmp1, tmp2;
tmp1 = -4096UL;
......@@ -135,9 +74,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
unsigned long size)
{
unsigned long tmp1, tmp2;
mm_segment_t old_fs;
old_fs = enable_sacf_uaccess();
tmp1 = -256UL;
asm volatile(
" sacf 0\n"
......@@ -164,7 +101,6 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
: : "cc", "memory");
disable_sacf_uaccess(old_fs);
return size;
}
......@@ -179,7 +115,7 @@ EXPORT_SYMBOL(raw_copy_from_user);
static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
unsigned long size)
{
register unsigned long reg0 asm("0") = 0x010000UL;
register unsigned long reg0 asm("0") = 0x810000UL;
unsigned long tmp1, tmp2;
tmp1 = -4096UL;
......@@ -210,9 +146,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
unsigned long size)
{
unsigned long tmp1, tmp2;
mm_segment_t old_fs;
old_fs = enable_sacf_uaccess();
tmp1 = -256UL;
asm volatile(
" sacf 0\n"
......@@ -239,7 +173,6 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
: : "cc", "memory");
disable_sacf_uaccess(old_fs);
return size;
}
......@@ -254,7 +187,7 @@ EXPORT_SYMBOL(raw_copy_to_user);
static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
unsigned long size)
{
register unsigned long reg0 asm("0") = 0x010001UL;
register unsigned long reg0 asm("0") = 0x810081UL;
unsigned long tmp1, tmp2;
tmp1 = -4096UL;
......@@ -277,10 +210,8 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use
static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from,
unsigned long size)
{
mm_segment_t old_fs;
unsigned long tmp1;
old_fs = enable_sacf_uaccess();
asm volatile(
" sacf 256\n"
" aghi %0,-1\n"
......@@ -304,7 +235,6 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
: "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
: : "cc", "memory");
disable_sacf_uaccess(old_fs);
return size;
}
......@@ -318,7 +248,7 @@ EXPORT_SYMBOL(raw_copy_in_user);
static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
{
register unsigned long reg0 asm("0") = 0x010000UL;
register unsigned long reg0 asm("0") = 0x810000UL;
unsigned long tmp1, tmp2;
tmp1 = -4096UL;
......@@ -346,10 +276,8 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size
static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
{
mm_segment_t old_fs;
unsigned long tmp1, tmp2;
old_fs = enable_sacf_uaccess();
asm volatile(
" sacf 256\n"
" aghi %0,-1\n"
......@@ -378,7 +306,6 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
: "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
: : "cc", "memory");
disable_sacf_uaccess(old_fs);
return size;
}
......@@ -414,15 +341,9 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
unsigned long __strnlen_user(const char __user *src, unsigned long size)
{
mm_segment_t old_fs;
unsigned long len;
if (unlikely(!size))
return 0;
old_fs = enable_sacf_uaccess();
len = strnlen_user_srst(src, size);
disable_sacf_uaccess(old_fs);
return len;
return strnlen_user_srst(src, size);
}
EXPORT_SYMBOL(__strnlen_user);
......
......@@ -53,7 +53,6 @@
enum fault_type {
KERNEL_FAULT,
USER_FAULT,
VDSO_FAULT,
GMAP_FAULT,
};
......@@ -77,22 +76,16 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
trans_exc_code = regs->int_parm_long & 3;
if (likely(trans_exc_code == 0)) {
/* primary space exception */
if (IS_ENABLED(CONFIG_PGSTE) &&
test_pt_regs_flag(regs, PIF_GUEST_FAULT))
return GMAP_FAULT;
if (current->thread.mm_segment == USER_DS)
if (user_mode(regs))
return USER_FAULT;
if (!IS_ENABLED(CONFIG_PGSTE))
return KERNEL_FAULT;
}
if (trans_exc_code == 2) {
/* secondary space exception */
if (current->thread.mm_segment & 1) {
if (current->thread.mm_segment == USER_DS_SACF)
return USER_FAULT;
if (test_pt_regs_flag(regs, PIF_GUEST_FAULT))
return GMAP_FAULT;
return KERNEL_FAULT;
}
return VDSO_FAULT;
}
if (trans_exc_code == 2)
return USER_FAULT;
if (trans_exc_code == 1) {
/* access register mode, not used in the kernel */
return USER_FAULT;
......@@ -188,10 +181,6 @@ static void dump_fault_info(struct pt_regs *regs)
asce = S390_lowcore.user_asce;
pr_cont("user ");
break;
case VDSO_FAULT:
asce = S390_lowcore.vdso_asce;
pr_cont("vdso ");
break;
case GMAP_FAULT:
asce = ((struct gmap *) S390_lowcore.gmap)->asce;
pr_cont("gmap ");
......@@ -414,9 +403,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
switch (type) {
case KERNEL_FAULT:
goto out;
case VDSO_FAULT:
fault = VM_FAULT_BADMAP;
goto out;
case USER_FAULT:
case GMAP_FAULT:
if (faulthandler_disabled() || !mm)
......@@ -834,7 +820,6 @@ void do_secure_storage_access(struct pt_regs *regs)
if (rc)
BUG();
break;
case VDSO_FAULT:
case GMAP_FAULT:
default:
do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
......
......@@ -70,19 +70,10 @@ static void __crst_table_upgrade(void *arg)
{
struct mm_struct *mm = arg;
/* we must change all active ASCEs to avoid the creation of new TLBs */
/* change all active ASCEs to avoid the creation of new TLBs */
if (current->active_mm == mm) {
S390_lowcore.user_asce = mm->context.asce;
if (current->thread.mm_segment == USER_DS) {
__ctl_load(S390_lowcore.user_asce, 1, 1);
/* Mark user-ASCE present in CR1 */
clear_cpu_flag(CIF_ASCE_PRIMARY);
}
if (current->thread.mm_segment == USER_DS_SACF) {
__ctl_load(S390_lowcore.user_asce, 7, 7);
/* enable_sacf_uaccess does all or nothing */
WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY));
}
}
__tlb_flush_local();
}
......
......@@ -93,12 +93,10 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
{
int size, rc = 0;
u8 status = 0;
mm_segment_t old_fs;
if (!src)
return -EINVAL;
old_fs = enable_sacf_uaccess();
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) dst,
(u64 __force) src, n,
......@@ -113,7 +111,6 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
dst += size;
n -= size;
}
disable_sacf_uaccess(old_fs);
if (rc)
zpci_err_mmio(rc, status, (__force u64) dst);
return rc;
......@@ -246,9 +243,7 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
{
int size, rc = 0;
u8 status;
mm_segment_t old_fs;
old_fs = enable_sacf_uaccess();
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) src,
(u64 __force) dst, n,
......@@ -260,7 +255,6 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
dst += size;
n -= size;
}
disable_sacf_uaccess(old_fs);
if (rc)
zpci_err_mmio(rc, status, (__force u64) dst);
return rc;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册