提交 e4da01d8 编写于 作者: L Linus Torvalds

Merge tag 'powerpc-5.7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull more powerpc updates from Michael Ellerman:
 "The bulk of this is the series to make CONFIG_COMPAT user-selectable,
  it's been around for a long time but was blocked behind the
  syscall-in-C series.

  Plus there's also a few fixes and other minor things.

  Summary:

   - A fix for a crash in machine check handling on pseries (ie. guests)

   - A small series to make it possible to disable CONFIG_COMPAT, and
     turn it off by default for ppc64le where it's not used.

   - A few other miscellaneous fixes and small improvements.

  Thanks to: Alexey Kardashevskiy, Anju T Sudhakar, Arnd Bergmann,
  Christophe Leroy, Dan Carpenter, Ganesh Goudar, Geert Uytterhoeven,
  Geoff Levand, Mahesh Salgaonkar, Markus Elfring, Michal Suchanek,
  Nicholas Piggin, Stephen Boyd, Wen Xiong"

* tag 'powerpc-5.7-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  selftests/powerpc: Always build the tm-poison test 64-bit
  powerpc: Improve ppc_save_regs()
  Revert "powerpc/64: irq_work avoid interrupt when called with hardware irqs enabled"
  powerpc/time: Replace <linux/clk-provider.h> by <linux/of_clk.h>
  powerpc/pseries/ddw: Extend upper limit for huge DMA window for persistent memory
  powerpc/perf: split callchain.c by bitness
  powerpc/64: Make COMPAT user-selectable disabled on littleendian by default.
  powerpc/64: make buildable without CONFIG_COMPAT
  powerpc/perf: consolidate valid_user_sp -> invalid_user_sp
  powerpc/perf: consolidate read_user_stack_32
  powerpc: move common register copy functions from signal_32.c to signal.c
  powerpc: Add back __ARCH_WANT_SYS_LLSEEK macro
  powerpc/ps3: Set CONFIG_UEVENT_HELPER=y in ps3_defconfig
  powerpc/ps3: Remove an unneeded NULL check
  powerpc/ps3: Remove duplicate error message
  powerpc/powernv: Re-enable imc trace-mode in kernel
  powerpc/perf: Implement a global lock to avoid races between trace, core and thread imc events.
  powerpc/pseries: Fix MCE handling on pseries
  selftests/eeh: Skip ahci adapters
  powerpc/64s: Fix doorbell wakeup msgclr optimisation
......@@ -266,8 +266,9 @@ config PANIC_TIMEOUT
default 180
config COMPAT
bool
default y if PPC64
bool "Enable support for 32bit binaries"
depends on PPC64
default y if !CPU_LITTLE_ENDIAN
select COMPAT_BINFMT_ELF
select ARCH_WANT_OLD_COMPAT_IPC
select COMPAT_OLD_SIGACTION
......
......@@ -60,6 +60,8 @@ CONFIG_CFG80211=m
CONFIG_CFG80211_WEXT=y
CONFIG_MAC80211=m
# CONFIG_MAC80211_RC_MINSTREL is not set
CONFIG_UEVENT_HELPER=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=65535
......
......@@ -162,10 +162,10 @@ static inline bool test_thread_local_flags(unsigned int flags)
return (ti->local_flags & flags) != 0;
}
#ifdef CONFIG_PPC64
#ifdef CONFIG_COMPAT
#define is_32bit_task() (test_thread_flag(TIF_32BIT))
#else
#define is_32bit_task() (1)
#define is_32bit_task() (IS_ENABLED(CONFIG_PPC32))
#endif
#if defined(CONFIG_PPC64)
......
......@@ -31,6 +31,7 @@
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_OLD_GETRLIMIT
#define __ARCH_WANT_SYS_OLD_UNAME
......
......@@ -40,16 +40,17 @@ CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
endif
obj-y := cputable.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
irq.o align.o signal_$(BITS).o pmc.o vdso.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
of_platform.o prom_parse.o
obj-y += ptrace/
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o signal_64.o \
obj-$(CONFIG_PPC64) += setup_64.o \
paca.o nvram_64.o firmware.o note.o \
syscall_64.o
obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
......
......@@ -52,8 +52,10 @@
SYS_CALL_TABLE:
.tc sys_call_table[TC],sys_call_table
#ifdef CONFIG_COMPAT
COMPAT_SYS_CALL_TABLE:
.tc compat_sys_call_table[TC],compat_sys_call_table
#endif
/* This value is used to mark exception frames on the stack. */
exception_marker:
......
......@@ -3121,22 +3121,3 @@ handle_dabr_fault:
li r5,SIGSEGV
bl bad_page_fault
b interrupt_return
/*
* When doorbell is triggered from system reset wakeup, the message is
* not cleared, so it would fire again when EE is enabled.
*
* When coming from local_irq_enable, there may be the same problem if
* we were hard disabled.
*
* Execute msgclr to clear pending exceptions before handling it.
*/
h_doorbell_common_msgclr:
LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
PPC_MSGCLR(3)
b h_doorbell_common_virt
doorbell_super_common_msgclr:
LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
PPC_MSGCLRP(3)
b doorbell_super_common_virt
......@@ -527,6 +527,19 @@ void irq_set_pending_from_srr1(unsigned long srr1)
return;
}
if (reason == PACA_IRQ_DBELL) {
/*
* When doorbell triggers a system reset wakeup, the message
* is not cleared, so if the doorbell interrupt is replayed
* and the IPI handled, the doorbell interrupt would still
* fire when EE is enabled.
*
* To avoid taking the superfluous doorbell interrupt,
* execute a msgclr here before the interrupt is replayed.
*/
ppc_msgclr(PPC_DBELL_MSGTYPE);
}
/*
* The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
* so this can be called unconditionally with the SRR1 wake
......
......@@ -55,14 +55,17 @@ _GLOBAL(ppc_save_regs)
PPC_STL r29,29*SZL(r3)
PPC_STL r30,30*SZL(r3)
PPC_STL r31,31*SZL(r3)
lbz r0,PACAIRQSOFTMASK(r13)
PPC_STL r0,SOFTE-STACK_FRAME_OVERHEAD(r3)
#endif
/* go up one stack frame for SP */
PPC_LL r4,0(r1)
PPC_STL r4,1*SZL(r3)
/* get caller's LR */
PPC_LL r0,LRSAVE(r4)
PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
mflr r0
PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
mfmsr r0
PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
mfctr r0
......@@ -73,4 +76,5 @@ _GLOBAL(ppc_save_regs)
PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
li r0,0
PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
PPC_STL r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3)
blr
......@@ -6,7 +6,7 @@
CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
obj-y += ptrace.o ptrace-view.o
obj-$(CONFIG_PPC64) += ptrace32.o
obj-$(CONFIG_COMPAT) += ptrace32.o
obj-$(CONFIG_VSX) += ptrace-vsx.o
ifneq ($(CONFIG_VSX),y)
obj-y += ptrace-novsx.o
......
......@@ -18,12 +18,153 @@
#include <linux/syscalls.h>
#include <asm/hw_breakpoint.h>
#include <linux/uaccess.h>
#include <asm/switch_to.h>
#include <asm/unistd.h>
#include <asm/debug.h>
#include <asm/tm.h>
#include "signal.h"
#ifdef CONFIG_VSX
unsigned long copy_fpr_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NFPREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
buf[i] = task->thread.TS_FPR(i);
buf[i] = task->thread.fp_state.fpscr;
return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
}
unsigned long copy_fpr_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NFPREG];
int i;
if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
return 1;
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
task->thread.TS_FPR(i) = buf[i];
task->thread.fp_state.fpscr = buf[i];
return 0;
}
unsigned long copy_vsx_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NVSRHALFREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < ELF_NVSRHALFREG; i++)
buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
}
unsigned long copy_vsx_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NVSRHALFREG];
int i;
if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
return 1;
for (i = 0; i < ELF_NVSRHALFREG ; i++)
task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
return 0;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
unsigned long copy_ckfpr_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NFPREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
buf[i] = task->thread.TS_CKFPR(i);
buf[i] = task->thread.ckfp_state.fpscr;
return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
}
unsigned long copy_ckfpr_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NFPREG];
int i;
if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
return 1;
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
task->thread.TS_CKFPR(i) = buf[i];
task->thread.ckfp_state.fpscr = buf[i];
return 0;
}
unsigned long copy_ckvsx_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NVSRHALFREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < ELF_NVSRHALFREG; i++)
buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
}
unsigned long copy_ckvsx_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NVSRHALFREG];
int i;
if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
return 1;
for (i = 0; i < ELF_NVSRHALFREG ; i++)
task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
return 0;
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#else
inline unsigned long copy_fpr_to_user(void __user *to,
struct task_struct *task)
{
return __copy_to_user(to, task->thread.fp_state.fpr,
ELF_NFPREG * sizeof(double));
}
inline unsigned long copy_fpr_from_user(struct task_struct *task,
void __user *from)
{
return __copy_from_user(task->thread.fp_state.fpr, from,
ELF_NFPREG * sizeof(double));
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
inline unsigned long copy_ckfpr_to_user(void __user *to,
struct task_struct *task)
{
return __copy_to_user(to, task->thread.ckfp_state.fpr,
ELF_NFPREG * sizeof(double));
}
inline unsigned long copy_ckfpr_from_user(struct task_struct *task,
void __user *from)
{
return __copy_from_user(task->thread.ckfp_state.fpr, from,
ELF_NFPREG * sizeof(double));
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#endif
/* Log an error when sending an unhandled signal to a process. Controlled
* through debug.exception-trace sysctl.
*/
......@@ -106,7 +247,6 @@ static void do_signal(struct task_struct *tsk)
sigset_t *oldset = sigmask_to_save();
struct ksignal ksig = { .sig = 0 };
int ret;
int is32 = is_32bit_task();
BUG_ON(tsk != current);
......@@ -136,7 +276,7 @@ static void do_signal(struct task_struct *tsk)
rseq_signal_deliver(&ksig, tsk->thread.regs);
if (is32) {
if (is_32bit_task()) {
if (ksig.ka.sa.sa_flags & SA_SIGINFO)
ret = handle_rt_signal32(&ksig, oldset, tsk);
else
......
......@@ -235,146 +235,6 @@ struct rt_sigframe {
int abigap[56];
};
#ifdef CONFIG_VSX
unsigned long copy_fpr_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NFPREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
buf[i] = task->thread.TS_FPR(i);
buf[i] = task->thread.fp_state.fpscr;
return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
}
unsigned long copy_fpr_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NFPREG];
int i;
if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
return 1;
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
task->thread.TS_FPR(i) = buf[i];
task->thread.fp_state.fpscr = buf[i];
return 0;
}
unsigned long copy_vsx_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NVSRHALFREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < ELF_NVSRHALFREG; i++)
buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
}
unsigned long copy_vsx_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NVSRHALFREG];
int i;
if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
return 1;
for (i = 0; i < ELF_NVSRHALFREG ; i++)
task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
return 0;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
unsigned long copy_ckfpr_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NFPREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
buf[i] = task->thread.TS_CKFPR(i);
buf[i] = task->thread.ckfp_state.fpscr;
return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
}
unsigned long copy_ckfpr_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NFPREG];
int i;
if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
return 1;
for (i = 0; i < (ELF_NFPREG - 1) ; i++)
task->thread.TS_CKFPR(i) = buf[i];
task->thread.ckfp_state.fpscr = buf[i];
return 0;
}
unsigned long copy_ckvsx_to_user(void __user *to,
struct task_struct *task)
{
u64 buf[ELF_NVSRHALFREG];
int i;
/* save FPR copy to local buffer then write to the thread_struct */
for (i = 0; i < ELF_NVSRHALFREG; i++)
buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
}
unsigned long copy_ckvsx_from_user(struct task_struct *task,
void __user *from)
{
u64 buf[ELF_NVSRHALFREG];
int i;
if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
return 1;
for (i = 0; i < ELF_NVSRHALFREG ; i++)
task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
return 0;
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#else
inline unsigned long copy_fpr_to_user(void __user *to,
struct task_struct *task)
{
return __copy_to_user(to, task->thread.fp_state.fpr,
ELF_NFPREG * sizeof(double));
}
inline unsigned long copy_fpr_from_user(struct task_struct *task,
void __user *from)
{
return __copy_from_user(task->thread.fp_state.fpr, from,
ELF_NFPREG * sizeof(double));
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
inline unsigned long copy_ckfpr_to_user(void __user *to,
struct task_struct *task)
{
return __copy_to_user(to, task->thread.ckfp_state.fpr,
ELF_NFPREG * sizeof(double));
}
inline unsigned long copy_ckfpr_from_user(struct task_struct *task,
void __user *from)
{
return __copy_from_user(task->thread.ckfp_state.fpr, from,
ELF_NFPREG * sizeof(double));
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#endif
/*
* Save the current user registers on the user stack.
* We only save the altivec/spe registers if the process has used
......
......@@ -22,7 +22,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
long r6, long r7, long r8,
unsigned long r0, struct pt_regs *regs)
{
unsigned long ti_flags;
syscall_fn f;
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
......@@ -60,8 +59,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
local_irq_enable();
ti_flags = current_thread_info()->flags;
if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
/*
* We use the return value of do_syscall_trace_enter() as the
* syscall number. If the syscall was rejected for any reason
......@@ -86,7 +84,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
/* May be faster to do array_index_nospec? */
barrier_nospec();
if (unlikely(ti_flags & _TIF_32BIT)) {
if (unlikely(is_32bit_task())) {
f = (void *)compat_sys_call_table[r0];
r3 &= 0x00000000ffffffffULL;
......
......@@ -50,7 +50,7 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/irq_work.h>
#include <linux/clk-provider.h>
#include <linux/of_clk.h>
#include <linux/suspend.h>
#include <linux/sched/cputime.h>
#include <linux/processor.h>
......@@ -522,35 +522,6 @@ static inline void clear_irq_work_pending(void)
"i" (offsetof(struct paca_struct, irq_work_pending)));
}
void arch_irq_work_raise(void)
{
preempt_disable();
set_irq_work_pending_flag();
/*
* Non-nmi code running with interrupts disabled will replay
* irq_happened before it re-enables interrupts, so setthe
* decrementer there instead of causing a hardware exception
* which would immediately hit the masked interrupt handler
* and have the net effect of setting the decrementer in
* irq_happened.
*
* NMI interrupts can not check this when they return, so the
* decrementer hardware exception is raised, which will fire
* when interrupts are next enabled.
*
* BookE does not support this yet, it must audit all NMI
* interrupt handlers to ensure they call nmi_enter() so this
* check would be correct.
*/
if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) {
set_dec(1);
} else {
hard_irq_disable();
local_paca->irq_happened |= PACA_IRQ_DEC;
}
preempt_enable();
}
#else /* 32-bit */
DEFINE_PER_CPU(u8, irq_work_pending);
......@@ -559,16 +530,27 @@ DEFINE_PER_CPU(u8, irq_work_pending);
#define test_irq_work_pending() __this_cpu_read(irq_work_pending)
#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0)
#endif /* 32 vs 64 bit */
void arch_irq_work_raise(void)
{
/*
* 64-bit code that uses irq soft-mask can just cause an immediate
* interrupt here that gets soft masked, if this is called under
* local_irq_disable(). It might be possible to prevent that happening
* by noticing interrupts are disabled and setting decrementer pending
* to be replayed when irqs are enabled. The problem there is that
* tracing can call irq_work_raise, including in code that does low
* level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
* which could get tangled up if we're messing with the same state
* here.
*/
preempt_disable();
set_irq_work_pending_flag();
set_dec(1);
preempt_enable();
}
#endif /* 32 vs 64 bit */
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
......@@ -1149,9 +1131,7 @@ void __init time_init(void)
init_decrementer_clockevent();
tick_setup_hrtimer_broadcast();
#ifdef CONFIG_COMMON_CLK
of_clk_init(NULL);
#endif
}
/*
......
......@@ -651,7 +651,8 @@ static void __init vdso_setup_syscall_map(void)
if (sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_64[i >> 5] |=
0x80000000UL >> (i & 0x1f);
if (compat_sys_call_table[i] != sys_ni_syscall)
if (IS_ENABLED(CONFIG_COMPAT) &&
compat_sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
#else /* CONFIG_PPC64 */
......
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += callchain.o callchain_$(BITS).o perf_regs.o
ifdef CONFIG_COMPAT
obj-$(CONFIG_PERF_EVENTS) += callchain_32.o
endif
obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
......
......@@ -15,11 +15,9 @@
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
#ifdef CONFIG_PPC64
#include "../kernel/ppc32.h"
#endif
#include <asm/pte-walk.h>
#include "callchain.h"
/*
* Is sp valid as the address of the next kernel stack frame after prev_sp?
......@@ -102,358 +100,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
}
}
#ifdef CONFIG_PPC64
/*
* On 64-bit we don't want to invoke hash_page on user addresses from
* interrupt context, so if the access faults, we read the page tables
* to find which page (if any) is mapped and access it directly.
*/
static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
{
int ret = -EFAULT;
pgd_t *pgdir;
pte_t *ptep, pte;
unsigned shift;
unsigned long addr = (unsigned long) ptr;
unsigned long offset;
unsigned long pfn, flags;
void *kaddr;
pgdir = current->mm->pgd;
if (!pgdir)
return -EFAULT;
local_irq_save(flags);
ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
if (!ptep)
goto err_out;
if (!shift)
shift = PAGE_SHIFT;
/* align address to page boundary */
offset = addr & ((1UL << shift) - 1);
pte = READ_ONCE(*ptep);
if (!pte_present(pte) || !pte_user(pte))
goto err_out;
pfn = pte_pfn(pte);
if (!page_is_ram(pfn))
goto err_out;
/* no highmem to worry about here */
kaddr = pfn_to_kaddr(pfn);
memcpy(buf, kaddr + offset, nb);
ret = 0;
err_out:
local_irq_restore(flags);
return ret;
}
static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
{
if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
((unsigned long)ptr & 7))
return -EFAULT;
if (!probe_user_read(ret, ptr, sizeof(*ret)))
return 0;
return read_user_stack_slow(ptr, ret, 8);
}
static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
{
if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
((unsigned long)ptr & 3))
return -EFAULT;
if (!probe_user_read(ret, ptr, sizeof(*ret)))
return 0;
return read_user_stack_slow(ptr, ret, 4);
}
static inline int valid_user_sp(unsigned long sp, int is_64)
{
if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
return 0;
return 1;
}
/*
* 64-bit user processes use the same stack frame for RT and non-RT signals.
*/
struct signal_frame_64 {
char dummy[__SIGNAL_FRAMESIZE];
struct ucontext uc;
unsigned long unused[2];
unsigned int tramp[6];
struct siginfo *pinfo;
void *puc;
struct siginfo info;
char abigap[288];
};
static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
{
if (nip == fp + offsetof(struct signal_frame_64, tramp))
return 1;
if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
return 1;
return 0;
}
/*
* Do some sanity checking on the signal frame pointed to by sp.
* We check the pinfo and puc pointers in the frame.
*/
static int sane_signal_64_frame(unsigned long sp)
{
struct signal_frame_64 __user *sf;
unsigned long pinfo, puc;
sf = (struct signal_frame_64 __user *) sp;
if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
return 0;
return pinfo == (unsigned long) &sf->info &&
puc == (unsigned long) &sf->uc;
}
static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
unsigned long sp, next_sp;
unsigned long next_ip;
unsigned long lr;
long level = 0;
struct signal_frame_64 __user *sigframe;
unsigned long __user *fp, *uregs;
next_ip = perf_instruction_pointer(regs);
lr = regs->link;
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
while (entry->nr < entry->max_stack) {
fp = (unsigned long __user *) sp;
if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
return;
if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
return;
/*
* Note: the next_sp - sp >= signal frame size check
* is true when next_sp < sp, which can happen when
* transitioning from an alternate signal stack to the
* normal stack.
*/
if (next_sp - sp >= sizeof(struct signal_frame_64) &&
(is_sigreturn_64_address(next_ip, sp) ||
(level <= 1 && is_sigreturn_64_address(lr, sp))) &&
sane_signal_64_frame(sp)) {
/*
* This looks like an signal frame
*/
sigframe = (struct signal_frame_64 __user *) sp;
uregs = sigframe->uc.uc_mcontext.gp_regs;
if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
read_user_stack_64(&uregs[PT_LNK], &lr) ||
read_user_stack_64(&uregs[PT_R1], &sp))
return;
level = 0;
perf_callchain_store_context(entry, PERF_CONTEXT_USER);
perf_callchain_store(entry, next_ip);
continue;
}
if (level == 0)
next_ip = lr;
perf_callchain_store(entry, next_ip);
++level;
sp = next_sp;
}
}
#else /* CONFIG_PPC64 */
/*
* On 32-bit we just access the address and let hash_page create a
* HPTE if necessary, so there is no need to fall back to reading
* the page tables. Since this is called at interrupt level,
* do_page_fault() won't treat a DSI as a page fault.
*/
static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
{
if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
((unsigned long)ptr & 3))
return -EFAULT;
return probe_user_read(ret, ptr, sizeof(*ret));
}
static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
}
static inline int valid_user_sp(unsigned long sp, int is_64)
{
if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
return 0;
return 1;
}
#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
#define sigcontext32 sigcontext
#define mcontext32 mcontext
#define ucontext32 ucontext
#define compat_siginfo_t struct siginfo
#endif /* CONFIG_PPC64 */
/*
* Layout for non-RT signal frames
*/
struct signal_frame_32 {
char dummy[__SIGNAL_FRAMESIZE32];
struct sigcontext32 sctx;
struct mcontext32 mctx;
int abigap[56];
};
/*
* Layout for RT signal frames
*/
struct rt_signal_frame_32 {
char dummy[__SIGNAL_FRAMESIZE32 + 16];
compat_siginfo_t info;
struct ucontext32 uc;
int abigap[56];
};
static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
{
if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
return 1;
if (vdso32_sigtramp && current->mm->context.vdso_base &&
nip == current->mm->context.vdso_base + vdso32_sigtramp)
return 1;
return 0;
}
static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
{
if (nip == fp + offsetof(struct rt_signal_frame_32,
uc.uc_mcontext.mc_pad))
return 1;
if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
return 1;
return 0;
}
static int sane_signal_32_frame(unsigned int sp)
{
struct signal_frame_32 __user *sf;
unsigned int regs;
sf = (struct signal_frame_32 __user *) (unsigned long) sp;
if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
return 0;
return regs == (unsigned long) &sf->mctx;
}
static int sane_rt_signal_32_frame(unsigned int sp)
{
struct rt_signal_frame_32 __user *sf;
unsigned int regs;
sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
return 0;
return regs == (unsigned long) &sf->uc.uc_mcontext;
}
static unsigned int __user *signal_frame_32_regs(unsigned int sp,
unsigned int next_sp, unsigned int next_ip)
{
struct mcontext32 __user *mctx = NULL;
struct signal_frame_32 __user *sf;
struct rt_signal_frame_32 __user *rt_sf;
/*
* Note: the next_sp - sp >= signal frame size check
* is true when next_sp < sp, for example, when
* transitioning from an alternate signal stack to the
* normal stack.
*/
if (next_sp - sp >= sizeof(struct signal_frame_32) &&
is_sigreturn_32_address(next_ip, sp) &&
sane_signal_32_frame(sp)) {
sf = (struct signal_frame_32 __user *) (unsigned long) sp;
mctx = &sf->mctx;
}
if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
is_rt_sigreturn_32_address(next_ip, sp) &&
sane_rt_signal_32_frame(sp)) {
rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
mctx = &rt_sf->uc.uc_mcontext;
}
if (!mctx)
return NULL;
return mctx->mc_gregs;
}
static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
unsigned int sp, next_sp;
unsigned int next_ip;
unsigned int lr;
long level = 0;
unsigned int __user *fp, *uregs;
next_ip = perf_instruction_pointer(regs);
lr = regs->link;
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
while (entry->nr < entry->max_stack) {
fp = (unsigned int __user *) (unsigned long) sp;
if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
return;
if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
return;
uregs = signal_frame_32_regs(sp, next_sp, next_ip);
if (!uregs && level <= 1)
uregs = signal_frame_32_regs(sp, next_sp, lr);
if (uregs) {
/*
* This looks like an signal frame, so restart
* the stack trace with the values in it.
*/
if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
read_user_stack_32(&uregs[PT_LNK], &lr) ||
read_user_stack_32(&uregs[PT_R1], &sp))
return;
level = 0;
perf_callchain_store_context(entry, PERF_CONTEXT_USER);
perf_callchain_store(entry, next_ip);
continue;
}
if (level == 0)
next_ip = lr;
perf_callchain_store(entry, next_ip);
++level;
sp = next_sp;
}
}
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
......
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _POWERPC_PERF_CALLCHAIN_H
#define _POWERPC_PERF_CALLCHAIN_H
int read_user_stack_slow(void __user *ptr, void *buf, int nb);
void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs);
void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs);
static inline bool invalid_user_sp(unsigned long sp)
{
unsigned long mask = is_32bit_task() ? 3 : 7;
unsigned long top = STACK_TOP - (is_32bit_task() ? 16 : 32);
return (!sp || (sp & mask) || (sp > top));
}
#endif /* _POWERPC_PERF_CALLCHAIN_H */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter callchain support - powerpc architecture code
*
* Copyright © 2009 Paul Mackerras, IBM Corporation.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <asm/ptrace.h>
#include <asm/pgtable.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
#include <asm/pte-walk.h>
#include "callchain.h"
#ifdef CONFIG_PPC64
#include "../kernel/ppc32.h"
#else /* CONFIG_PPC64 */
#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
#define sigcontext32 sigcontext
#define mcontext32 mcontext
#define ucontext32 ucontext
#define compat_siginfo_t struct siginfo
#endif /* CONFIG_PPC64 */
/*
* On 32-bit we just access the address and let hash_page create a
* HPTE if necessary, so there is no need to fall back to reading
* the page tables. Since this is called at interrupt level,
* do_page_fault() won't treat a DSI as a page fault.
*/
static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
{
int rc;
if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
((unsigned long)ptr & 3))
return -EFAULT;
rc = probe_user_read(ret, ptr, sizeof(*ret));
if (IS_ENABLED(CONFIG_PPC64) && rc)
return read_user_stack_slow(ptr, ret, 4);
return rc;
}
/*
* Layout for non-RT signal frames
*/
struct signal_frame_32 {
char dummy[__SIGNAL_FRAMESIZE32];
struct sigcontext32 sctx;
struct mcontext32 mctx;
int abigap[56];
};
/*
* Layout for RT signal frames
*/
struct rt_signal_frame_32 {
char dummy[__SIGNAL_FRAMESIZE32 + 16];
compat_siginfo_t info;
struct ucontext32 uc;
int abigap[56];
};
static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
{
if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
return 1;
if (vdso32_sigtramp && current->mm->context.vdso_base &&
nip == current->mm->context.vdso_base + vdso32_sigtramp)
return 1;
return 0;
}
static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
{
if (nip == fp + offsetof(struct rt_signal_frame_32,
uc.uc_mcontext.mc_pad))
return 1;
if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
return 1;
return 0;
}
static int sane_signal_32_frame(unsigned int sp)
{
struct signal_frame_32 __user *sf;
unsigned int regs;
sf = (struct signal_frame_32 __user *) (unsigned long) sp;
if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
return 0;
return regs == (unsigned long) &sf->mctx;
}
static int sane_rt_signal_32_frame(unsigned int sp)
{
struct rt_signal_frame_32 __user *sf;
unsigned int regs;
sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
return 0;
return regs == (unsigned long) &sf->uc.uc_mcontext;
}
static unsigned int __user *signal_frame_32_regs(unsigned int sp,
unsigned int next_sp, unsigned int next_ip)
{
struct mcontext32 __user *mctx = NULL;
struct signal_frame_32 __user *sf;
struct rt_signal_frame_32 __user *rt_sf;
/*
* Note: the next_sp - sp >= signal frame size check
* is true when next_sp < sp, for example, when
* transitioning from an alternate signal stack to the
* normal stack.
*/
if (next_sp - sp >= sizeof(struct signal_frame_32) &&
is_sigreturn_32_address(next_ip, sp) &&
sane_signal_32_frame(sp)) {
sf = (struct signal_frame_32 __user *) (unsigned long) sp;
mctx = &sf->mctx;
}
if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
is_rt_sigreturn_32_address(next_ip, sp) &&
sane_rt_signal_32_frame(sp)) {
rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
mctx = &rt_sf->uc.uc_mcontext;
}
if (!mctx)
return NULL;
return mctx->mc_gregs;
}
void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
unsigned int sp, next_sp;
unsigned int next_ip;
unsigned int lr;
long level = 0;
unsigned int __user *fp, *uregs;
next_ip = perf_instruction_pointer(regs);
lr = regs->link;
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
while (entry->nr < entry->max_stack) {
fp = (unsigned int __user *) (unsigned long) sp;
if (invalid_user_sp(sp) || read_user_stack_32(fp, &next_sp))
return;
if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
return;
uregs = signal_frame_32_regs(sp, next_sp, next_ip);
if (!uregs && level <= 1)
uregs = signal_frame_32_regs(sp, next_sp, lr);
if (uregs) {
/*
* This looks like an signal frame, so restart
* the stack trace with the values in it.
*/
if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
read_user_stack_32(&uregs[PT_LNK], &lr) ||
read_user_stack_32(&uregs[PT_R1], &sp))
return;
level = 0;
perf_callchain_store_context(entry, PERF_CONTEXT_USER);
perf_callchain_store(entry, next_ip);
continue;
}
if (level == 0)
next_ip = lr;
perf_callchain_store(entry, next_ip);
++level;
sp = next_sp;
}
}
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter callchain support - powerpc architecture code
*
* Copyright © 2009 Paul Mackerras, IBM Corporation.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <asm/ptrace.h>
#include <asm/pgtable.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
#include <asm/pte-walk.h>
#include "callchain.h"
/*
* On 64-bit we don't want to invoke hash_page on user addresses from
* interrupt context, so if the access faults, we read the page tables
* to find which page (if any) is mapped and access it directly.
*/
int read_user_stack_slow(void __user *ptr, void *buf, int nb)
{
int ret = -EFAULT;
pgd_t *pgdir;
pte_t *ptep, pte;
unsigned int shift;
unsigned long addr = (unsigned long) ptr;
unsigned long offset;
unsigned long pfn, flags;
void *kaddr;
pgdir = current->mm->pgd;
if (!pgdir)
return -EFAULT;
local_irq_save(flags);
ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
if (!ptep)
goto err_out;
if (!shift)
shift = PAGE_SHIFT;
/* align address to page boundary */
offset = addr & ((1UL << shift) - 1);
pte = READ_ONCE(*ptep);
if (!pte_present(pte) || !pte_user(pte))
goto err_out;
pfn = pte_pfn(pte);
if (!page_is_ram(pfn))
goto err_out;
/* no highmem to worry about here */
kaddr = pfn_to_kaddr(pfn);
memcpy(buf, kaddr + offset, nb);
ret = 0;
err_out:
local_irq_restore(flags);
return ret;
}
static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
{
if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
((unsigned long)ptr & 7))
return -EFAULT;
if (!probe_user_read(ret, ptr, sizeof(*ret)))
return 0;
return read_user_stack_slow(ptr, ret, 8);
}
/*
* 64-bit user processes use the same stack frame for RT and non-RT signals.
*/
struct signal_frame_64 {
char dummy[__SIGNAL_FRAMESIZE];
struct ucontext uc;
unsigned long unused[2];
unsigned int tramp[6];
struct siginfo *pinfo;
void *puc;
struct siginfo info;
char abigap[288];
};
static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
{
if (nip == fp + offsetof(struct signal_frame_64, tramp))
return 1;
if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
return 1;
return 0;
}
/*
* Do some sanity checking on the signal frame pointed to by sp.
* We check the pinfo and puc pointers in the frame.
*/
static int sane_signal_64_frame(unsigned long sp)
{
struct signal_frame_64 __user *sf;
unsigned long pinfo, puc;
sf = (struct signal_frame_64 __user *) sp;
if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
return 0;
return pinfo == (unsigned long) &sf->info &&
puc == (unsigned long) &sf->uc;
}
void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
unsigned long sp, next_sp;
unsigned long next_ip;
unsigned long lr;
long level = 0;
struct signal_frame_64 __user *sigframe;
unsigned long __user *fp, *uregs;
next_ip = perf_instruction_pointer(regs);
lr = regs->link;
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
while (entry->nr < entry->max_stack) {
fp = (unsigned long __user *) sp;
if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp))
return;
if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
return;
/*
* Note: the next_sp - sp >= signal frame size check
* is true when next_sp < sp, which can happen when
* transitioning from an alternate signal stack to the
* normal stack.
*/
if (next_sp - sp >= sizeof(struct signal_frame_64) &&
(is_sigreturn_64_address(next_ip, sp) ||
(level <= 1 && is_sigreturn_64_address(lr, sp))) &&
sane_signal_64_frame(sp)) {
/*
* This looks like an signal frame
*/
sigframe = (struct signal_frame_64 __user *) sp;
uregs = sigframe->uc.uc_mcontext.gp_regs;
if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
read_user_stack_64(&uregs[PT_LNK], &lr) ||
read_user_stack_64(&uregs[PT_R1], &sp))
return;
level = 0;
perf_callchain_store_context(entry, PERF_CONTEXT_USER);
perf_callchain_store(entry, next_ip);
continue;
}
if (level == 0)
next_ip = lr;
perf_callchain_store(entry, next_ip);
++level;
sp = next_sp;
}
}
......@@ -44,6 +44,16 @@ static DEFINE_PER_CPU(u64 *, trace_imc_mem);
static struct imc_pmu_ref *trace_imc_refc;
static int trace_imc_mem_size;
/*
* Global data structure used to avoid races between thread,
* core and trace-imc
*/
static struct imc_pmu_ref imc_global_refc = {
.lock = __MUTEX_INITIALIZER(imc_global_refc.lock),
.id = 0,
.refc = 0,
};
static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct imc_pmu, pmu);
......@@ -698,6 +708,16 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
return -EINVAL;
ref->refc = 0;
/*
* Reduce the global reference count, if this is the
* last cpu in this core and core-imc event running
* in this cpu.
*/
mutex_lock(&imc_global_refc.lock);
if (imc_global_refc.id == IMC_DOMAIN_CORE)
imc_global_refc.refc--;
mutex_unlock(&imc_global_refc.lock);
}
return 0;
}
......@@ -710,6 +730,23 @@ static int core_imc_pmu_cpumask_init(void)
ppc_core_imc_cpu_offline);
}
static void reset_global_refc(struct perf_event *event)
{
mutex_lock(&imc_global_refc.lock);
imc_global_refc.refc--;
/*
* If no other thread is running any
* event for this domain(thread/core/trace),
* set the global id to zero.
*/
if (imc_global_refc.refc <= 0) {
imc_global_refc.refc = 0;
imc_global_refc.id = 0;
}
mutex_unlock(&imc_global_refc.lock);
}
static void core_imc_counters_release(struct perf_event *event)
{
int rc, core_id;
......@@ -759,6 +796,8 @@ static void core_imc_counters_release(struct perf_event *event)
ref->refc = 0;
}
mutex_unlock(&ref->lock);
reset_global_refc(event);
}
static int core_imc_event_init(struct perf_event *event)
......@@ -819,6 +858,29 @@ static int core_imc_event_init(struct perf_event *event)
++ref->refc;
mutex_unlock(&ref->lock);
/*
* Since the system can run either in accumulation or trace-mode
* of IMC at a time, core-imc events are allowed only if no other
* trace/thread imc events are enabled/monitored.
*
* Take the global lock, and check the refc.id
* to know whether any other trace/thread imc
* events are running.
*/
mutex_lock(&imc_global_refc.lock);
if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
/*
* No other trace/thread imc events are running in
* the system, so set the refc.id to core-imc.
*/
imc_global_refc.id = IMC_DOMAIN_CORE;
imc_global_refc.refc++;
} else {
mutex_unlock(&imc_global_refc.lock);
return -EBUSY;
}
mutex_unlock(&imc_global_refc.lock);
event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
event->destroy = core_imc_counters_release;
return 0;
......@@ -877,7 +939,23 @@ static int ppc_thread_imc_cpu_online(unsigned int cpu)
static int ppc_thread_imc_cpu_offline(unsigned int cpu)
{
mtspr(SPRN_LDBAR, 0);
/*
* Set the bit 0 of LDBAR to zero.
*
* If bit 0 of LDBAR is unset, it will stop posting
* the counter data to memory.
* For thread-imc, bit 0 of LDBAR will be set to 1 in the
* event_add function. So reset this bit here, to stop the updates
* to memory in the cpu_offline path.
*/
mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
/* Reduce the refc if thread-imc event running on this cpu */
mutex_lock(&imc_global_refc.lock);
if (imc_global_refc.id == IMC_DOMAIN_THREAD)
imc_global_refc.refc--;
mutex_unlock(&imc_global_refc.lock);
return 0;
}
......@@ -916,7 +994,22 @@ static int thread_imc_event_init(struct perf_event *event)
if (!target)
return -EINVAL;
mutex_lock(&imc_global_refc.lock);
/*
* Check if any other trace/core imc events are running in the
* system, if not set the global id to thread-imc.
*/
if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) {
imc_global_refc.id = IMC_DOMAIN_THREAD;
imc_global_refc.refc++;
} else {
mutex_unlock(&imc_global_refc.lock);
return -EBUSY;
}
mutex_unlock(&imc_global_refc.lock);
event->pmu->task_ctx_nr = perf_sw_context;
event->destroy = reset_global_refc;
return 0;
}
......@@ -1063,10 +1156,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
int core_id;
struct imc_pmu_ref *ref;
mtspr(SPRN_LDBAR, 0);
core_id = smp_processor_id() / threads_per_core;
ref = &core_imc_refc[core_id];
if (!ref) {
pr_debug("imc: Failed to get event reference count\n");
return;
}
mutex_lock(&ref->lock);
ref->refc--;
......@@ -1082,6 +1177,10 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
ref->refc = 0;
}
mutex_unlock(&ref->lock);
/* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
/*
* Take a snapshot and calculate the delta and update
* the event counter values.
......@@ -1133,7 +1232,18 @@ static int ppc_trace_imc_cpu_online(unsigned int cpu)
static int ppc_trace_imc_cpu_offline(unsigned int cpu)
{
mtspr(SPRN_LDBAR, 0);
/*
* No need to set bit 0 of LDBAR to zero, as
* it is set to zero for imc trace-mode
*
* Reduce the refc if any trace-imc event running
* on this cpu.
*/
mutex_lock(&imc_global_refc.lock);
if (imc_global_refc.id == IMC_DOMAIN_TRACE)
imc_global_refc.refc--;
mutex_unlock(&imc_global_refc.lock);
return 0;
}
......@@ -1226,15 +1336,14 @@ static int trace_imc_event_add(struct perf_event *event, int flags)
local_mem = get_trace_imc_event_base_addr();
ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
if (core_imc_refc)
ref = &core_imc_refc[core_id];
/* trace-imc reference count */
if (trace_imc_refc)
ref = &trace_imc_refc[core_id];
if (!ref) {
/* If core-imc is not enabled, use trace-imc reference count */
if (trace_imc_refc)
ref = &trace_imc_refc[core_id];
if (!ref)
return -EINVAL;
pr_debug("imc: Failed to get the event reference count\n");
return -EINVAL;
}
mtspr(SPRN_LDBAR, ldbar_value);
mutex_lock(&ref->lock);
if (ref->refc == 0) {
......@@ -1242,13 +1351,11 @@ static int trace_imc_event_add(struct perf_event *event, int flags)
get_hard_smp_processor_id(smp_processor_id()))) {
mutex_unlock(&ref->lock);
pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
mtspr(SPRN_LDBAR, 0);
return -EINVAL;
}
}
++ref->refc;
mutex_unlock(&ref->lock);
return 0;
}
......@@ -1274,16 +1381,13 @@ static void trace_imc_event_del(struct perf_event *event, int flags)
int core_id = smp_processor_id() / threads_per_core;
struct imc_pmu_ref *ref = NULL;
if (core_imc_refc)
ref = &core_imc_refc[core_id];
if (trace_imc_refc)
ref = &trace_imc_refc[core_id];
if (!ref) {
/* If core-imc is not enabled, use trace-imc reference count */
if (trace_imc_refc)
ref = &trace_imc_refc[core_id];
if (!ref)
return;
pr_debug("imc: Failed to get event reference count\n");
return;
}
mtspr(SPRN_LDBAR, 0);
mutex_lock(&ref->lock);
ref->refc--;
if (ref->refc == 0) {
......@@ -1297,6 +1401,7 @@ static void trace_imc_event_del(struct perf_event *event, int flags)
ref->refc = 0;
}
mutex_unlock(&ref->lock);
trace_imc_event_stop(event, flags);
}
......@@ -1314,10 +1419,30 @@ static int trace_imc_event_init(struct perf_event *event)
if (event->attr.sample_period == 0)
return -ENOENT;
/*
* Take the global lock, and make sure
* no other thread is running any core/thread imc
* events
*/
mutex_lock(&imc_global_refc.lock);
if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) {
/*
* No core/thread imc events are running in the
* system, so set the refc.id to trace-imc.
*/
imc_global_refc.id = IMC_DOMAIN_TRACE;
imc_global_refc.refc++;
} else {
mutex_unlock(&imc_global_refc.lock);
return -EBUSY;
}
mutex_unlock(&imc_global_refc.lock);
event->hw.idx = -1;
target = event->hw.target;
event->pmu->task_ctx_nr = perf_hw_context;
event->destroy = reset_global_refc;
return 0;
}
......@@ -1429,10 +1554,10 @@ static void cleanup_all_core_imc_memory(void)
static void thread_imc_ldbar_disable(void *dummy)
{
/*
* By Zeroing LDBAR, we disable thread-imc
* updates.
* By setting 0th bit of LDBAR to zero, we disable thread-imc
* updates to memory.
*/
mtspr(SPRN_LDBAR, 0);
mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
}
void thread_imc_disable(void)
......
......@@ -268,14 +268,7 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
domain = IMC_DOMAIN_THREAD;
break;
case IMC_TYPE_TRACE:
/*
* FIXME. Using trace_imc events to monitor application
* or KVM thread performance can cause a checkstop
* (system crash).
* Disable it for now.
*/
pr_info_once("IMC: disabling trace_imc PMU\n");
domain = -1;
domain = IMC_DOMAIN_TRACE;
break;
default:
pr_warn("IMC Unknown Device type \n");
......
......@@ -613,10 +613,8 @@ static int update_flash_db(void)
/* Read in header and db from flash. */
header = kmalloc(buf_len, GFP_KERNEL);
if (!header) {
pr_debug("%s: kmalloc failed\n", __func__);
if (!header)
return -ENOMEM;
}
count = os_area_flash_read(header, buf_len, 0);
if (count < 0) {
......
......@@ -945,6 +945,15 @@ static phys_addr_t ddw_memory_hotplug_max(void)
phys_addr_t max_addr = memory_hotplug_max();
struct device_node *memory;
/*
* The "ibm,pmemory" can appear anywhere in the address space.
* Assuming it is still backed by page structs, set the upper limit
* for the huge DMA window as MAX_PHYSMEM_BITS.
*/
if (of_find_node_by_type(NULL, "ibm,pmemory"))
return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ?
(phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS);
for_each_node_by_type(memory, "memory") {
unsigned long start, size;
int n_mem_addr_cells, n_mem_size_cells, len;
......
......@@ -686,6 +686,17 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
#endif
out:
/*
* Enable translation as we will be accessing per-cpu variables
* in save_mce_event() which may fall outside RMO region, also
* leave it enabled because subsequently we will be queuing work
* to workqueues where again per-cpu variables accessed, besides
* fwnmi_release_errinfo() crashes when called in realmode on
* pseries.
* Note: All the realmode handling like flushing SLB entries for
* SLB multihit is done by now.
*/
mtmsr(mfmsr() | MSR_IR | MSR_DR);
save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
&mce_err, regs->nip, eaddr, paddr);
......
......@@ -31,7 +31,7 @@ void ps3_sys_manager_register_ops(const struct ps3_sys_manager_ops *ops)
{
BUG_ON(!ops);
BUG_ON(!ops->dev);
ps3_sys_manager_ops = ops ? *ops : ps3_sys_manager_ops;
ps3_sys_manager_ops = *ops;
}
EXPORT_SYMBOL_GPL(ps3_sys_manager_register_ops);
......
......@@ -331,7 +331,8 @@ COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned i
}
#endif
#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT)
#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \
defined(__ARCH_WANT_SYS_LLSEEK)
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
unsigned long, offset_low, loff_t __user *, result,
unsigned int, whence)
......
......@@ -41,6 +41,11 @@ for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do
continue;
fi
if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
echo "$dev, Skipped: ahci doesn't support recovery"
continue
fi
# Don't inject errosr into an already-frozen PE. This happens with
# PEs that contain multiple PCI devices (e.g. multi-function cards)
# and injecting new errors during the recovery process will probably
......
......@@ -25,6 +25,7 @@ $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -
$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
$(OUTPUT)/tm-signal-context-force-tm: CFLAGS += -pthread -m64
$(OUTPUT)/tm-signal-pagefault: CFLAGS += -pthread -m64
$(OUTPUT)/tm-poison: CFLAGS += -m64
SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册