提交 a2c7a54f 编写于 作者: L Linus Torvalds

Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc

Pull powerpc fixes from Benjamin Herrenschmidt:
 "This is mostly bug fixes (some of them regressions, some of them I
  deemed worth merging now) along with some patches from Li Zhong
  hooking up the new context tracking stuff (for the new full NO_HZ)"

* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (25 commits)
  powerpc: Set show_unhandled_signals to 1 by default
  powerpc/perf: Fix setting of "to" addresses for BHRB
  powerpc/pmu: Fix order of interpreting BHRB target entries
  powerpc/perf: Move BHRB code into CONFIG_PPC64 region
  powerpc: select HAVE_CONTEXT_TRACKING for pSeries
  powerpc: Use the new schedule_user API on userspace preemption
  powerpc: Exit user context on notify resume
  powerpc: Exception hooks for context tracking subsystem
  powerpc: Syscall hooks for context tracking subsystem
  powerpc/booke64: Fix kernel hangs at kernel_dbg_exc
  powerpc: Fix irq_set_affinity() return values
  powerpc: Provide __bswapdi2
  powerpc/powernv: Fix starting of secondary CPUs on OPALv2 and v3
  powerpc/powernv: Detect OPAL v3 API version
  powerpc: Fix MAX_STACK_TRACE_ENTRIES too low warning again
  powerpc: Make CONFIG_RTAS_PROC depend on CONFIG_PROC_FS
  powerpc: Bring all threads online prior to migration/hibernation
  powerpc/rtas_flash: Fix validate_flash buffer overflow issue
  powerpc/kexec: Fix kexec when using VMX optimised memcpy
  powerpc: Fix build errors STRICT_MM_TYPECHECKS
  ...
......@@ -262,8 +262,31 @@ config PPC_EARLY_DEBUG_OPAL_HVSI
Select this to enable early debugging for the PowerNV platform
using an "hvsi" console
config PPC_EARLY_DEBUG_MEMCONS
bool "In memory console"
help
Select this to enable early debugging using an in memory console.
This console provides input and output buffers stored within the
kernel BSS and should be safe to select on any system. A debugger
can then be used to read kernel output or send input to the console.
endchoice
config PPC_MEMCONS_OUTPUT_SIZE
int "In memory console output buffer size"
depends on PPC_EARLY_DEBUG_MEMCONS
default 4096
help
Selects the size of the output buffer (in bytes) of the in memory
console.
config PPC_MEMCONS_INPUT_SIZE
int "In memory console input buffer size"
depends on PPC_EARLY_DEBUG_MEMCONS
default 128
help
Selects the size of the input buffer (in bytes) of the in memory
console.
config PPC_EARLY_DEBUG_OPAL
def_bool y
depends on PPC_EARLY_DEBUG_OPAL_RAW || PPC_EARLY_DEBUG_OPAL_HVSI
......
#if !defined(_ASM_POWERPC_CONTEXT_TRACKING_H)
#define _ASM_POWERPC_CONTEXT_TRACKING_H

/*
 * SCHEDULE_USER is the branch-and-link instruction assembly code uses to
 * enter the scheduler; which entry point it targets depends on whether
 * context tracking is configured.
 */
#if defined(CONFIG_CONTEXT_TRACKING)
/* Context tracking configured: branch to .schedule_user. */
#define SCHEDULE_USER bl .schedule_user
#else
/* Context tracking not configured: branch to .schedule. */
#define SCHEDULE_USER bl .schedule
#endif

#endif /* _ASM_POWERPC_CONTEXT_TRACKING_H */
......@@ -52,6 +52,7 @@
#define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000)
#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
#define FW_FEATURE_OPALv3 ASM_CONST(0x0000000400000000)
#ifndef __ASSEMBLY__
......@@ -69,7 +70,8 @@ enum {
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2 |
FW_FEATURE_OPALv3,
FW_FEATURE_POWERNV_ALWAYS = 0,
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
......
......@@ -96,11 +96,12 @@ static inline bool arch_irqs_disabled(void)
#endif
/*
 * Hard-disable interrupts and update the soft-disable bookkeeping in the PACA.
 *
 * The soft-enabled state is sampled before anything is modified, and
 * trace_hardirqs_off() is called at most once, after soft_enabled and
 * irq_happened have been updated. The earlier form, which tested
 * local_paca->soft_enabled right after __hard_irq_disable(), is dropped so
 * the trace hook is not invoked twice.
 */
#define hard_irq_disable()	do {			\
	u8 _was_enabled = get_paca()->soft_enabled;	\
	__hard_irq_disable();				\
	get_paca()->soft_enabled = 0;			\
	get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;	\
	if (_was_enabled)				\
		trace_hardirqs_off();			\
} while(0)
static inline bool lazy_irq_pending(void)
......
......@@ -243,7 +243,8 @@ enum OpalMCE_TlbErrorType {
/*
 * Thread status codes. Each enumerator appears exactly once; the
 * OPAL_THREAD_UNAVAILABLE value is tagged as belonging to opal-v3.
 */
enum OpalThreadStatus {
	OPAL_THREAD_INACTIVE = 0x0,
	OPAL_THREAD_STARTED = 0x1,
	OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */
};
enum OpalPciBusCompare {
......@@ -563,6 +564,8 @@ extern void opal_nvram_init(void);
extern int opal_machine_check(struct pt_regs *regs);
extern void opal_shutdown(void);
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_H */
......@@ -186,7 +186,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
/*
 * Return the page-table token held in a PMD entry: the entry's raw value
 * with the PMD_MASKED_BITS cleared.
 *
 * Only the PMD_MASKED_BITS form is kept; the older
 * "-sizeof(pte_t)*PTRS_PER_PTE" mask that preceded it made this return
 * unreachable.
 */
static inline pgtable_t pmd_pgtable(pmd_t pmd)
{
	return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
......
......@@ -47,7 +47,7 @@
* generic accessors and iterators here
*/
/*
 * Build a real_pte_t from PTE @e located at @p. The second member is the
 * value stored PTRS_PER_PTE entries past @p when _PAGE_COMBO is set in @e,
 * and 0 otherwise. The flag is tested through pte_val() so the macro does
 * not apply '&' to a pte_t directly.
 */
#define __real_pte(e,p)		((real_pte_t) { \
			(e), (pte_val(e) & _PAGE_COMBO) ? \
				(pte_val(*((p) + PTRS_PER_PTE))) : 0 })
#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \
(((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
......
......@@ -264,6 +264,8 @@ extern void rtas_progress(char *s, unsigned short hex);
extern void rtas_initialize(void);
extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
extern int rtas_online_cpus_mask(cpumask_var_t cpus);
extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
extern int rtas_ibm_suspend_me(struct rtas_args *);
struct rtc_time;
......
......@@ -97,7 +97,7 @@ static inline struct thread_info *current_thread_info(void)
/*
 * Thread-info flag bit numbers 6..12. Bit 9 is TIF_NOHZ; the former
 * TIF_MEMDIE definition at bit 9 is removed so the two flags do not share
 * a bit and TIF_MEMDIE is not defined twice.
 */
#define TIF_PERFMON_CTXSW	6	/* perfmon needs ctxsw calls */
#define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
#define TIF_SINGLESTEP		8	/* singlestepping active */
#define TIF_NOHZ		9	/* in adaptive nohz mode */
#define TIF_SECCOMP		10	/* secure computing */
#define TIF_RESTOREALL		11	/* Restore all regs (implies NOERROR) */
#define TIF_NOERROR		12	/* Force successful syscall return */
......@@ -106,6 +106,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */
#define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation
for stack store? */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
......@@ -124,8 +125,10 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_UPROBE (1<<TIF_UPROBE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
#define _TIF_NOHZ (1<<TIF_NOHZ)
/*
 * Union of the syscall trace, audit, seccomp and tracepoint flags together
 * with _TIF_NOHZ. Written as a single parenthesised expression; the shorter
 * variant without _TIF_NOHZ is removed so no stray tokens follow the macro.
 */
#define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
				 _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
				 _TIF_NOHZ)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE)
......
......@@ -52,6 +52,7 @@ extern void __init udbg_init_40x_realmode(void);
extern void __init udbg_init_cpm(void);
extern void __init udbg_init_usbgecko(void);
extern void __init udbg_init_wsp(void);
extern void __init udbg_init_memcons(void);
extern void __init udbg_init_ehv_bc(void);
extern void __init udbg_init_ps3gelic(void);
extern void __init udbg_init_debug_opal_raw(void);
......
......@@ -439,8 +439,6 @@ ret_from_fork:
ret_from_kernel_thread:
REST_NVGPRS(r1)
bl schedule_tail
li r3,0
stw r3,0(r1)
mtlr r14
mr r3,r15
PPC440EP_ERR42
......
......@@ -33,6 +33,7 @@
#include <asm/irqflags.h>
#include <asm/ftrace.h>
#include <asm/hw_irq.h>
#include <asm/context_tracking.h>
/*
* System calls.
......@@ -376,8 +377,6 @@ _GLOBAL(ret_from_fork)
_GLOBAL(ret_from_kernel_thread)
bl .schedule_tail
REST_NVGPRS(r1)
li r3,0
std r3,0(r1)
ld r14, 0(r14)
mtlr r14
mr r3,r15
......@@ -634,7 +633,7 @@ _GLOBAL(ret_from_except_lite)
andi. r0,r4,_TIF_NEED_RESCHED
beq 1f
bl .restore_interrupts
bl .schedule
SCHEDULE_USER
b .ret_from_except_lite
1: bl .save_nvgprs
......
......@@ -489,7 +489,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
*/
mfspr r14,SPRN_DBSR /* check single-step/branch taken */
andis. r15,r14,DBSR_IC@h
andis. r15,r14,(DBSR_IC|DBSR_BT)@h
beq+ 1f
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
......@@ -500,7 +500,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
bge+ cr1,1f
/* here it looks like we got an inappropriate debug exception. */
lis r14,DBSR_IC@h /* clear the IC event */
lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */
rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */
mtspr SPRN_DBSR,r14
mtspr SPRN_CSRR1,r11
......@@ -555,7 +555,7 @@ kernel_dbg_exc:
*/
mfspr r14,SPRN_DBSR /* check single-step/branch taken */
andis. r15,r14,DBSR_IC@h
andis. r15,r14,(DBSR_IC|DBSR_BT)@h
beq+ 1f
LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
......@@ -566,7 +566,7 @@ kernel_dbg_exc:
bge+ cr1,1f
/* here it looks like we got an inappropriate debug exception. */
lis r14,DBSR_IC@h /* clear the IC event */
lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */
rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */
mtspr SPRN_DBSR,r14
mtspr SPRN_DSRR1,r11
......
......@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/hardirq.h>
#include <asm/page.h>
#include <asm/current.h>
......@@ -335,10 +336,13 @@ void default_machine_kexec(struct kimage *image)
pr_debug("kexec: Starting switchover sequence.\n");
/* switch to a statically allocated stack. Based on irq stack code.
* We setup preempt_count to avoid using VMX in memcpy.
* XXX: the task struct will likely be invalid once we do the copy!
*/
kexec_stack.thread_info.task = current_thread_info()->task;
kexec_stack.thread_info.flags = 0;
kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET;
kexec_stack.thread_info.cpu = current_thread_info()->cpu;
/* We need a static PACA, too; copy this CPU's PACA over and switch to
* it. Also poison per_cpu_offset to catch anyone using non-static
......
......@@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2)
li r3,2
blr
/*
 * __bswapdi2 (32-bit): byte-reverse the 64-bit value held in the r3/r4
 * register pair.
 *
 * Each 32-bit word is byte-swapped with the rotate-and-insert idiom: rotate
 * left by 8, then use rlwimi on a rotate-by-24 of the source to patch in
 * the bytes at bit positions 0-7 and 16-23. r9 ends up holding the swapped
 * r4 and r10 the swapped r3; the two words are then exchanged, so on return
 * r3 = swapped(original r4) and r4 = swapped(original r3).
 */
_GLOBAL(__bswapdi2)
rotlwi r9,r4,8
rotlwi r10,r3,8
rlwimi r9,r4,24,0,7
rlwimi r10,r3,24,0,7
rlwimi r9,r4,24,16,23
rlwimi r10,r3,24,16,23
/* Exchange the two swapped words into the result registers. */
mr r3,r9
mr r4,r10
blr
_GLOBAL(abs)
srawi r4,r3,31
xor r3,r3,r4
......
......@@ -234,6 +234,17 @@ _GLOBAL(__flush_dcache_icache)
isync
blr
/*
 * __bswapdi2 (64-bit): byte-reverse the 64-bit value in r3.
 *
 * r8 receives the upper 32 bits of r3. r7 is built as the byte-swapped
 * lower word and r9 as the byte-swapped upper word, each with the
 * rotate-by-8 plus two rlwimi inserts from a rotate-by-24. The swapped
 * lower word is then shifted into the upper half and OR-ed with the swapped
 * upper word to form the result in r3.
 */
_GLOBAL(__bswapdi2)
srdi r8,r3,32
rlwinm r7,r3,8,0xffffffff
rlwimi r7,r3,24,0,7
rlwinm r9,r8,8,0xffffffff
rlwimi r7,r3,24,16,23
rlwimi r9,r8,24,0,7
rlwimi r9,r8,24,16,23
/* Combine: swapped low word becomes the high half of the result. */
sldi r7,r7,32
or r3,r7,r9
blr
#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
/*
......
......@@ -359,7 +359,6 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
enum pci_mmap_state mmap_state,
int write_combine)
{
unsigned long prot = pgprot_val(protection);
/* Write combine is always 0 on non-memory space mappings. On
* memory space, if the user didn't pass 1, we check for a
......@@ -376,9 +375,9 @@ static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
/* XXX would be nice to have a way to ask for write-through */
if (write_combine)
return pgprot_noncached_wc(prot);
return pgprot_noncached_wc(protection);
else
return pgprot_noncached(prot);
return pgprot_noncached(protection);
}
/*
......
......@@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3);
int __ucmpdi2(unsigned long long, unsigned long long);
EXPORT_SYMBOL(__ucmpdi2);
#endif
long long __bswapdi2(long long);
EXPORT_SYMBOL(__bswapdi2);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memmove);
......
......@@ -339,6 +339,13 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
static void prime_debug_regs(struct thread_struct *thread)
{
/*
* We could have inherited MSR_DE from userspace, since
* it doesn't get cleared on exception entry. Make sure
* MSR_DE is clear before we enable any debug events.
*/
mtmsr(mfmsr() & ~MSR_DE);
mtspr(SPRN_IAC1, thread->iac1);
mtspr(SPRN_IAC2, thread->iac2);
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
......@@ -971,6 +978,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
* do some house keeping and then return from the fork or clone
* system call, using the stack frame created above.
*/
((unsigned long *)sp)[0] = 0;
sp -= sizeof(struct pt_regs);
kregs = (struct pt_regs *) sp;
sp -= STACK_FRAME_OVERHEAD;
......
......@@ -32,6 +32,7 @@
#include <trace/syscall.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/context_tracking.h>
#include <asm/uaccess.h>
#include <asm/page.h>
......@@ -1788,6 +1789,8 @@ long do_syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
user_exit();
secure_computing_strict(regs->gpr[0]);
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
......@@ -1832,4 +1835,6 @@ void do_syscall_trace_leave(struct pt_regs *regs)
step = test_thread_flag(TIF_SINGLESTEP);
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
user_enter();
}
......@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
......@@ -807,6 +808,95 @@ static void rtas_percpu_suspend_me(void *info)
__rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
}
/* Direction of a CPU state change requested from rtas_cpu_state_change_mask(). */
enum rtas_cpu_state {
	DOWN = 0,	/* take the CPU down (cpu_down) */
	UP = 1,		/* bring the CPU up (cpu_up) */
};
#ifndef CONFIG_SMP
/*
 * !CONFIG_SMP variant: no CPU state can be changed.
 *
 * An empty mask succeeds with 0. A non-empty mask is cleared (no CPU was
 * changed) and -EINVAL is returned. @state is not consulted.
 */
static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
				cpumask_var_t cpus)
{
	if (cpumask_empty(cpus))
		return 0;

	cpumask_clear(cpus);
	return -EINVAL;
}
#else
/*
 * Change the state of every CPU set in @cpus to @state.
 *
 * @state: UP to call cpu_up(), DOWN to call cpu_down(), on each CPU.
 * @cpus:  in: CPUs to change; out: CPUs whose state was actually changed.
 *         CPUs with states changed will be set in the mask,
 *         CPUs with status unchanged will be unset in the mask.
 *
 * For DOWN, a failing CPU is dropped from the mask and the remaining CPUs
 * are still attempted. For UP, processing stops at the first failure; the
 * failing CPU and every CPU that was not yet attempted are dropped from the
 * mask so that only the CPUs already brought up remain set.
 *
 * Returns 0 on success, otherwise the first error returned by
 * cpu_up()/cpu_down().
 */
static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
				cpumask_var_t cpus)
{
	int cpu;
	int cpuret = 0;
	int ret = 0;

	if (cpumask_empty(cpus))
		return 0;

	for_each_cpu(cpu, cpus) {
		switch (state) {
		case DOWN:
			cpuret = cpu_down(cpu);
			break;
		case UP:
			cpuret = cpu_up(cpu);
			break;
		}
		if (cpuret) {
			pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
					__func__,
					((state == UP) ? "up" : "down"),
					cpu, cpuret);
			if (!ret)
				ret = cpuret;
			if (state == UP) {
				int unchanged;

				/*
				 * Clear bits for unchanged cpus, return.
				 * The unchanged CPUs are this one and all
				 * higher-numbered ones still set in the mask;
				 * shifting the mask right then left by 'cpu'
				 * would instead wipe the lower bits, i.e. the
				 * CPUs that were successfully brought up.
				 */
				for (unchanged = cpu; unchanged < nr_cpu_ids;
				     unchanged = cpumask_next(unchanged, cpus))
					cpumask_clear_cpu(unchanged, cpus);
				break;
			} else {
				/* clear bit for unchanged cpu, continue */
				cpumask_clear_cpu(cpu, cpus);
			}
		}
	}

	return ret;
}
#endif
/*
 * Bring every CPU in @cpus online.
 *
 * On failure the CPUs reported as changed in @cpus are taken offline again
 * using a temporary copy of the mask, so @cpus itself keeps the result of
 * the first failure. If the temporary mask cannot be allocated the rollback
 * is skipped. Returns 0 on success or the error from the online attempt.
 */
int rtas_online_cpus_mask(cpumask_var_t cpus)
{
	cpumask_var_t undo_mask;
	int rc = rtas_cpu_state_change_mask(UP, cpus);

	if (!rc)
		return rc;

	if (!alloc_cpumask_var(&undo_mask, GFP_TEMPORARY))
		return rc;

	/* Work on a copy so the caller's mask survives the rollback. */
	cpumask_copy(undo_mask, cpus);
	rtas_offline_cpus_mask(undo_mask);
	free_cpumask_var(undo_mask);

	return rc;
}
EXPORT_SYMBOL(rtas_online_cpus_mask);
/*
 * Take every CPU set in @cpus offline by delegating to
 * rtas_cpu_state_change_mask() with DOWN, and return its result. @cpus is
 * updated by that helper.
 */
int rtas_offline_cpus_mask(cpumask_var_t cpus)
{
return rtas_cpu_state_change_mask(DOWN, cpus);
}
EXPORT_SYMBOL(rtas_offline_cpus_mask);
int rtas_ibm_suspend_me(struct rtas_args *args)
{
long state;
......@@ -814,6 +904,8 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
struct rtas_suspend_me_data data;
DECLARE_COMPLETION_ONSTACK(done);
cpumask_var_t offline_mask;
int cpuret;
if (!rtas_service_present("ibm,suspend-me"))
return -ENOSYS;
......@@ -837,11 +929,24 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
return 0;
}
if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
return -ENOMEM;
atomic_set(&data.working, 0);
atomic_set(&data.done, 0);
atomic_set(&data.error, 0);
data.token = rtas_token("ibm,suspend-me");
data.complete = &done;
/* All present CPUs must be online */
cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
cpuret = rtas_online_cpus_mask(offline_mask);
if (cpuret) {
pr_err("%s: Could not bring present CPUs online.\n", __func__);
atomic_set(&data.error, cpuret);
goto out;
}
stop_topology_update();
/* Call function on all CPUs. One of us will make the
......@@ -857,6 +962,14 @@ int rtas_ibm_suspend_me(struct rtas_args *args)
start_topology_update();
/* Take down CPUs not online prior to suspend */
cpuret = rtas_offline_cpus_mask(offline_mask);
if (cpuret)
pr_warn("%s: Could not restore CPUs to offline state.\n",
__func__);
out:
free_cpumask_var(offline_mask);
return atomic_read(&data.error);
}
#else /* CONFIG_PPC_PSERIES */
......
......@@ -89,6 +89,7 @@
/* Array sizes */
#define VALIDATE_BUF_SIZE 4096
#define VALIDATE_MSG_LEN 256
#define RTAS_MSG_MAXLEN 64
/* Quirk - RTAS requires 4k list length and block size */
......@@ -466,7 +467,7 @@ static void validate_flash(struct rtas_validate_flash_t *args_buf)
}
static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf,
char *msg)
char *msg, int msglen)
{
int n;
......@@ -474,7 +475,8 @@ static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf,
n = sprintf(msg, "%d\n", args_buf->update_results);
if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) ||
(args_buf->update_results == VALIDATE_TMP_UPDATE))
n += sprintf(msg + n, "%s\n", args_buf->buf);
n += snprintf(msg + n, msglen - n, "%s\n",
args_buf->buf);
} else {
n = sprintf(msg, "%d\n", args_buf->status);
}
......@@ -486,11 +488,11 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf,
{
struct rtas_validate_flash_t *const args_buf =
&rtas_validate_flash_data;
char msg[RTAS_MSG_MAXLEN];
char msg[VALIDATE_MSG_LEN];
int msglen;
mutex_lock(&rtas_validate_flash_mutex);
msglen = get_validate_flash_msg(args_buf, msg);
msglen = get_validate_flash_msg(args_buf, msg, VALIDATE_MSG_LEN);
mutex_unlock(&rtas_validate_flash_mutex);
return simple_read_from_buffer(buf, count, ppos, msg, msglen);
......
......@@ -13,6 +13,7 @@
#include <linux/signal.h>
#include <linux/uprobes.h>
#include <linux/key.h>
#include <linux/context_tracking.h>
#include <asm/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
......@@ -24,7 +25,7 @@
* through debug.exception-trace sysctl.
*/
int show_unhandled_signals = 0;
int show_unhandled_signals = 1;
/*
* Allocate space for the signal frame
......@@ -159,6 +160,8 @@ static int do_signal(struct pt_regs *regs)
void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
{
user_exit();
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
......@@ -169,4 +172,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
user_enter();
}
......@@ -35,6 +35,7 @@
#include <linux/kdebug.h>
#include <linux/debugfs.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
......@@ -667,6 +668,7 @@ int machine_check_generic(struct pt_regs *regs)
void machine_check_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
int recover = 0;
__get_cpu_var(irq_stat).mce_exceptions++;
......@@ -683,7 +685,7 @@ void machine_check_exception(struct pt_regs *regs)
recover = cur_cpu_spec->machine_check(regs);
if (recover > 0)
return;
goto bail;
#if defined(CONFIG_8xx) && defined(CONFIG_PCI)
/* the qspan pci read routines can cause machine checks -- Cort
......@@ -693,20 +695,23 @@ void machine_check_exception(struct pt_regs *regs)
* -- BenH
*/
bad_page_fault(regs, regs->dar, SIGBUS);
return;
goto bail;
#endif
if (debugger_fault_handler(regs))
return;
goto bail;
if (check_io_access(regs))
return;
goto bail;
die("Machine check", regs, SIGBUS);
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
panic("Unrecoverable Machine check");
bail:
exception_exit(prev_state);
}
void SMIException(struct pt_regs *regs)
......@@ -716,20 +721,29 @@ void SMIException(struct pt_regs *regs)
void unknown_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
_exception(SIGTRAP, regs, 0, 0);
exception_exit(prev_state);
}
/*
 * Instruction breakpoint handler.
 *
 * Gives the die-notifier chain and then the debugger a chance to consume
 * the event; if neither does, SIGTRAP/TRAP_BRKPT is raised at regs->nip.
 * Every exit goes through the bail label so that the exception_enter()
 * at the top is always paired with exception_exit(); a bare return would
 * skip it.
 */
void instruction_breakpoint_exception(struct pt_regs *regs)
{
	enum ctx_state prev_state = exception_enter();

	if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
					5, SIGTRAP) == NOTIFY_STOP)
		goto bail;
	if (debugger_iabr_match(regs))
		goto bail;
	_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);

bail:
	exception_exit(prev_state);
}
void RunModeException(struct pt_regs *regs)
......@@ -739,15 +753,20 @@ void RunModeException(struct pt_regs *regs)
/*
 * Single-step trap handler.
 *
 * Clears the single-step state, then lets the die-notifier chain and the
 * debugger consume the event; otherwise SIGTRAP/TRAP_TRACE is raised at
 * regs->nip. Every exit goes through the bail label so that the
 * exception_enter() at the top is always paired with exception_exit();
 * a bare return would skip it.
 */
void __kprobes single_step_exception(struct pt_regs *regs)
{
	enum ctx_state prev_state = exception_enter();

	clear_single_step(regs);

	if (notify_die(DIE_SSTEP, "single_step", regs, 5,
					5, SIGTRAP) == NOTIFY_STOP)
		goto bail;
	if (debugger_sstep(regs))
		goto bail;

	_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);

bail:
	exception_exit(prev_state);
}
/*
......@@ -1005,6 +1024,7 @@ int is_valid_bugaddr(unsigned long addr)
void __kprobes program_check_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
extern int do_mathemu(struct pt_regs *regs);
......@@ -1014,26 +1034,26 @@ void __kprobes program_check_exception(struct pt_regs *regs)
if (reason & REASON_FP) {
/* IEEE FP exception */
parse_fpe(regs);
return;
goto bail;
}
if (reason & REASON_TRAP) {
/* Debugger is first in line to stop recursive faults in
* rcu_lock, notify_die, or atomic_notifier_call_chain */
if (debugger_bpt(regs))
return;
goto bail;
/* trap exception */
if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
== NOTIFY_STOP)
return;
goto bail;
if (!(regs->msr & MSR_PR) && /* not user-mode */
report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
regs->nip += 4;
return;
goto bail;
}
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
return;
goto bail;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (reason & REASON_TM) {
......@@ -1049,7 +1069,7 @@ void __kprobes program_check_exception(struct pt_regs *regs)
if (!user_mode(regs) &&
report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
regs->nip += 4;
return;
goto bail;
}
/* If usermode caused this, it's done something illegal and
* gets a SIGILL slap on the wrist. We call it an illegal
......@@ -1059,7 +1079,7 @@ void __kprobes program_check_exception(struct pt_regs *regs)
*/
if (user_mode(regs)) {
_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
return;
goto bail;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
"at %lx (msr 0x%x)\n", regs->nip, reason);
......@@ -1083,16 +1103,16 @@ void __kprobes program_check_exception(struct pt_regs *regs)
switch (do_mathemu(regs)) {
case 0:
emulate_single_step(regs);
return;
goto bail;
case 1: {
int code = 0;
code = __parse_fpscr(current->thread.fpscr.val);
_exception(SIGFPE, regs, code, regs->nip);
return;
goto bail;
}
case -EFAULT:
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
return;
goto bail;
}
/* fall through on any other errors */
#endif /* CONFIG_MATH_EMULATION */
......@@ -1103,10 +1123,10 @@ void __kprobes program_check_exception(struct pt_regs *regs)
case 0:
regs->nip += 4;
emulate_single_step(regs);
return;
goto bail;
case -EFAULT:
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
return;
goto bail;
}
}
......@@ -1114,10 +1134,14 @@ void __kprobes program_check_exception(struct pt_regs *regs)
_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
else
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
bail:
exception_exit(prev_state);
}
void alignment_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
int sig, code, fixed = 0;
/* We restore the interrupt state now */
......@@ -1131,7 +1155,7 @@ void alignment_exception(struct pt_regs *regs)
if (fixed == 1) {
regs->nip += 4; /* skip over emulated instruction */
emulate_single_step(regs);
return;
goto bail;
}
/* Operand address was bad */
......@@ -1146,6 +1170,9 @@ void alignment_exception(struct pt_regs *regs)
_exception(sig, regs, code, regs->dar);
else
bad_page_fault(regs, regs->dar, sig);
bail:
exception_exit(prev_state);
}
void StackOverflow(struct pt_regs *regs)
......@@ -1174,23 +1201,32 @@ void trace_syscall(struct pt_regs *regs)
void kernel_fp_unavailable_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
exception_exit(prev_state);
}
/*
 * VMX/Altivec-unavailable exception handler.
 *
 * From user mode the task receives SIGILL/ILL_ILLOPC; otherwise the event
 * is logged at KERN_EMERG and die() is called with SIGABRT. The user-mode
 * path leaves through the bail label so that the exception_enter() at the
 * top is always paired with exception_exit(); a bare return would skip it.
 */
void altivec_unavailable_exception(struct pt_regs *regs)
{
	enum ctx_state prev_state = exception_enter();

	if (user_mode(regs)) {
		/* A user program has executed an altivec instruction,
		   but this kernel doesn't support altivec. */
		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
		goto bail;
	}

	printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
			"%lx at %lx\n", regs->trap, regs->nip);
	die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);

bail:
	exception_exit(prev_state);
}
void vsx_unavailable_exception(struct pt_regs *regs)
......
......@@ -64,6 +64,9 @@ void __init udbg_early_init(void)
udbg_init_usbgecko();
#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
udbg_init_wsp();
#elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS)
/* In memory console */
udbg_init_memcons();
#elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC)
udbg_init_ehv_bc();
#elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC)
......
......@@ -32,6 +32,7 @@
#include <linux/perf_event.h>
#include <linux/magic.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <asm/firmware.h>
#include <asm/page.h>
......@@ -196,6 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
enum ctx_state prev_state = exception_enter();
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
......@@ -204,6 +206,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
int trap = TRAP(regs);
int is_exec = trap == 0x400;
int fault;
int rc = 0;
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
/*
......@@ -230,28 +233,30 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
* look at it
*/
if (error_code & ICSWX_DSI_UCT) {
int rc = acop_handle_fault(regs, address, error_code);
rc = acop_handle_fault(regs, address, error_code);
if (rc)
return rc;
goto bail;
}
#endif /* CONFIG_PPC_ICSWX */
if (notify_page_fault(regs))
return 0;
goto bail;
if (unlikely(debugger_fault_handler(regs)))
return 0;
goto bail;
/* On a kernel SLB miss we can only check for a valid exception entry */
if (!user_mode(regs) && (address >= TASK_SIZE))
return SIGSEGV;
if (!user_mode(regs) && (address >= TASK_SIZE)) {
rc = SIGSEGV;
goto bail;
}
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
defined(CONFIG_PPC_BOOK3S_64))
if (error_code & DSISR_DABRMATCH) {
/* breakpoint match */
do_break(regs, address, error_code);
return 0;
goto bail;
}
#endif
......@@ -260,8 +265,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
local_irq_enable();
if (in_atomic() || mm == NULL) {
if (!user_mode(regs))
return SIGSEGV;
if (!user_mode(regs)) {
rc = SIGSEGV;
goto bail;
}
/* in_atomic() in user mode is really bad,
as is current->mm == NULL. */
printk(KERN_EMERG "Page fault in user mode with "
......@@ -417,9 +424,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
*/
fault = handle_mm_fault(mm, vma, address, flags);
if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
int rc = mm_fault_error(regs, address, fault);
rc = mm_fault_error(regs, address, fault);
if (rc >= MM_FAULT_RETURN)
return rc;
goto bail;
else
rc = 0;
}
/*
......@@ -454,7 +463,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
}
up_read(&mm->mmap_sem);
return 0;
goto bail;
bad_area:
up_read(&mm->mmap_sem);
......@@ -463,7 +472,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
/* User mode accesses cause a SIGSEGV */
if (user_mode(regs)) {
_exception(SIGSEGV, regs, code, address);
return 0;
goto bail;
}
if (is_exec && (error_code & DSISR_PROTFAULT))
......@@ -471,7 +480,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
" page (%lx) - exploit attempt? (uid: %d)\n",
address, from_kuid(&init_user_ns, current_uid()));
return SIGSEGV;
rc = SIGSEGV;
bail:
exception_exit(prev_state);
return rc;
}
......
......@@ -33,6 +33,7 @@
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/memblock.h>
#include <linux/context_tracking.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
......@@ -954,6 +955,7 @@ void hash_failure_debug(unsigned long ea, unsigned long access,
*/
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
enum ctx_state prev_state = exception_enter();
pgd_t *pgdir;
unsigned long vsid;
struct mm_struct *mm;
......@@ -973,7 +975,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
mm = current->mm;
if (! mm) {
DBG_LOW(" user region with no mm !\n");
return 1;
rc = 1;
goto bail;
}
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
......@@ -992,19 +995,23 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
/* Not a valid range
* Send the problem up to do_page_fault
*/
return 1;
rc = 1;
goto bail;
}
DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
/* Bad address. */
if (!vsid) {
DBG_LOW("Bad address!\n");
return 1;
rc = 1;
goto bail;
}
/* Get pgdir */
pgdir = mm->pgd;
if (pgdir == NULL)
return 1;
if (pgdir == NULL) {
rc = 1;
goto bail;
}
/* Check CPU locality */
tmp = cpumask_of(smp_processor_id());
......@@ -1027,7 +1034,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
if (ptep == NULL || !pte_present(*ptep)) {
DBG_LOW(" no PTE !\n");
return 1;
rc = 1;
goto bail;
}
/* Add _PAGE_PRESENT to the required access perm */
......@@ -1038,13 +1046,16 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
*/
if (access & ~pte_val(*ptep)) {
DBG_LOW(" no access !\n");
return 1;
rc = 1;
goto bail;
}
#ifdef CONFIG_HUGETLB_PAGE
if (hugeshift)
return __hash_page_huge(ea, access, vsid, ptep, trap, local,
if (hugeshift) {
rc = __hash_page_huge(ea, access, vsid, ptep, trap, local,
ssize, hugeshift, psize);
goto bail;
}
#endif /* CONFIG_HUGETLB_PAGE */
#ifndef CONFIG_PPC_64K_PAGES
......@@ -1124,6 +1135,9 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
pte_val(*(ptep + PTRS_PER_PTE)));
#endif
DBG_LOW(" -> rc=%d\n", rc);
bail:
exception_exit(prev_state);
return rc;
}
EXPORT_SYMBOL_GPL(hash_page);
......@@ -1259,6 +1273,8 @@ void flush_hash_range(unsigned long number, int local)
*/
void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
{
enum ctx_state prev_state = exception_enter();
if (user_mode(regs)) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
if (rc == -2)
......@@ -1268,6 +1284,8 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
_exception(SIGBUS, regs, BUS_ADRERR, address);
} else
bad_page_fault(regs, address, SIGBUS);
exception_exit(prev_state);
}
long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
......
......@@ -215,7 +215,8 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
unsigned long phys)
{
int mapped = htab_bolt_mapping(start, start + page_size, phys,
PAGE_KERNEL, mmu_vmemmap_psize,
pgprot_val(PAGE_KERNEL),
mmu_vmemmap_psize,
mmu_kernel_ssize);
BUG_ON(mapped < 0);
}
......
......@@ -13,11 +13,13 @@
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <asm/reg.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/code-patching.h>
#define BHRB_MAX_ENTRIES 32
#define BHRB_TARGET 0x0000000000000002
......@@ -100,6 +102,10 @@ static inline int siar_valid(struct pt_regs *regs)
return 1;
}
/*
 * Empty BHRB hooks for the CONFIG_PPC32 build: enable, disable, branch-stack
 * flush and read all do nothing here.
 */
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
void power_pmu_flush_branch_stack(void) {}
static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
#endif /* CONFIG_PPC32 */
static bool regs_use_siar(struct pt_regs *regs)
......@@ -308,6 +314,159 @@ static inline int siar_valid(struct pt_regs *regs)
return 1;
}
/*
 * Throw away whatever the BHRB currently holds by executing the
 * PPC_CLRBHRB instruction sequence.
 */
static void power_pmu_bhrb_reset(void)
{
	asm volatile(PPC_CLRBHRB);
}
/*
 * Account for one more BHRB user on this CPU.
 *
 * Nothing happens when the PMU advertises no BHRB entries
 * (ppmu->bhrb_nr is zero). For an event that belongs to a task, the
 * buffer is wiped whenever the event's context differs from the one
 * remembered in cpuhw->bhrb_context, so that branch records from one
 * context are never handed to another.
 */
static void power_pmu_bhrb_enable(struct perf_event *event)
{
	struct cpu_hw_events *hw = &__get_cpu_var(cpu_hw_events);

	if (ppmu->bhrb_nr) {
		if (event->ctx->task && event->ctx != hw->bhrb_context) {
			power_pmu_bhrb_reset();
			hw->bhrb_context = event->ctx;
		}
		hw->bhrb_users += 1;
	}
}
/*
 * Drop one BHRB user on this CPU.
 *
 * Nothing happens when the PMU advertises no BHRB entries. A negative
 * user count triggers a one-time warning. The BHRB itself is not
 * switched off here (it cannot be while other events are active on the
 * PMU); once the last user is gone and the PMU is not disabled, only
 * the remembered context pointer is cleared so it cannot go stale.
 */
static void power_pmu_bhrb_disable(struct perf_event *event)
{
	struct cpu_hw_events *hw = &__get_cpu_var(cpu_hw_events);

	if (ppmu->bhrb_nr == 0)
		return;

	hw->bhrb_users -= 1;
	WARN_ON_ONCE(hw->bhrb_users < 0);

	if (hw->disabled || hw->bhrb_users)
		return;

	/* last user gone: forget the context */
	hw->bhrb_context = NULL;
}
/*
 * Context-switch hook: clear the BHRB so that branch entries recorded
 * for one process do not get mixed with those of the next one.
 * Does nothing when the PMU has no BHRB entries.
 */
void power_pmu_flush_branch_stack(void)
{
	if (!ppmu->bhrb_nr)
		return;

	power_pmu_bhrb_reset();
}
/*
 * Work out the destination ("to" address) of the branch instruction
 * located at @addr.
 *
 * Kernel addresses are decoded in place with branch_target(). For any
 * other address the instruction word is first copied into a local with
 * page faults disabled; if that copy fails the function returns 0.
 * branch_target() then decodes the local copy, so for a relative branch
 * the result is relative to the local variable and has to be rebased
 * onto @addr. A zero result or an absolute branch is returned as is.
 */
static __u64 power_pmu_bhrb_to(u64 addr)
{
	unsigned int insn;
	__u64 dest;
	int err;

	if (is_kernel_addr(addr))
		return branch_target((unsigned int *)addr);

	/* Userspace: fetch the instruction without taking a fault */
	pagefault_disable();
	err = __get_user_inatomic(insn, (unsigned int __user *)addr);
	pagefault_enable();
	if (err)
		return 0;

	dest = branch_target(&insn);
	if (!dest || (insn & BRANCH_ABSOLUTE))
		return dest;

	/* Rebase: swap the address of the local copy for the real one */
	return dest - (unsigned long)&insn + addr;
}
/*
 * Drain the hardware BHRB into cpuhw->bhrb_entries[] and store the
 * number of records produced in cpuhw->bhrb_stack.nr.
 *
 * Branches are read most recent first (ie. mfbhrb 0 is the most recent
 * branch). A zero value marks the end of the valid entries; an entry
 * whose address field is zero is skipped as invalid.
 *
 * There are two types of valid entries:
 * 1) a target entry, which is the "to" address of a computed goto
 *    like a blr, bctr or btar. The next entry read from the BHRB is the
 *    branch corresponding to this target (ie. the actual blr/bctr/btar
 *    instruction).
 * 2) a from address, which is an actual branch. If a target entry
 *    precedes it, this is the matching branch for that target. If it
 *    does not follow a target entry, the target is given as an
 *    immediate field in the instruction (ie. an I or B form branch),
 *    and the instruction has to be read from memory to determine the
 *    "to" address.
 */
void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
{
	int rd = 0;	/* next hardware slot to read */
	int wr = 0;	/* next record to fill in bhrb_entries[] */

	while (rd < ppmu->bhrb_nr) {
		u64 raw = read_bhrb(rd++);
		u64 ea;
		int pred;

		/* Terminal marker: End of valid BHRB entries */
		if (!raw)
			break;

		ea = raw & BHRB_EA;
		pred = raw & BHRB_PREDICTION;

		/* invalid entry */
		if (!ea)
			continue;

		if (!(raw & BHRB_TARGET)) {
			/* Branches to immediate field (ie I or B form) */
			cpuhw->bhrb_entries[wr].from = ea;
			cpuhw->bhrb_entries[wr].to = power_pmu_bhrb_to(ea);
			cpuhw->bhrb_entries[wr].mispred = pred;
			cpuhw->bhrb_entries[wr].predicted = ~pred;
		} else {
			/*
			 * Target branches use two entries
			 * (ie. computed gotos/XL form)
			 */
			cpuhw->bhrb_entries[wr].to = ea;
			cpuhw->bhrb_entries[wr].mispred = pred;
			cpuhw->bhrb_entries[wr].predicted = ~pred;

			/*
			 * Get from address in next entry.
			 * NOTE(review): this read is not bounded by bhrb_nr;
			 * presumably read_bhrb() tolerates that - confirm.
			 */
			raw = read_bhrb(rd++);
			ea = raw & BHRB_EA;
			if (raw & BHRB_TARGET) {
				/*
				 * Shouldn't have two targets in a row..
				 * Reset index and try again
				 */
				rd--;
				ea = 0;
			}
			cpuhw->bhrb_entries[wr].from = ea;
		}
		wr++;
	}
	cpuhw->bhrb_stack.nr = wr;
}
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
......@@ -904,47 +1063,6 @@ static int collect_events(struct perf_event *group, int max_count,
return n;
}
/* Clear the whole BHRB via the PPC_CLRBHRB instruction sequence. */
static void power_pmu_bhrb_reset(void)
{
	asm volatile(PPC_CLRBHRB);
}
/*
 * Count a new BHRB user on the current CPU. When the PMU has no BHRB
 * entries this is a no-op. If the event is bound to a task and its
 * context is not the one stored in cpuhw->bhrb_context, the BHRB is
 * reset first and the new context is remembered, to avoid data leaks
 * between contexts.
 */
void power_pmu_bhrb_enable(struct perf_event *event)
{
	struct cpu_hw_events *hw = &__get_cpu_var(cpu_hw_events);

	if (ppmu->bhrb_nr == 0)
		return;

	if (event->ctx->task && hw->bhrb_context != event->ctx) {
		power_pmu_bhrb_reset();
		hw->bhrb_context = event->ctx;
	}

	++hw->bhrb_users;
}
/*
 * Remove one BHRB user from the current CPU; a no-op when the PMU has
 * no BHRB entries. Warns once if the user count drops below zero.
 * The BHRB cannot be turned off while other events are active on the
 * PMU, so the only clean-up done when the PMU is not disabled and no
 * users remain is clearing the cached context pointer.
 */
void power_pmu_bhrb_disable(struct perf_event *event)
{
	struct cpu_hw_events *hw = &__get_cpu_var(cpu_hw_events);

	if (ppmu->bhrb_nr) {
		--hw->bhrb_users;
		WARN_ON_ONCE(hw->bhrb_users < 0);

		/* avoid stale pointer */
		if (!(hw->disabled || hw->bhrb_users))
			hw->bhrb_context = NULL;
	}
}
/*
* Add a event to the PMU.
* If all events are not already frozen, then we disable and
......@@ -1180,15 +1298,6 @@ int power_pmu_commit_txn(struct pmu *pmu)
return 0;
}
/*
 * Called on context switch so that the branch entries of the outgoing
 * process are not mingled with those of the incoming one. The BHRB is
 * reset only when the PMU actually has BHRB entries.
 */
void power_pmu_flush_branch_stack(void)
{
	if (ppmu->bhrb_nr == 0)
		return;

	power_pmu_bhrb_reset();
}
/*
* Return 1 if we might be able to put event on a limited PMC,
* or 0 if not.
......@@ -1458,77 +1567,6 @@ struct pmu power_pmu = {
.flush_branch_stack = power_pmu_flush_branch_stack,
};
/*
 * Processing BHRB entries.
 *
 * Walks the hardware BHRB from slot 0 until a zero value (terminal
 * marker) or ppmu->bhrb_nr slots have been read, fills
 * cpuhw->bhrb_entries[] and stores the number of records produced in
 * cpuhw->bhrb_stack.nr.
 *
 * A non-target entry opens a new record (from/mispred/predicted). A
 * target entry supplies the "to" address and prediction flags of the
 * record opened immediately before it and does not open a record.
 */
void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
{
	u64 val;
	u64 addr;
	int r_index, u_index, target, pred;

	r_index = 0;
	u_index = 0;
	while (r_index < ppmu->bhrb_nr) {
		/* Assembly read function */
		val = read_bhrb(r_index);

		/* Terminal marker: End of valid BHRB entries */
		if (val == 0)
			break;

		/* Every path below consumes exactly this one hardware slot */
		r_index++;

		/* BHRB field break up */
		addr = val & BHRB_EA;
		pred = val & BHRB_PREDICTION;
		/*
		 * BHRB_TARGET is not bit 0, so the masked value is never 1.
		 * Reduce it to 0/1 so the "target == 1" tests can match.
		 */
		target = !!(val & BHRB_TARGET);

		/*
		 * Entries without an address carry no branch. The three
		 * flag combinations skipped here are:
		 *   target == 0, pred == 1: probable missed entry
		 *                           (not applicable for POWER8)
		 *   target == 1, pred == 1: real missed entry (POWER8)
		 *   target == 1, pred == 0: reserved, not a valid entry
		 * NOTE(review): "pred == 1" assumes BHRB_PREDICTION is
		 * bit 0 - confirm against its definition.
		 */
		if ((addr == 0) && ((target == 1) || (pred == 1)))
			continue;

		if (target) {
			/*
			 * A target completes the previous record. Without
			 * one (first slot, or everything before it was
			 * skipped) there is nothing to update, and
			 * bhrb_entries[-1] must not be written.
			 */
			if (u_index == 0)
				continue;

			/* Update target address for the previous entry */
			cpuhw->bhrb_entries[u_index - 1].to = addr;
			cpuhw->bhrb_entries[u_index - 1].mispred = pred;
			cpuhw->bhrb_entries[u_index - 1].predicted = ~pred;

			/* Dont increment u_index */
		} else {
			/* Update address, flags for current entry */
			cpuhw->bhrb_entries[u_index].from = addr;
			cpuhw->bhrb_entries[u_index].mispred = pred;
			cpuhw->bhrb_entries[u_index].predicted = ~pred;

			/* Successfully populated one entry */
			u_index++;
		}
	}
	cpuhw->bhrb_stack.nr = u_index;
}
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
......
......@@ -128,7 +128,7 @@ config PPC_RTAS_DAEMON
config RTAS_PROC
bool "Proc interface to RTAS"
depends on PPC_RTAS
depends on PPC_RTAS && PROC_FS
default y
config RTAS_FLASH
......
......@@ -15,6 +15,7 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/opal.h>
#include <asm/firmware.h>
......@@ -28,6 +29,8 @@ struct opal {
static struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
extern u64 opal_mc_secondary_handler[];
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
int __init early_init_dt_scan_opal(unsigned long node,
const char *uname, int depth, void *data)
......@@ -53,7 +56,11 @@ int __init early_init_dt_scan_opal(unsigned long node,
opal.entry, entryp, entrysz);
powerpc_firmware_features |= FW_FEATURE_OPAL;
if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
powerpc_firmware_features |= FW_FEATURE_OPALv2;
powerpc_firmware_features |= FW_FEATURE_OPALv3;
printk("OPAL V3 detected !\n");
} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
powerpc_firmware_features |= FW_FEATURE_OPALv2;
printk("OPAL V2 detected !\n");
} else {
......@@ -144,6 +151,13 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
len = total_len;
rc = opal_console_write(vtermno, &len, data);
/* Closed or other error drop */
if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
rc != OPAL_BUSY_EVENT) {
written = total_len;
break;
}
if (rc == OPAL_SUCCESS) {
total_len -= len;
data += len;
......@@ -316,6 +330,8 @@ static int __init opal_init(void)
irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
pr_debug("opal: Found %d interrupts reserved for OPAL\n",
irqs ? (irqlen / 4) : 0);
opal_irq_count = irqlen / 4;
opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
unsigned int hwirq = be32_to_cpup(irqs);
unsigned int irq = irq_create_mapping(NULL, hwirq);
......@@ -327,7 +343,19 @@ static int __init opal_init(void)
if (rc)
pr_warning("opal: Error %d requesting irq %d"
" (0x%x)\n", rc, irq, hwirq);
opal_irqs[i] = irq;
}
return 0;
}
subsys_initcall(opal_init);
void opal_shutdown(void)
{
unsigned int i;
for (i = 0; i < opal_irq_count; i++) {
if (opal_irqs[i])
free_irq(opal_irqs[i], 0);
opal_irqs[i] = 0;
}
}
......@@ -1048,6 +1048,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}
/*
 * Shutdown hook for an IODA PHB: asks OPAL to reset the PHB's IODA
 * tables (OPAL_PCI_IODA_TABLE_RESET, asserted). The status returned by
 * opal_pci_reset() is not examined.
 */
static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
	opal_pci_reset(phb->opal_id,
		       OPAL_PCI_IODA_TABLE_RESET,
		       OPAL_ASSERT_RESET);
}
void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
{
struct pci_controller *hose;
......@@ -1178,6 +1184,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
/* Setup shutdown function for kexec */
phb->shutdown = pnv_pci_ioda_shutdown;
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
......
......@@ -450,6 +450,18 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
pnv_pci_dma_fallback_setup(hose, pdev);
}
/*
 * Walk every PCI controller on hose_list and invoke the shutdown
 * callback of its PHB. Controllers without private data, or whose PHB
 * has no shutdown callback, are skipped.
 */
void pnv_pci_shutdown(void)
{
	struct pci_controller *hose;

	list_for_each_entry(hose, &hose_list, list_node) {
		struct pnv_phb *phb = hose->private_data;

		if (!phb || !phb->shutdown)
			continue;

		phb->shutdown(phb);
	}
}
/* Fixup wrong class code in p7ioc and p8 root complex */
static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
{
......
......@@ -86,6 +86,7 @@ struct pnv_phb {
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
void (*shutdown)(struct pnv_phb *phb);
union {
struct {
......@@ -158,4 +159,5 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
u64 *startp, u64 *endp);
#endif /* __POWERNV_PCI_H */
......@@ -9,8 +9,10 @@ static inline void pnv_smp_init(void) { }
#ifdef CONFIG_PCI
extern void pnv_pci_init(void);
extern void pnv_pci_shutdown(void);
#else
static inline void pnv_pci_init(void) { }
static inline void pnv_pci_shutdown(void) { }
#endif
#endif /* _POWERNV_H */
......@@ -78,7 +78,9 @@ static void pnv_show_cpuinfo(struct seq_file *m)
if (root)
model = of_get_property(root, "model", NULL);
seq_printf(m, "machine\t\t: PowerNV %s\n", model);
if (firmware_has_feature(FW_FEATURE_OPALv2))
if (firmware_has_feature(FW_FEATURE_OPALv3))
seq_printf(m, "firmware\t: OPAL v3\n");
else if (firmware_has_feature(FW_FEATURE_OPALv2))
seq_printf(m, "firmware\t: OPAL v2\n");
else if (firmware_has_feature(FW_FEATURE_OPAL))
seq_printf(m, "firmware\t: OPAL v1\n");
......@@ -126,6 +128,17 @@ static void pnv_progress(char *s, unsigned short hex)
{
}
/*
 * Machine shutdown hook for PowerNV. Two steps, in this order:
 *  1. the PCI code clears up the IODA tables;
 *  2. all OPAL interrupts are unregistered so that they don't fire
 *     while we kexec.
 */
static void pnv_shutdown(void)
{
	pnv_pci_shutdown();
	opal_shutdown();
}
#ifdef CONFIG_KEXEC
static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{
......@@ -187,6 +200,7 @@ define_machine(powernv) {
.init_IRQ = pnv_init_IRQ,
.show_cpuinfo = pnv_show_cpuinfo,
.progress = pnv_progress,
.machine_shutdown = pnv_shutdown,
.power_save = power7_idle,
.calibrate_decr = generic_calibrate_decr,
#ifdef CONFIG_KEXEC
......
......@@ -71,18 +71,68 @@ int pnv_smp_kick_cpu(int nr)
BUG_ON(nr < 0 || nr >= NR_CPUS);
/* On OPAL v2 the CPU are still spinning inside OPAL itself,
* get them back now
/*
* If we already started or OPALv2 is not supported, we just
* kick the CPU via the PACA
*/
if (!paca[nr].cpu_start && firmware_has_feature(FW_FEATURE_OPALv2)) {
pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
rc = opal_start_cpu(pcpu, start_here);
if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2))
goto kick;
/*
* At this point, the CPU can either be spinning on the way in
* from kexec or be inside OPAL waiting to be started for the
* first time. OPAL v3 allows us to query OPAL to know if it
* has the CPUs, so we do that
*/
if (firmware_has_feature(FW_FEATURE_OPALv3)) {
uint8_t status;
rc = opal_query_cpu_status(pcpu, &status);
if (rc != OPAL_SUCCESS) {
pr_warn("OPAL Error %ld starting CPU %d\n",
pr_warn("OPAL Error %ld querying CPU %d state\n",
rc, nr);
return -ENODEV;
}
/*
* Already started, just kick it, probably coming from
* kexec and spinning
*/
if (status == OPAL_THREAD_STARTED)
goto kick;
/*
* Available/inactive, let's kick it
*/
if (status == OPAL_THREAD_INACTIVE) {
pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n",
nr, pcpu);
rc = opal_start_cpu(pcpu, start_here);
if (rc != OPAL_SUCCESS) {
pr_warn("OPAL Error %ld starting CPU %d\n",
rc, nr);
return -ENODEV;
}
} else {
/*
* An unavailable CPU (or any other unknown status)
* shouldn't be started. It should also
* not be in the possible map but currently it can
* happen
*/
pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
" (status %d)...\n", nr, pcpu, status);
return -ENODEV;
}
} else {
/*
* On OPAL v2, we just kick it and hope for the best,
* we must not test the error from opal_start_cpu() or
* we would fail to get CPUs from kexec.
*/
opal_start_cpu(pcpu, start_here);
}
kick:
return smp_generic_kick_cpu(nr);
}
......
......@@ -18,6 +18,7 @@ config PPC_PSERIES
select PPC_PCI_CHOICE if EXPERT
select ZLIB_DEFLATE
select PPC_DOORBELL
select HAVE_CONTEXT_TRACKING
default y
config PPC_SPLPAR
......
......@@ -16,6 +16,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/suspend.h>
#include <linux/stat.h>
......@@ -126,11 +127,15 @@ static ssize_t store_hibernate(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
cpumask_var_t offline_mask;
int rc;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
return -ENOMEM;
stream_id = simple_strtoul(buf, NULL, 16);
do {
......@@ -140,15 +145,32 @@ static ssize_t store_hibernate(struct device *dev,
} while (rc == -EAGAIN);
if (!rc) {
/* All present CPUs must be online */
cpumask_andnot(offline_mask, cpu_present_mask,
cpu_online_mask);
rc = rtas_online_cpus_mask(offline_mask);
if (rc) {
pr_err("%s: Could not bring present CPUs online.\n",
__func__);
goto out;
}
stop_topology_update();
rc = pm_suspend(PM_SUSPEND_MEM);
start_topology_update();
/* Take down CPUs not online prior to suspend */
if (!rtas_offline_cpus_mask(offline_mask))
pr_warn("%s: Could not restore CPUs to offline "
"state.\n", __func__);
}
stream_id = 0;
if (!rc)
rc = count;
out:
free_cpumask_var(offline_mask);
return rc;
}
......
......@@ -361,7 +361,7 @@ static int wsp_chip_set_affinity(struct irq_data *d,
xive = xive_set_server(xive, get_irq_server(ics, hw_irq));
wsp_ics_set_xive(ics, hw_irq, xive);
return 0;
return IRQ_SET_MASK_OK;
}
static struct irq_chip wsp_irq_chip = {
......
......@@ -64,6 +64,8 @@ endif
obj-$(CONFIG_PPC_SCOM) += scom.o
obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-$(CONFIG_PPC_XICS) += xics/
......
......@@ -81,7 +81,7 @@ int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest,
ev_int_set_config(src, config, prio, cpuid);
spin_unlock_irqrestore(&ehv_pic_lock, flags);
return 0;
return IRQ_SET_MASK_OK;
}
static unsigned int ehv_pic_type_to_vecpri(unsigned int type)
......
......@@ -836,7 +836,7 @@ int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
mpic_physmask(mask));
}
return 0;
return IRQ_SET_MASK_OK;
}
static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
......
/*
* A udbg backend which logs messages and reads input from in memory
* buffers.
*
* The console output can be read from memcons_output which is a
* circular buffer whose next write position is stored in memcons.output_pos.
*
* Input may be passed by writing into the memcons_input buffer when it is
* empty. The input buffer is empty when both input_pos == input_start and
* *input_start == '\0'.
*
* Copyright (C) 2003-2005 Anton Blanchard and Milton Miller, IBM Corp
* Copyright (C) 2013 Alistair Popple, IBM Corp
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <asm/barrier.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/udbg.h>
/*
 * Descriptor of the in-memory console: three pointers for the output
 * ring and three for the input buffer. The *_end members are set up to
 * point one element past the last byte of the respective buffer.
 */
struct memcons {
/* first byte of the output buffer */
char *output_start;
/* where the next output character will be stored */
char *output_pos;
/* one past the last byte of the output buffer */
char *output_end;
/* first byte of the input buffer */
char *input_start;
/* next input character to consume; '\0' there means no input pending */
char *input_pos;
/* one past the last byte of the input buffer */
char *input_end;
};
/*
 * Backing storage for the console, sized by the
 * CONFIG_PPC_MEMCONS_OUTPUT_SIZE / CONFIG_PPC_MEMCONS_INPUT_SIZE
 * options.
 */
static char memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE];
static char memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE];
/*
 * The console descriptor. Both position pointers start at the beginning
 * of their buffer. The symbol is not static; presumably so that a
 * debugger can locate it by name - confirm.
 */
struct memcons memcons = {
.output_start = memcons_output,
.output_pos = memcons_output,
.output_end = &memcons_output[CONFIG_PPC_MEMCONS_OUTPUT_SIZE],
.input_start = memcons_input,
.input_pos = memcons_input,
.input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE],
};
/*
 * Append one character to the circular output buffer.
 *
 * The character is stored at the current output position, a write
 * barrier is issued, and only then is the position advanced, wrapping
 * back to the start of the buffer once the end is reached. Older
 * output is overwritten on wrap-around.
 */
void memcons_putc(char c)
{
	char *next;

	*memcons.output_pos = c;
	wmb();

	next = memcons.output_pos + 1;
	memcons.output_pos = (next < memcons.output_end) ?
				next : memcons.output_start;
}
/*
 * Fetch one pending input character without blocking.
 *
 * Returns the character at the current input position, or -1 when that
 * position holds '\0' (no input pending). After a character is taken
 * its slot is zeroed, a write barrier is issued and the input position
 * moves on. The position goes back to the start of the buffer when it
 * would run off the end or when the following slot is '\0'.
 */
int memcons_getc_poll(void)
{
	char ch;
	char *next;

	if (!*memcons.input_pos)
		return -1;

	ch = *memcons.input_pos;

	next = memcons.input_pos + 1;
	/* short-circuit keeps *next from being read past the buffer end */
	if (next >= memcons.input_end || *next == '\0')
		next = memcons.input_start;

	*memcons.input_pos = '\0';
	wmb();
	memcons.input_pos = next;

	return ch;
}
/*
 * Blocking read: keep polling memcons_getc_poll(), calling cpu_relax()
 * between attempts, until it yields something other than -1, and
 * return that value.
 */
int memcons_getc(void)
{
	int ch;

	while ((ch = memcons_getc_poll()) == -1)
		cpu_relax();

	return ch;
}
/*
 * Install the in-memory console as the udbg backend by pointing the
 * udbg output, blocking-input and polling-input hooks at the memcons
 * routines.
 */
void udbg_init_memcons(void)
{
	udbg_getc_poll = memcons_getc_poll;
	udbg_getc = memcons_getc;
	udbg_putc = memcons_putc;
}
......@@ -148,7 +148,7 @@ static int ics_opal_set_affinity(struct irq_data *d,
__func__, d->irq, hw_irq, server, rc);
return -1;
}
return 0;
return IRQ_SET_MASK_OK;
}
static struct irq_chip ics_opal_irq_chip = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册