提交 94a6df25 编写于 作者: L Linus Torvalds

Merge tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "Some more powerpc fixes for 4.12. Most of these actually came in last
  week but got held up for some more testing.

   - three fixes for kprobes/ftrace/livepatch interactions.

   - properly handle data breakpoints when using the Radix MMU.

   - fix for perf sampling of registers during call_usermodehelper().

   - properly initialise the thread_info on our emergency stacks

   - add an explicit flush when doing TLB invalidations for a process
     using NPU2.

  Thanks to: Alistair Popple, Naveen N. Rao, Nicholas Piggin, Ravi
  Bangoria, Masami Hiramatsu"

* tag 'powerpc-4.12-7' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/64: Initialise thread_info for emergency stacks
  powerpc/powernv/npu-dma: Add explicit flush when sending an ATSD
  powerpc/perf: Fix oops when kthread execs user process
  powerpc/64s: Handle data breakpoints in Radix mode
  powerpc/kprobes: Skip livepatch_handler() for jprobes
  powerpc/ftrace: Pass the correct stack pointer for DYNAMIC_FTRACE_WITH_REGS
  powerpc/kprobes: Pause function_graph tracing during jprobes handling
......@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
extern int kprobe_post_handler(struct pt_regs *regs);
extern int is_current_kprobe_addr(unsigned long addr);
#ifdef CONFIG_KPROBES_ON_FTRACE
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);
......
......@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_STD_MMU_64
andis. r0,r4,0xa410 /* weird error? */
andis. r0,r4,0xa450 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
andis. r0,r4,DSISR_DABRMATCH@h
bne- handle_dabr_fault
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
......@@ -1438,11 +1436,16 @@ do_hash_page:
/* Error */
blt- 13f
/* Reload DSISR into r4 for the DABR check below */
ld r4,_DSISR(r1)
#endif /* CONFIG_PPC_STD_MMU_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
11: ld r4,_DAR(r1)
11: andis. r0,r4,DSISR_DABRMATCH@h
bne- handle_dabr_fault
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
......
......@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
int is_current_kprobe_addr(unsigned long addr)
{
struct kprobe *p = kprobe_running();
return (p && (unsigned long)p->addr == addr) ? 1 : 0;
}
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
......@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
#endif
/*
* jprobes use jprobe_return() which skips the normal return
* path of the function, and this messes up the accounting of the
* function graph tracer.
*
* Pause function graph tracing while performing the jprobe function.
*/
pause_graph_tracing();
return 1;
}
NOKPROBE_SYMBOL(setjmp_pre_handler);
......@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
* saved regs...
*/
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
/* It's OK to start function graph tracing again */
unpause_graph_tracing();
preempt_enable_no_resched();
return 1;
}
......
......@@ -615,6 +615,24 @@ void __init exc_lvl_early_init(void)
}
#endif
/*
* Emergency stacks are used for a range of things, from asynchronous
* NMIs (system reset, machine check) to synchronous, process context.
* We set preempt_count to zero, even though that isn't necessarily correct. To
* get the right value we'd need to copy it from the previous thread_info, but
* doing that might fault causing more problems.
* TODO: what to do with accounting?
*/
static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
{
ti->task = NULL;
ti->cpu = cpu;
ti->preempt_count = 0;
ti->local_flags = 0;
ti->flags = 0;
klp_init_thread_info(ti);
}
/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
......@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
* Since we use these as temporary stacks during secondary CPU
* bringup, we need to get at them in real mode. This means they
* must also be within the RMO region.
*
* The IRQ stacks allocated elsewhere in this file are zeroed and
* initialized in kernel/irq.c. These are initialized here in order
* to have emergency stacks available as early as possible.
*/
limit = min(safe_stack_limit(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
klp_init_thread_info(ti);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
klp_init_thread_info(ti);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
klp_init_thread_info(ti);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
......
......@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
SAVE_8GPRS(0,r1)
SAVE_8GPRS(8,r1)
SAVE_8GPRS(16,r1)
SAVE_8GPRS(24,r1)
SAVE_GPR(0, r1)
SAVE_10GPRS(2, r1)
SAVE_10GPRS(12, r1)
SAVE_10GPRS(22, r1)
/* Save previous stack pointer (r1) */
addi r8, r1, SWITCH_FRAME_SIZE
std r8, GPR1(r1)
/* Load special regs for save below */
mfmsr r8
......@@ -95,18 +99,44 @@ ftrace_call:
bl ftrace_stub
nop
/* Load ctr with the possibly modified NIP */
ld r3, _NIP(r1)
mtctr r3
/* Load the possibly modified NIP */
ld r15, _NIP(r1)
#ifdef CONFIG_LIVEPATCH
cmpd r14,r3 /* has NIP been altered? */
cmpd r14, r15 /* has NIP been altered? */
#endif
#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
/* NIP has not been altered, skip over further checks */
beq 1f
/* Check if there is an active kprobe on us */
subi r3, r14, 4
bl is_current_kprobe_addr
nop
/*
* If r3 == 1, then this is a kprobe/jprobe.
* else, this is livepatched function.
*
* The conditional branch for livepatch_handler below will use the
* result of this comparison. For kprobe/jprobe, we just need to branch to
* the new NIP, not call livepatch_handler. The branch below is bne, so we
* want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
* CR0[EQ] = (r3 == 1).
*/
cmpdi r3, 1
1:
#endif
/* Load CTR with the possibly modified NIP */
mtctr r15
/* Restore gprs */
REST_8GPRS(0,r1)
REST_8GPRS(8,r1)
REST_8GPRS(16,r1)
REST_8GPRS(24,r1)
REST_GPR(0,r1)
REST_10GPRS(2,r1)
REST_10GPRS(12,r1)
REST_10GPRS(22,r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
......@@ -119,7 +149,10 @@ ftrace_call:
addi r1, r1, SWITCH_FRAME_SIZE
#ifdef CONFIG_LIVEPATCH
/* Based on the cmpd above, if the NIP was altered handle livepatch */
/*
* Based on the cmpd or cmpdi above, if the NIP was altered and we're
* not on a kprobe/jprobe, then handle livepatch.
*/
bne- livepatch_handler
#endif
......
......@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = perf_reg_abi(current);
regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
PERF_SAMPLE_REGS_ABI_NONE;
}
......@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
return mmio_atsd_reg;
}
static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
{
unsigned long launch;
......@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
/* No flush */
launch |= !flush << PPC_BITLSHIFT(39);
/* Invalidating the entire process doesn't use a va */
return mmio_launch_invalidate(npu, launch, 0);
}
static int mmio_invalidate_va(struct npu *npu, unsigned long va,
unsigned long pid)
unsigned long pid, bool flush)
{
unsigned long launch;
......@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
/* No flush */
launch |= !flush << PPC_BITLSHIFT(39);
return mmio_launch_invalidate(npu, launch, va);
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
struct mmio_atsd_reg {
struct npu *npu;
int reg;
};
static void mmio_invalidate_wait(
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
{
struct npu *npu;
int i, reg;
/* Wait for all invalidations to complete */
for (i = 0; i <= max_npu2_index; i++) {
if (mmio_atsd_reg[i].reg < 0)
continue;
/* Wait for completion */
npu = mmio_atsd_reg[i].npu;
reg = mmio_atsd_reg[i].reg;
while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
cpu_relax();
put_mmio_atsd_reg(npu, reg);
/*
* The GPU requires two flush ATSDs to ensure all entries have
* been flushed. We use PID 0 as it will never be used for a
* process on the GPU.
*/
if (flush)
mmio_invalidate_pid(npu, 0, true);
}
}
/*
* Invalidate either a single address or an entire PID depending on
* the value of va.
*/
static void mmio_invalidate(struct npu_context *npu_context, int va,
unsigned long address)
unsigned long address, bool flush)
{
int i, j, reg;
int i, j;
struct npu *npu;
struct pnv_phb *nphb;
struct pci_dev *npdev;
struct {
struct npu *npu;
int reg;
} mmio_atsd_reg[NV_MAX_NPUS];
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
/*
......@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
if (va)
mmio_atsd_reg[i].reg =
mmio_invalidate_va(npu, address, pid);
mmio_invalidate_va(npu, address, pid,
flush);
else
mmio_atsd_reg[i].reg =
mmio_invalidate_pid(npu, pid);
mmio_invalidate_pid(npu, pid, flush);
/*
* The NPU hardware forwards the shootdown to all GPUs
......@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
*/
flush_tlb_mm(npu_context->mm);
/* Wait for all invalidations to complete */
for (i = 0; i <= max_npu2_index; i++) {
if (mmio_atsd_reg[i].reg < 0)
continue;
/* Wait for completion */
npu = mmio_atsd_reg[i].npu;
reg = mmio_atsd_reg[i].reg;
while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
cpu_relax();
put_mmio_atsd_reg(npu, reg);
}
mmio_invalidate_wait(mmio_atsd_reg, flush);
if (flush)
/* Wait for the flush to complete */
mmio_invalidate_wait(mmio_atsd_reg, false);
}
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
......@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
* There should be no more translation requests for this PID, but we
* need to ensure any entries for it are removed from the TLB.
*/
mmio_invalidate(npu_context, 0, 0);
mmio_invalidate(npu_context, 0, 0, true);
}
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
......@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
mmio_invalidate(npu_context, 1, address);
mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
......@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
mmio_invalidate(npu_context, 1, address);
mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
......@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct npu_context *npu_context = mn_to_npu_context(mn);
unsigned long address;
for (address = start; address <= end; address += PAGE_SIZE)
mmio_invalidate(npu_context, 1, address);
for (address = start; address < end; address += PAGE_SIZE)
mmio_invalidate(npu_context, 1, address, false);
/* Do the flush only on the final addess == end */
mmio_invalidate(npu_context, 1, address, true);
}
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
......@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
if (!mm) {
/* kernel thread contexts are not supported */
if (!mm || mm->context.id == 0) {
/*
* Kernel thread contexts are not supported and context id 0 is
* reserved on the GPU.
*/
return ERR_PTR(-EINVAL);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册