提交 deed9deb 编写于 作者: L Linus Torvalds

Merge tag 'powerpc-4.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "Nothing that really stands out, just a bunch of fixes that have come
  in in the last couple of weeks.

  None of these are actually fixes for code that is new in 4.13. It's
  roughly half older bugs, with fixes going to stable, and half
  fixes/updates for Power9.

  Thanks to: Aneesh Kumar K.V, Anton Blanchard, Balbir Singh, Benjamin
  Herrenschmidt, Madhavan Srinivasan, Michael Neuling, Nicholas Piggin,
  Oliver O'Halloran"

* tag 'powerpc-4.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/64: Fix atomic64_inc_not_zero() to return an int
  powerpc: Fix emulation of mfocrf in emulate_step()
  powerpc: Fix emulation of mcrf in emulate_step()
  powerpc/perf: Add POWER9 alternate PM_RUN_CYC and PM_RUN_INST_CMPL events
  powerpc/perf: Fix SDAR_MODE value for continuous sampling on Power9
  powerpc/asm: Mark cr0 as clobbered in mftb()
  powerpc/powernv: Fix local TLB flush for boot and MCE on POWER9
  powerpc/mm/radix: Synchronize updates to the process table
  powerpc/mm/radix: Properly clear process table entry
  powerpc/powernv: Tell OPAL about our MMU mode on POWER9
  powerpc/kexec: Fix radix to hash kexec due to IAMR/AMOR
...@@ -560,7 +560,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) ...@@ -560,7 +560,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
* Atomically increments @v by 1, so long as @v is non-zero. * Atomically increments @v by 1, so long as @v is non-zero.
* Returns non-zero if @v was non-zero, and zero otherwise. * Returns non-zero if @v was non-zero, and zero otherwise.
*/ */
static __inline__ long atomic64_inc_not_zero(atomic64_t *v) static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
{ {
long t1, t2; long t1, t2;
...@@ -579,7 +579,7 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v) ...@@ -579,7 +579,7 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v)
: "r" (&v->counter) : "r" (&v->counter)
: "cc", "xer", "memory"); : "cc", "xer", "memory");
return t1; return t1 != 0;
} }
#endif /* __powerpc64__ */ #endif /* __powerpc64__ */
......
...@@ -876,6 +876,15 @@ struct OpalIoPhb4ErrorData { ...@@ -876,6 +876,15 @@ struct OpalIoPhb4ErrorData {
enum { enum {
OPAL_REINIT_CPUS_HILE_BE = (1 << 0), OPAL_REINIT_CPUS_HILE_BE = (1 << 0),
OPAL_REINIT_CPUS_HILE_LE = (1 << 1), OPAL_REINIT_CPUS_HILE_LE = (1 << 1),
/* These two define the base MMU mode of the host on P9
*
* On P9 Nimbus DD2.0 and Cumulus (and later), KVM can still
* create hash guests in "radix" mode with care (full core
* switch only).
*/
OPAL_REINIT_CPUS_MMU_HASH = (1 << 2),
OPAL_REINIT_CPUS_MMU_RADIX = (1 << 3),
}; };
typedef struct oppanel_line { typedef struct oppanel_line {
......
...@@ -1303,7 +1303,7 @@ static inline void msr_check_and_clear(unsigned long bits) ...@@ -1303,7 +1303,7 @@ static inline void msr_check_and_clear(unsigned long bits)
" .llong 0\n" \ " .llong 0\n" \
".previous" \ ".previous" \
: "=r" (rval) \ : "=r" (rval) \
: "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL)); \ : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
rval;}) rval;})
#else #else
#define mftb() ({unsigned long rval; \ #define mftb() ({unsigned long rval; \
......
...@@ -218,13 +218,20 @@ __init_tlb_power8: ...@@ -218,13 +218,20 @@ __init_tlb_power8:
ptesync ptesync
1: blr 1: blr
/*
* Flush the TLB in hash mode. Hash must flush with RIC=2 once for process
* and once for partition scope to clear process and partition table entries.
*/
__init_tlb_power9: __init_tlb_power9:
li r6,POWER9_TLB_SETS_HASH li r6,POWER9_TLB_SETS_HASH - 1
mtctr r6 mtctr r6
li r7,0xc00 /* IS field = 0b11 */ li r7,0xc00 /* IS field = 0b11 */
li r8,0
ptesync ptesync
2: tlbiel r7 PPC_TLBIEL(7, 8, 2, 1, 0)
addi r7,r7,0x1000 PPC_TLBIEL(7, 8, 2, 0, 0)
2: addi r7,r7,0x1000
PPC_TLBIEL(7, 8, 0, 0, 0)
bdnz 2b bdnz 2b
ptesync ptesync
1: blr 1: blr
......
...@@ -94,9 +94,6 @@ static void (*init_pmu_registers)(void); ...@@ -94,9 +94,6 @@ static void (*init_pmu_registers)(void);
static void cpufeatures_flush_tlb(void) static void cpufeatures_flush_tlb(void)
{ {
unsigned long rb;
unsigned int i, num_sets;
/* /*
* This is a temporary measure to keep equivalent TLB flush as the * This is a temporary measure to keep equivalent TLB flush as the
* cputable based setup code. * cputable based setup code.
...@@ -105,24 +102,15 @@ static void cpufeatures_flush_tlb(void) ...@@ -105,24 +102,15 @@ static void cpufeatures_flush_tlb(void)
case PVR_POWER8: case PVR_POWER8:
case PVR_POWER8E: case PVR_POWER8E:
case PVR_POWER8NVL: case PVR_POWER8NVL:
num_sets = POWER8_TLB_SETS; __flush_tlb_power8(POWER8_TLB_SETS);
break; break;
case PVR_POWER9: case PVR_POWER9:
num_sets = POWER9_TLB_SETS_HASH; __flush_tlb_power9(POWER9_TLB_SETS_HASH);
break; break;
default: default:
num_sets = 1;
pr_err("unknown CPU version for boot TLB flush\n"); pr_err("unknown CPU version for boot TLB flush\n");
break; break;
} }
asm volatile("ptesync" : : : "memory");
rb = TLBIEL_INVAL_SET;
for (i = 0; i < num_sets; i++) {
asm volatile("tlbiel %0" : : "r" (rb));
rb += 1 << TLBIEL_INVAL_SET_SHIFT;
}
asm volatile("ptesync" : : : "memory");
} }
static void __restore_cpu_cpufeatures(void) static void __restore_cpu_cpufeatures(void)
......
...@@ -53,6 +53,60 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action) ...@@ -53,6 +53,60 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action)
asm volatile("ptesync" : : : "memory"); asm volatile("ptesync" : : : "memory");
} }
/*
 * flush_tlb_300 - locally invalidate TLB sets using the five-operand
 * PPC_TLBIEL form.
 *
 * @num_sets: number of TLB sets to walk; set 0 is always flushed, sets
 *            1..num_sets-1 are flushed in the loops below.
 * @action:   TLB_INVAL_SCOPE_GLOBAL or TLB_INVAL_SCOPE_LPID; any other
 *            value hits BUG().
 *
 * The whole sequence is bracketed by a ptesync before and after.
 */
static void flush_tlb_300(unsigned int num_sets, unsigned int action)
{
unsigned long rb;
unsigned int i;
unsigned int r;
/* Pick the base RB operand value from the requested invalidation scope. */
switch (action) {
case TLB_INVAL_SCOPE_GLOBAL:
rb = TLBIEL_INVAL_SET;
break;
case TLB_INVAL_SCOPE_LPID:
rb = TLBIEL_INVAL_SET_LPID;
break;
default:
BUG();
break;
}
asm volatile("ptesync" : : : "memory");
/* Last PPC_TLBIEL operand: 1 when early_radix_enabled(), 0 otherwise. */
if (early_radix_enabled())
r = 1;
else
r = 0;
/*
* First flush table/PWC caches with set 0, then flush the
* rest of the sets, partition scope. Radix must then do it
* all again with process scope. Hash just has to flush
* process table.
*/
/*
 * Pass 1: fourth operand 0 (presumably the process-scope selector, i.e.
 * partition scope here - TODO confirm against the PPC_TLBIEL definition),
 * third operand fixed at 2. Set 0 first, then every remaining set.
 *
 * NOTE(review): the last operand is supplied with an "r" (register)
 * constraint while the other field operands use "i"; confirm PPC_TLBIEL
 * expects a register there rather than an immediate field value.
 */
asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
"r"(rb), "r"(0), "i"(2), "i"(0), "r"(r));
for (i = 1; i < num_sets; i++) {
/* Offset added to rb to address set i. */
unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
"r"(rb+set), "r"(0), "i"(2), "i"(0), "r"(r));
}
/*
 * Pass 2: same as pass 1 but with the fourth operand set to 1. Set 0 is
 * always issued; the remaining sets are walked only when
 * early_radix_enabled() is true.
 */
asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
"r"(rb), "r"(0), "i"(2), "i"(1), "r"(r));
if (early_radix_enabled()) {
for (i = 1; i < num_sets; i++) {
unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
"r"(rb+set), "r"(0), "i"(2), "i"(1), "r"(r));
}
}
asm volatile("ptesync" : : : "memory");
}
/* /*
* Generic routines to flush TLB on POWER processors. These routines * Generic routines to flush TLB on POWER processors. These routines
* are used as flush_tlb hook in the cpu_spec. * are used as flush_tlb hook in the cpu_spec.
...@@ -79,7 +133,7 @@ void __flush_tlb_power9(unsigned int action) ...@@ -79,7 +133,7 @@ void __flush_tlb_power9(unsigned int action)
else else
num_sets = POWER9_TLB_SETS_HASH; num_sets = POWER9_TLB_SETS_HASH;
flush_tlb_206(num_sets, action); flush_tlb_300(num_sets, action);
} }
......
...@@ -614,6 +614,18 @@ _GLOBAL(kexec_sequence) ...@@ -614,6 +614,18 @@ _GLOBAL(kexec_sequence)
li r0,0 li r0,0
std r0,16(r1) std r0,16(r1)
BEGIN_FTR_SECTION
/*
* This is the best time to turn AMR/IAMR off.
* key 0 is used in radix for supervisor<->user
* protection, but on hash key 0 is reserved
* ideally we want to enter with a clean state.
* NOTE, we rely on r0 being 0 from above.
*/
mtspr SPRN_IAMR,r0
mtspr SPRN_AMOR,r0
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
/* save regs for local vars on new stack. /* save regs for local vars on new stack.
* yes, we won't go back, but ... * yes, we won't go back, but ...
*/ */
......
...@@ -683,8 +683,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, ...@@ -683,8 +683,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
case 19: case 19:
switch ((instr >> 1) & 0x3ff) { switch ((instr >> 1) & 0x3ff) {
case 0: /* mcrf */ case 0: /* mcrf */
rd = (instr >> 21) & 0x1c; rd = 7 - ((instr >> 23) & 0x7);
ra = (instr >> 16) & 0x1c; ra = 7 - ((instr >> 18) & 0x7);
rd *= 4;
ra *= 4;
val = (regs->ccr >> ra) & 0xf; val = (regs->ccr >> ra) & 0xf;
regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
goto instr_done; goto instr_done;
...@@ -964,6 +966,19 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, ...@@ -964,6 +966,19 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
#endif #endif
case 19: /* mfcr */ case 19: /* mfcr */
if ((instr >> 20) & 1) {
imm = 0xf0000000UL;
for (sh = 0; sh < 8; ++sh) {
if (instr & (0x80000 >> sh)) {
regs->gpr[rd] = regs->ccr & imm;
break;
}
imm >>= 4;
}
goto instr_done;
}
regs->gpr[rd] = regs->ccr; regs->gpr[rd] = regs->ccr;
regs->gpr[rd] &= 0xffffffffUL; regs->gpr[rd] &= 0xffffffffUL;
goto instr_done; goto instr_done;
......
...@@ -138,6 +138,14 @@ static int radix__init_new_context(struct mm_struct *mm) ...@@ -138,6 +138,14 @@ static int radix__init_new_context(struct mm_struct *mm)
rts_field = radix__get_tree_size(); rts_field = radix__get_tree_size();
process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
/*
* Order the above store with subsequent update of the PID
* register (at which point HW can start loading/caching
* the entry) and the corresponding load by the MMU from
* the L2 cache.
*/
asm volatile("ptesync;isync" : : : "memory");
mm->context.npu_context = NULL; mm->context.npu_context = NULL;
return index; return index;
...@@ -223,9 +231,15 @@ void destroy_context(struct mm_struct *mm) ...@@ -223,9 +231,15 @@ void destroy_context(struct mm_struct *mm)
mm->context.cop_lockp = NULL; mm->context.cop_lockp = NULL;
#endif /* CONFIG_PPC_ICSWX */ #endif /* CONFIG_PPC_ICSWX */
if (radix_enabled()) if (radix_enabled()) {
process_tb[mm->context.id].prtb1 = 0; /*
else * Radix doesn't have a valid bit in the process table
* entries. However we know that at least P9 implementation
* will avoid caching an entry with an invalid RTS field,
* and 0 is invalid. So this will do.
*/
process_tb[mm->context.id].prtb0 = 0;
} else
subpage_prot_free(mm); subpage_prot_free(mm);
destroy_pagetable_page(mm); destroy_pagetable_page(mm);
__destroy_context(mm->context.id); __destroy_context(mm->context.id);
......
...@@ -90,13 +90,15 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) ...@@ -90,13 +90,15 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
* MMCRA[SDAR_MODE] will be set to 0b01 * MMCRA[SDAR_MODE] will be set to 0b01
* For rest * For rest
* MMCRA[SDAR_MODE] will be set from event code. * MMCRA[SDAR_MODE] will be set from event code.
* If sdar_mode from event is zero, default to 0b01. Hardware
* requires that we set a non-zero value.
*/ */
if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
else if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event))
*mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) else
*mmcra |= MMCRA_SDAR_MODE_TLB; *mmcra |= MMCRA_SDAR_MODE_TLB;
} else } else
*mmcra |= MMCRA_SDAR_MODE_TLB; *mmcra |= MMCRA_SDAR_MODE_TLB;
......
...@@ -51,8 +51,12 @@ EVENT(PM_DTLB_MISS, 0x300fc) ...@@ -51,8 +51,12 @@ EVENT(PM_DTLB_MISS, 0x300fc)
EVENT(PM_ITLB_MISS, 0x400fc) EVENT(PM_ITLB_MISS, 0x400fc)
/* Run_Instructions */ /* Run_Instructions */
EVENT(PM_RUN_INST_CMPL, 0x500fa) EVENT(PM_RUN_INST_CMPL, 0x500fa)
/* Alternate event code for PM_RUN_INST_CMPL */
EVENT(PM_RUN_INST_CMPL_ALT, 0x400fa)
/* Run_cycles */ /* Run_cycles */
EVENT(PM_RUN_CYC, 0x600f4) EVENT(PM_RUN_CYC, 0x600f4)
/* Alternate event code for Run_cycles */
EVENT(PM_RUN_CYC_ALT, 0x200f4)
/* Instruction Dispatched */ /* Instruction Dispatched */
EVENT(PM_INST_DISP, 0x200f2) EVENT(PM_INST_DISP, 0x200f2)
EVENT(PM_INST_DISP_ALT, 0x300f2) EVENT(PM_INST_DISP_ALT, 0x300f2)
......
...@@ -107,6 +107,8 @@ extern struct attribute_group isa207_pmu_format_group; ...@@ -107,6 +107,8 @@ extern struct attribute_group isa207_pmu_format_group;
/* Table of alternatives, sorted by column 0 */ /* Table of alternatives, sorted by column 0 */
static const unsigned int power9_event_alternatives[][MAX_ALT] = { static const unsigned int power9_event_alternatives[][MAX_ALT] = {
{ PM_INST_DISP, PM_INST_DISP_ALT }, { PM_INST_DISP, PM_INST_DISP_ALT },
{ PM_RUN_CYC_ALT, PM_RUN_CYC },
{ PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
}; };
static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
......
...@@ -59,6 +59,8 @@ static struct task_struct *kopald_tsk; ...@@ -59,6 +59,8 @@ static struct task_struct *kopald_tsk;
void opal_configure_cores(void) void opal_configure_cores(void)
{ {
u64 reinit_flags = 0;
/* Do the actual re-init, This will clobber all FPRs, VRs, etc... /* Do the actual re-init, This will clobber all FPRs, VRs, etc...
* *
* It will preserve non volatile GPRs and HSPRG0/1. It will * It will preserve non volatile GPRs and HSPRG0/1. It will
...@@ -66,11 +68,24 @@ void opal_configure_cores(void) ...@@ -66,11 +68,24 @@ void opal_configure_cores(void)
* but it might clobber a bunch. * but it might clobber a bunch.
*/ */
#ifdef __BIG_ENDIAN__ #ifdef __BIG_ENDIAN__
opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
#else #else
opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE); reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
#endif #endif
/*
* POWER9 always supports running hash:
* ie. Host hash supports hash guests
* Host radix supports hash/radix guests
*/
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
if (early_radix_enabled())
reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
}
opal_reinit_cpus(reinit_flags);
/* Restore some bits */ /* Restore some bits */
if (cur_cpu_spec->cpu_restore) if (cur_cpu_spec->cpu_restore)
cur_cpu_spec->cpu_restore(); cur_cpu_spec->cpu_restore();
......
...@@ -225,6 +225,8 @@ static void pnv_kexec_wait_secondaries_down(void) ...@@ -225,6 +225,8 @@ static void pnv_kexec_wait_secondaries_down(void)
static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{ {
u64 reinit_flags;
if (xive_enabled()) if (xive_enabled())
xive_kexec_teardown_cpu(secondary); xive_kexec_teardown_cpu(secondary);
else else
...@@ -254,8 +256,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) ...@@ -254,8 +256,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
* We might be running as little-endian - now that interrupts * We might be running as little-endian - now that interrupts
* are disabled, reset the HILE bit to big-endian so we don't * are disabled, reset the HILE bit to big-endian so we don't
* take interrupts in the wrong endian later * take interrupts in the wrong endian later
*
* We reinit to enable both radix and hash on P9 to ensure
* the mode used by the next kernel is always supported.
*/ */
opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
if (cpu_has_feature(CPU_FTR_ARCH_300))
reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
OPAL_REINIT_CPUS_MMU_HASH;
opal_reinit_cpus(reinit_flags);
} }
} }
#endif /* CONFIG_KEXEC_CORE */ #endif /* CONFIG_KEXEC_CORE */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册