提交 91562cf9 编写于 作者: L Linus Torvalds

Merge tag 'powerpc-6.1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:

 - Fix a case of rescheduling with user access unlocked, when preempt is
   enabled.

 - A follow-up fix for a recent fix, which could lead to IRQ state
   assertions firing incorrectly.

 - Two fixes for lockdep warnings seen when using kfence with the Hash
   MMU.

 - Two fixes for preempt warnings seen when using the Hash MMU.

 - Two fixes for the VAS coprocessor mechanism used on pseries.

 - Prevent building some of our older KVM backends when
   CONTEXT_TRACKING_USER is enabled, as it's known to cause crashes.

 - A couple of fixes for issues seen with PMU NMIs.

Thanks to Nicholas Piggin, Guenter Roeck, Frederic Barrat, Haren Myneni,
Sachin Sant, and Samuel Holland.

* tag 'powerpc-6.1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/64s/interrupt: Fix clear of PACA_IRQS_HARD_DIS when returning to soft-masked context
  powerpc/64s/interrupt: Perf NMI should not take normal exit path
  powerpc/64/interrupt: Prevent NMI PMI causing a dangerous warning
  KVM: PPC: BookS PR-KVM and BookE do not support context tracking
  powerpc: Fix reschedule bug in KUAP-unlocked user copy
  powerpc/64s: Fix hash__change_memory_range preemption warning
  powerpc/64s: Disable preemption in hash lazy mmu mode
  powerpc/64s: make linear_map_hash_lock a raw spinlock
  powerpc/64s: make HPTE lock and native_tlbie_lock irq-safe
  powerpc/64s: Add lockdep for HPTE lock
  powerpc/pseries: Use lparcfg to reconfig VAS windows for DLPAR CPU
  powerpc/pseries/vas: Add VAS IRQ primary handler
...@@ -32,6 +32,11 @@ static inline void arch_enter_lazy_mmu_mode(void) ...@@ -32,6 +32,11 @@ static inline void arch_enter_lazy_mmu_mode(void)
if (radix_enabled()) if (radix_enabled())
return; return;
/*
* apply_to_page_range can call us with preemption enabled when
* operating on kernel page tables.
*/
preempt_disable();
batch = this_cpu_ptr(&ppc64_tlb_batch); batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1; batch->active = 1;
} }
...@@ -47,6 +52,7 @@ static inline void arch_leave_lazy_mmu_mode(void) ...@@ -47,6 +52,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
if (batch->index) if (batch->index)
__flush_tlb_pending(batch); __flush_tlb_pending(batch);
batch->active = 0; batch->active = 0;
preempt_enable();
} }
#define arch_flush_lazy_mmu_mode() do {} while (0) #define arch_flush_lazy_mmu_mode() do {} while (0)
......
...@@ -813,6 +813,13 @@ kernel_dbg_exc: ...@@ -813,6 +813,13 @@ kernel_dbg_exc:
EXCEPTION_COMMON(0x260) EXCEPTION_COMMON(0x260)
CHECK_NAPPING() CHECK_NAPPING()
addi r3,r1,STACK_FRAME_OVERHEAD addi r3,r1,STACK_FRAME_OVERHEAD
/*
* XXX: When performance_monitor_exception is taken as a soft-NMI
* (Linux irqs disabled), returning through interrupt_return may be
* risky and could cause bugs in the return path or elsewhere. That
* case should just restore registers and return. There is a workaround
* for one known problem in interrupt_exit_kernel_prepare().
*/
bl performance_monitor_exception bl performance_monitor_exception
b interrupt_return b interrupt_return
......
...@@ -2357,9 +2357,21 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) ...@@ -2357,9 +2357,21 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common) EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor GEN_COMMON performance_monitor
addi r3,r1,STACK_FRAME_OVERHEAD addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception lbz r4,PACAIRQSOFTMASK(r13)
cmpdi r4,IRQS_ENABLED
bne 1f
bl performance_monitor_exception_async
b interrupt_return_srr b interrupt_return_srr
1:
bl performance_monitor_exception_nmi
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
mtmsrd r9,1
kuap_kernel_restore r9, r10
EXCEPTION_RESTORE_REGS hsrr=0
RFI_TO_KERNEL
/** /**
* Interrupt 0xf20 - Vector Unavailable Interrupt. * Interrupt 0xf20 - Vector Unavailable Interrupt.
......
...@@ -374,10 +374,18 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) ...@@ -374,10 +374,18 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
if (regs_is_unrecoverable(regs)) if (regs_is_unrecoverable(regs))
unrecoverable_exception(regs); unrecoverable_exception(regs);
/* /*
* CT_WARN_ON comes here via program_check_exception, * CT_WARN_ON comes here via program_check_exception, so avoid
* so avoid recursion. * recursion.
*
* Skip the assertion on PMIs on 64e to work around a problem caused
* by NMI PMIs incorrectly taking this interrupt return path: it is
* possible for this to hit after the interrupt exit to user code has
* switched context to user. See also the comment in the performance
* monitor handler in exceptions-64e.S.
*/ */
if (TRAP(regs) != INTERRUPT_PROGRAM) if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
TRAP(regs) != INTERRUPT_PROGRAM &&
TRAP(regs) != INTERRUPT_PERFMON)
CT_WARN_ON(ct_state() == CONTEXT_USER); CT_WARN_ON(ct_state() == CONTEXT_USER);
kuap = kuap_get_and_assert_locked(); kuap = kuap_get_and_assert_locked();
......
...@@ -532,15 +532,24 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) ...@@ -532,15 +532,24 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
* Returning to soft-disabled context. * Returning to soft-disabled context.
* Check if a MUST_HARD_MASK interrupt has become pending, in which * Check if a MUST_HARD_MASK interrupt has become pending, in which
* case we need to disable MSR[EE] in the return context. * case we need to disable MSR[EE] in the return context.
*
* The MSR[EE] check catches among other things the short incoherency
* in hard_irq_disable() between clearing MSR[EE] and setting
* PACA_IRQ_HARD_DIS.
*/ */
ld r12,_MSR(r1) ld r12,_MSR(r1)
andi. r10,r12,MSR_EE andi. r10,r12,MSR_EE
beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
lbz r11,PACAIRQHAPPENED(r13) lbz r11,PACAIRQHAPPENED(r13)
andi. r10,r11,PACA_IRQ_MUST_HARD_MASK andi. r10,r11,PACA_IRQ_MUST_HARD_MASK
beq .Lfast_kernel_interrupt_return_\srr\() // No HARD_MASK pending bne 1f // HARD_MASK is pending
// No HARD_MASK pending, clear possible HARD_DIS set by interrupt
andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
stb r11,PACAIRQHAPPENED(r13)
b .Lfast_kernel_interrupt_return_\srr\()
/* Must clear MSR_EE from _MSR */ 1: /* Must clear MSR_EE from _MSR */
#ifdef CONFIG_PPC_BOOK3S #ifdef CONFIG_PPC_BOOK3S
li r10,0 li r10,0
/* Clear valid before changing _MSR */ /* Clear valid before changing _MSR */
......
...@@ -51,6 +51,7 @@ config KVM_BOOK3S_HV_POSSIBLE ...@@ -51,6 +51,7 @@ config KVM_BOOK3S_HV_POSSIBLE
config KVM_BOOK3S_32 config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors" tristate "KVM support for PowerPC book3s_32 processors"
depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
depends on !CONTEXT_TRACKING_USER
select KVM select KVM
select KVM_BOOK3S_32_HANDLER select KVM_BOOK3S_32_HANDLER
select KVM_BOOK3S_PR_POSSIBLE select KVM_BOOK3S_PR_POSSIBLE
...@@ -105,6 +106,7 @@ config KVM_BOOK3S_64_HV ...@@ -105,6 +106,7 @@ config KVM_BOOK3S_64_HV
config KVM_BOOK3S_64_PR config KVM_BOOK3S_64_PR
tristate "KVM support without using hypervisor mode in host" tristate "KVM support without using hypervisor mode in host"
depends on KVM_BOOK3S_64 depends on KVM_BOOK3S_64
depends on !CONTEXT_TRACKING_USER
select KVM_BOOK3S_PR_POSSIBLE select KVM_BOOK3S_PR_POSSIBLE
help help
Support running guest kernels in virtual machines on processors Support running guest kernels in virtual machines on processors
...@@ -190,6 +192,7 @@ config KVM_EXIT_TIMING ...@@ -190,6 +192,7 @@ config KVM_EXIT_TIMING
config KVM_E500V2 config KVM_E500V2
bool "KVM support for PowerPC E500v2 processors" bool "KVM support for PowerPC E500v2 processors"
depends on PPC_E500 && !PPC_E500MC depends on PPC_E500 && !PPC_E500MC
depends on !CONTEXT_TRACKING_USER
select KVM select KVM
select KVM_MMIO select KVM_MMIO
select MMU_NOTIFIER select MMU_NOTIFIER
...@@ -205,6 +208,7 @@ config KVM_E500V2 ...@@ -205,6 +208,7 @@ config KVM_E500V2
config KVM_E500MC config KVM_E500MC
bool "KVM support for PowerPC E500MC/E5500/E6500 processors" bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC depends on PPC_E500MC
depends on !CONTEXT_TRACKING_USER
select KVM select KVM
select KVM_MMIO select KVM_MMIO
select KVM_BOOKE_HV select KVM_BOOKE_HV
......
...@@ -36,7 +36,17 @@ int exit_vmx_usercopy(void) ...@@ -36,7 +36,17 @@ int exit_vmx_usercopy(void)
{ {
disable_kernel_altivec(); disable_kernel_altivec();
pagefault_enable(); pagefault_enable();
preempt_enable(); preempt_enable_no_resched();
/*
* Must never explicitly call schedule (including preempt_enable())
* while in a kuap-unlocked user copy, because the AMR register will
* not be saved and restored across context switch. However preempt
* kernels need to be preempted as soon as possible if need_resched is
* set and we are preemptible. The hack here is to schedule a
* decrementer to fire here and reschedule for us if necessary.
*/
if (IS_ENABLED(CONFIG_PREEMPT) && need_resched())
set_dec(1);
return 0; return 0;
} }
......
...@@ -43,6 +43,29 @@ ...@@ -43,6 +43,29 @@
static DEFINE_RAW_SPINLOCK(native_tlbie_lock); static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
/*
 * Lockdep annotation for the HPTE bit lock.
 *
 * The HPTE lock is a bit in the hash PTE itself rather than a regular
 * lock object, so a single static lockdep map stands in for every HPTE.
 * Callers pair acquire_hpte_lock() with taking the lock bit and
 * release_hpte_lock() with dropping it (including the stores to
 * hptep->v that implicitly unlock the entry).
 *
 * Without CONFIG_LOCKDEP both helpers compile to empty functions.
 */
#ifdef CONFIG_LOCKDEP
static struct lockdep_map hpte_lock_map =
	STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);
#endif

/* Tell lockdep that the HPTE lock is about to be taken. */
static void acquire_hpte_lock(void)
{
#ifdef CONFIG_LOCKDEP
	lock_map_acquire(&hpte_lock_map);
#endif
}

/* Tell lockdep that the HPTE lock is being dropped. */
static void release_hpte_lock(void)
{
#ifdef CONFIG_LOCKDEP
	lock_map_release(&hpte_lock_map);
#endif
}
static inline unsigned long ___tlbie(unsigned long vpn, int psize, static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize) int apsize, int ssize)
{ {
...@@ -220,6 +243,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep) ...@@ -220,6 +243,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
{ {
unsigned long *word = (unsigned long *)&hptep->v; unsigned long *word = (unsigned long *)&hptep->v;
acquire_hpte_lock();
while (1) { while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word)) if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break; break;
...@@ -234,6 +258,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep) ...@@ -234,6 +258,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
{ {
unsigned long *word = (unsigned long *)&hptep->v; unsigned long *word = (unsigned long *)&hptep->v;
release_hpte_lock();
clear_bit_unlock(HPTE_LOCK_BIT, word); clear_bit_unlock(HPTE_LOCK_BIT, word);
} }
...@@ -243,8 +268,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, ...@@ -243,8 +268,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
{ {
struct hash_pte *hptep = htab_address + hpte_group; struct hash_pte *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r; unsigned long hpte_v, hpte_r;
unsigned long flags;
int i; int i;
local_irq_save(flags);
if (!(vflags & HPTE_V_BOLTED)) { if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx," DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n", " rflags=%lx, vflags=%lx, psize=%d)\n",
...@@ -263,8 +291,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, ...@@ -263,8 +291,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
hptep++; hptep++;
} }
if (i == HPTES_PER_GROUP) if (i == HPTES_PER_GROUP) {
local_irq_restore(flags);
return -1; return -1;
}
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
...@@ -286,10 +316,13 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, ...@@ -286,10 +316,13 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
* Now set the first dword including the valid bit * Now set the first dword including the valid bit
* NOTE: this also unlocks the hpte * NOTE: this also unlocks the hpte
*/ */
release_hpte_lock();
hptep->v = cpu_to_be64(hpte_v); hptep->v = cpu_to_be64(hpte_v);
__asm__ __volatile__ ("ptesync" : : : "memory"); __asm__ __volatile__ ("ptesync" : : : "memory");
local_irq_restore(flags);
return i | (!!(vflags & HPTE_V_SECONDARY) << 3); return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
} }
...@@ -327,6 +360,7 @@ static long native_hpte_remove(unsigned long hpte_group) ...@@ -327,6 +360,7 @@ static long native_hpte_remove(unsigned long hpte_group)
return -1; return -1;
/* Invalidate the hpte. NOTE: this also unlocks it */ /* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0; hptep->v = 0;
return i; return i;
...@@ -339,6 +373,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, ...@@ -339,6 +373,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
struct hash_pte *hptep = htab_address + slot; struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v; unsigned long hpte_v, want_v;
int ret = 0, local = 0; int ret = 0, local = 0;
unsigned long irqflags;
local_irq_save(irqflags);
want_v = hpte_encode_avpn(vpn, bpsize, ssize); want_v = hpte_encode_avpn(vpn, bpsize, ssize);
...@@ -382,6 +419,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, ...@@ -382,6 +419,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
if (!(flags & HPTE_NOHPTE_UPDATE)) if (!(flags & HPTE_NOHPTE_UPDATE))
tlbie(vpn, bpsize, apsize, ssize, local); tlbie(vpn, bpsize, apsize, ssize, local);
local_irq_restore(irqflags);
return ret; return ret;
} }
...@@ -445,6 +484,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, ...@@ -445,6 +484,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
unsigned long vsid; unsigned long vsid;
long slot; long slot;
struct hash_pte *hptep; struct hash_pte *hptep;
unsigned long flags;
local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize); vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize); vpn = hpt_vpn(ea, vsid, ssize);
...@@ -463,6 +505,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, ...@@ -463,6 +505,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
* actual page size will be same. * actual page size will be same.
*/ */
tlbie(vpn, psize, psize, ssize, 0); tlbie(vpn, psize, psize, ssize, 0);
local_irq_restore(flags);
} }
/* /*
...@@ -476,6 +520,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize) ...@@ -476,6 +520,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
unsigned long vsid; unsigned long vsid;
long slot; long slot;
struct hash_pte *hptep; struct hash_pte *hptep;
unsigned long flags;
local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize); vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize); vpn = hpt_vpn(ea, vsid, ssize);
...@@ -493,6 +540,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize) ...@@ -493,6 +540,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
/* Invalidate the TLB */ /* Invalidate the TLB */
tlbie(vpn, psize, psize, ssize, 0); tlbie(vpn, psize, psize, ssize, 0);
local_irq_restore(flags);
return 0; return 0;
} }
...@@ -517,10 +567,11 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, ...@@ -517,10 +567,11 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
/* recheck with locks held */ /* recheck with locks held */
hpte_v = hpte_get_old_v(hptep); hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/* Invalidate the hpte. NOTE: this also unlocks it */ /* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0; hptep->v = 0;
else } else
native_unlock_hpte(hptep); native_unlock_hpte(hptep);
} }
/* /*
...@@ -580,10 +631,8 @@ static void native_hugepage_invalidate(unsigned long vsid, ...@@ -580,10 +631,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
hpte_v = hpte_get_old_v(hptep); hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/* /* Invalidate the hpte. NOTE: this also unlocks it */
* Invalidate the hpte. NOTE: this also unlocks it release_hpte_lock();
*/
hptep->v = 0; hptep->v = 0;
} else } else
native_unlock_hpte(hptep); native_unlock_hpte(hptep);
...@@ -765,8 +814,10 @@ static void native_flush_hash_range(unsigned long number, int local) ...@@ -765,8 +814,10 @@ static void native_flush_hash_range(unsigned long number, int local)
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep); native_unlock_hpte(hptep);
else else {
release_hpte_lock();
hptep->v = 0; hptep->v = 0;
}
} pte_iterate_hashed_end(); } pte_iterate_hashed_end();
} }
......
...@@ -404,7 +404,8 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); ...@@ -404,7 +404,8 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
struct change_memory_parms { struct change_memory_parms {
unsigned long start, end, newpp; unsigned long start, end, newpp;
unsigned int step, nr_cpus, master_cpu; unsigned int step, nr_cpus;
atomic_t master_cpu;
atomic_t cpu_counter; atomic_t cpu_counter;
}; };
...@@ -478,7 +479,8 @@ static int change_memory_range_fn(void *data) ...@@ -478,7 +479,8 @@ static int change_memory_range_fn(void *data)
{ {
struct change_memory_parms *parms = data; struct change_memory_parms *parms = data;
if (parms->master_cpu != smp_processor_id()) // First CPU goes through, all others wait.
if (atomic_xchg(&parms->master_cpu, 1) == 1)
return chmem_secondary_loop(parms); return chmem_secondary_loop(parms);
// Wait for all but one CPU (this one) to call-in // Wait for all but one CPU (this one) to call-in
...@@ -516,7 +518,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, ...@@ -516,7 +518,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
chmem_parms.end = end; chmem_parms.end = end;
chmem_parms.step = step; chmem_parms.step = step;
chmem_parms.newpp = newpp; chmem_parms.newpp = newpp;
chmem_parms.master_cpu = smp_processor_id(); atomic_set(&chmem_parms.master_cpu, 0);
cpus_read_lock(); cpus_read_lock();
......
...@@ -1981,7 +1981,7 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn, ...@@ -1981,7 +1981,7 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
} }
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
static DEFINE_SPINLOCK(linear_map_hash_lock); static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{ {
...@@ -2005,10 +2005,10 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) ...@@ -2005,10 +2005,10 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
mmu_linear_psize, mmu_kernel_ssize); mmu_linear_psize, mmu_kernel_ssize);
BUG_ON (ret < 0); BUG_ON (ret < 0);
spin_lock(&linear_map_hash_lock); raw_spin_lock(&linear_map_hash_lock);
BUG_ON(linear_map_hash_slots[lmi] & 0x80); BUG_ON(linear_map_hash_slots[lmi] & 0x80);
linear_map_hash_slots[lmi] = ret | 0x80; linear_map_hash_slots[lmi] = ret | 0x80;
spin_unlock(&linear_map_hash_lock); raw_spin_unlock(&linear_map_hash_lock);
} }
static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
...@@ -2018,14 +2018,14 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) ...@@ -2018,14 +2018,14 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
spin_lock(&linear_map_hash_lock); raw_spin_lock(&linear_map_hash_lock);
if (!(linear_map_hash_slots[lmi] & 0x80)) { if (!(linear_map_hash_slots[lmi] & 0x80)) {
spin_unlock(&linear_map_hash_lock); raw_spin_unlock(&linear_map_hash_lock);
return; return;
} }
hidx = linear_map_hash_slots[lmi] & 0x7f; hidx = linear_map_hash_slots[lmi] & 0x7f;
linear_map_hash_slots[lmi] = 0; linear_map_hash_slots[lmi] = 0;
spin_unlock(&linear_map_hash_lock); raw_spin_unlock(&linear_map_hash_lock);
if (hidx & _PTEIDX_SECONDARY) if (hidx & _PTEIDX_SECONDARY)
hash = ~hash; hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <asm/drmem.h> #include <asm/drmem.h>
#include "pseries.h" #include "pseries.h"
#include "vas.h" /* pseries_vas_dlpar_cpu() */
/* /*
* This isn't a module but we expose that to userspace * This isn't a module but we expose that to userspace
...@@ -748,6 +749,16 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, ...@@ -748,6 +749,16 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
return -EINVAL; return -EINVAL;
retval = update_ppp(new_entitled_ptr, NULL); retval = update_ppp(new_entitled_ptr, NULL);
if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
/*
* The hypervisor assigns VAS resources based
* on entitled capacity for shared mode.
* Reconfig VAS windows based on DLPAR CPU events.
*/
if (pseries_vas_dlpar_cpu() != 0)
retval = H_HARDWARE;
}
} else if (!strcmp(kbuf, "capacity_weight")) { } else if (!strcmp(kbuf, "capacity_weight")) {
char *endp; char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
......
...@@ -200,16 +200,41 @@ static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) ...@@ -200,16 +200,41 @@ static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
struct vas_user_win_ref *tsk_ref; struct vas_user_win_ref *tsk_ref;
int rc; int rc;
rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); while (atomic_read(&txwin->pending_faults)) {
if (!rc) { rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
tsk_ref = &txwin->vas_win.task_ref; if (!rc) {
vas_dump_crb(&crb); tsk_ref = &txwin->vas_win.task_ref;
vas_update_csb(&crb, tsk_ref); vas_dump_crb(&crb);
vas_update_csb(&crb, tsk_ref);
}
atomic_dec(&txwin->pending_faults);
} }
return IRQ_HANDLED; return IRQ_HANDLED;
} }
/*
 * Primary handler for the VAS fault interrupt.
 *
 * irq_default_primary_handler() can be used only with IRQF_ONESHOT,
 * which disables the IRQ before executing the thread handler and
 * enables it afterwards. But disabling the interrupt sets the VAS IRQ
 * OFF state in the hypervisor. If the NX generates a fault interrupt
 * during this window, the hypervisor will not deliver that interrupt
 * to the LPAR. So use a VAS-specific IRQ handler instead of the
 * default primary handler.
 *
 * @irq:  interrupt number (unused here)
 * @data: the struct pseries_vas_window passed at IRQ registration
 *
 * Returns IRQ_WAKE_THREAD so the threaded handler runs.
 */
static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
{
struct pseries_vas_window *txwin = data;
/*
 * Count this fault. The thread handler loops while pending_faults
 * is non-zero, so it will process this interrupt even if it is
 * already running.
 */
atomic_inc(&txwin->pending_faults);
return IRQ_WAKE_THREAD;
}
/* /*
* Allocate window and setup IRQ mapping. * Allocate window and setup IRQ mapping.
*/ */
...@@ -240,8 +265,9 @@ static int allocate_setup_window(struct pseries_vas_window *txwin, ...@@ -240,8 +265,9 @@ static int allocate_setup_window(struct pseries_vas_window *txwin,
goto out_irq; goto out_irq;
} }
rc = request_threaded_irq(txwin->fault_virq, NULL, rc = request_threaded_irq(txwin->fault_virq,
pseries_vas_fault_thread_fn, IRQF_ONESHOT, pseries_vas_irq_handler,
pseries_vas_fault_thread_fn, 0,
txwin->name, txwin); txwin->name, txwin);
if (rc) { if (rc) {
pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
...@@ -826,6 +852,25 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds) ...@@ -826,6 +852,25 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds)
mutex_unlock(&vas_pseries_mutex); mutex_unlock(&vas_pseries_mutex);
return rc; return rc;
} }
/*
 * Reconfigure the default GZIP VAS windows after a DLPAR CPU change.
 *
 * Queries the hypervisor for the current default GZIP capabilities into
 * hv_cop_caps and, if that succeeds, passes the reported
 * target_lpar_creds to vas_reconfig_capabilties().
 *
 * Returns 0 on success; otherwise the non-zero value returned by
 * whichever of the two calls failed, after logging an error.
 */
int pseries_vas_dlpar_cpu(void)
{
	int rc;

	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
				      vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
				      (u64)virt_to_phys(&hv_cop_caps));

	/* Only resize the windows when the query itself succeeded. */
	if (rc == 0)
		rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
				be16_to_cpu(hv_cop_caps.target_lpar_creds));

	if (rc != 0)
		pr_err("Failed reconfig VAS capabilities with DLPAR\n");

	return rc;
}
/* /*
* Total number of default credits available (target_credits) * Total number of default credits available (target_credits)
* in LPAR depends on number of cores configured. It varies based on * in LPAR depends on number of cores configured. It varies based on
...@@ -840,7 +885,15 @@ static int pseries_vas_notifier(struct notifier_block *nb, ...@@ -840,7 +885,15 @@ static int pseries_vas_notifier(struct notifier_block *nb,
struct of_reconfig_data *rd = data; struct of_reconfig_data *rd = data;
struct device_node *dn = rd->dn; struct device_node *dn = rd->dn;
const __be32 *intserv = NULL; const __be32 *intserv = NULL;
int new_nr_creds, len, rc = 0; int len;
/*
* For shared CPU partition, the hypervisor assigns total credits
* based on entitled core capacity. So updating VAS windows will
* be called from lparcfg_write().
*/
if (is_shared_processor())
return NOTIFY_OK;
if ((action == OF_RECONFIG_ATTACH_NODE) || if ((action == OF_RECONFIG_ATTACH_NODE) ||
(action == OF_RECONFIG_DETACH_NODE)) (action == OF_RECONFIG_DETACH_NODE))
...@@ -852,19 +905,7 @@ static int pseries_vas_notifier(struct notifier_block *nb, ...@@ -852,19 +905,7 @@ static int pseries_vas_notifier(struct notifier_block *nb,
if (!intserv) if (!intserv)
return NOTIFY_OK; return NOTIFY_OK;
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, return pseries_vas_dlpar_cpu();
vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
(u64)virt_to_phys(&hv_cop_caps));
if (!rc) {
new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
new_nr_creds);
}
if (rc)
pr_err("Failed reconfig VAS capabilities with DLPAR\n");
return rc;
} }
static struct notifier_block pseries_vas_nb = { static struct notifier_block pseries_vas_nb = {
......
...@@ -132,6 +132,7 @@ struct pseries_vas_window { ...@@ -132,6 +132,7 @@ struct pseries_vas_window {
u64 flags; u64 flags;
char *name; char *name;
int fault_virq; int fault_virq;
atomic_t pending_faults; /* Number of pending faults */
}; };
int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
...@@ -140,10 +141,15 @@ int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); ...@@ -140,10 +141,15 @@ int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
#ifdef CONFIG_PPC_VAS #ifdef CONFIG_PPC_VAS
int vas_migration_handler(int action); int vas_migration_handler(int action);
int pseries_vas_dlpar_cpu(void);
#else #else
static inline int vas_migration_handler(int action) static inline int vas_migration_handler(int action)
{ {
return 0; return 0;
} }
static inline int pseries_vas_dlpar_cpu(void)
{
return 0;
}
#endif #endif
#endif /* _VAS_H */ #endif /* _VAS_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册