Commit b104e41c — openeuler/Kernel

Authored May 19, 2022 by Michael Ellerman

    Merge branch 'topic/ppc-kvm' into next

    Merge our KVM topic branch.

Parents: a5fc286f, ad55bae7

Showing 32 changed files with 921 additions and 1729 deletions.
arch/powerpc/include/asm/iommu.h                  +2    -4
arch/powerpc/include/asm/kvm_book3s_asm.h         +0    -3
arch/powerpc/include/asm/kvm_host.h               +7    -3
arch/powerpc/include/asm/kvm_ppc.h                +2   -12
arch/powerpc/include/asm/mmu_context.h            +0    -5
arch/powerpc/include/asm/reg.h                    +0    -3
arch/powerpc/kernel/iommu.c                       +2    -2
arch/powerpc/kvm/Makefile                         +1    -4
arch/powerpc/kvm/book3s_64_mmu_hv.c              +24   -16
arch/powerpc/kvm/book3s_64_vio.c                 +43    -0
arch/powerpc/kvm/book3s_64_vio_hv.c               +0  -672
arch/powerpc/kvm/book3s_hv.c                     +59   -14
arch/powerpc/kvm/book3s_hv_builtin.c              +0   -64
arch/powerpc/kvm/book3s_hv_nested.c              +70   -67
arch/powerpc/kvm/book3s_hv_p9_entry.c            +11    -4
arch/powerpc/kvm/book3s_hv_rm_xics.c              +6    -1
arch/powerpc/kvm/book3s_hv_rm_xive.c              +0   -46
arch/powerpc/kvm/book3s_hv_rmhandlers.S          +14   -16
arch/powerpc/kvm/book3s_hv_uvmem.c                +5    -3
arch/powerpc/kvm/book3s_pr_papr.c                 +6    -0
arch/powerpc/kvm/book3s_xive.c                  +627   -22
arch/powerpc/kvm/book3s_xive.h                    +0    -7
arch/powerpc/kvm/book3s_xive_template.c           +0  -636
arch/powerpc/kvm/e500mc.c                         +0    -1
arch/powerpc/kvm/powerpc.c                       +13   -17
arch/powerpc/kvm/trace_hv.h                       +4    -4
arch/powerpc/mm/book3s64/iommu_api.c              +0   -68
arch/powerpc/mm/init_64.c                         +3    -0
arch/powerpc/platforms/powernv/pci-ioda-tce.c     +2    -3
arch/powerpc/platforms/powernv/pci-ioda.c        +18   -28
arch/powerpc/platforms/powernv/pci.h              +1    -2
arch/powerpc/platforms/pseries/iommu.c            +1    -2
arch/powerpc/include/asm/iommu.h
@@ -51,13 +51,11 @@ struct iommu_table_ops {
 	int (*xchg_no_kill)(struct iommu_table *tbl,
 			long index,
 			unsigned long *hpa,
-			enum dma_data_direction *direction,
-			bool realmode);
+			enum dma_data_direction *direction);
 	void (*tce_kill)(struct iommu_table *tbl,
 			unsigned long index,
-			unsigned long pages,
-			bool realmode);
+			unsigned long pages);
 	__be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
 #endif

arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -14,9 +14,6 @@
 #define XICS_MFRR		0xc
 #define XICS_IPI		2	/* interrupt source # for IPIs */
 
-/* LPIDs we support with this build -- runtime limit may be lower */
-#define KVMPPC_NR_LPIDS	(LPID_RSVD + 1)
-
 /* Maximum number of threads per physical core */
 #define MAX_SMT_THREADS		8

arch/powerpc/include/asm/kvm_host.h
@@ -36,7 +36,12 @@
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #include <asm/kvm_book3s_asm.h>		/* for MAX_SMT_THREADS */
 #define KVM_MAX_VCPU_IDS	(MAX_SMT_THREADS * KVM_MAX_VCORES)
-#define KVM_MAX_NESTED_GUESTS	KVMPPC_NR_LPIDS
+
+/*
+ * Limit the nested partition table to 4096 entries (because that's what
+ * hardware supports). Both guest and host use this value.
+ */
+#define KVM_MAX_NESTED_GUESTS_SHIFT	12
 
 #else
 #define KVM_MAX_VCPU_IDS	KVM_MAX_VCPUS
@@ -327,8 +332,7 @@ struct kvm_arch {
 	struct list_head uvmem_pfns;
 	struct mutex mmu_setup_lock;	/* nests inside vcpu mutexes */
 	u64 l1_ptcr;
-	int max_nested_lpid;
-	struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS];
+	struct idr kvm_nested_guest_idr;
 	/* This array can grow quite large, keep it at the end */
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 #endif

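The comment added above packs in two facts: at most 4096 nested partition-table entries, and a 16-byte entry size. A standalone sketch of the arithmetic (not part of the patch; plain user-space C with the values hard-coded from the hunk) may help when reading the "+ 4" and "12 - 4" terms that appear in the book3s_hv_nested.c hunks further down:

/*
 * Illustrative only: relate KVM_MAX_NESTED_GUESTS_SHIFT to the number of
 * partition-table entries and the table size, given a 1 << 4 = 16 byte entry.
 */
#include <stdio.h>

#define KVM_MAX_NESTED_GUESTS_SHIFT 12

int main(void)
{
	unsigned long entries = 1UL << KVM_MAX_NESTED_GUESTS_SHIFT;	/* 4096 */
	unsigned long bytes = entries << 4;				/* 65536 */

	printf("%lu entries, %lu bytes\n", entries, bytes);
	return 0;
}
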
arch/powerpc/include/asm/kvm_ppc.h
@@ -177,8 +177,6 @@ extern void kvmppc_setup_partition_table(struct kvm *kvm);
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				struct kvm_create_spapr_tce_64 *args);
-extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
-		struct kvm *kvm, unsigned long liobn);
 #define kvmppc_ioba_validate(stt, ioba, npages)                         \
 		(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
 				(stt)->size, (ioba), (npages)) ?        \
@@ -685,7 +683,7 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 			       int level, bool line_status);
 extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
 extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
-extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
+extern bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
 
 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 {
@@ -723,7 +721,7 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
 				      int level, bool line_status) { return -ENODEV; }
 static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
 static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
-static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { }
+static inline bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { return true; }
 
 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 	{ return 0; }
@@ -789,13 +787,6 @@ long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
 			   unsigned long dest, unsigned long src);
 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 			  unsigned long slb_v, unsigned int status, bool data);
-unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
-unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
-unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
-int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-		    unsigned long mfrr);
-int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
-int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
 void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
 
 /*
@@ -877,7 +868,6 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
 			     struct kvm_dirty_tlb *cfg);
 
 long kvmppc_alloc_lpid(void);
-void kvmppc_claim_lpid(long lpid);
 void kvmppc_free_lpid(long lpid);
 void kvmppc_init_lpid(unsigned long nr_lpids);

arch/powerpc/include/asm/mmu_context.h
@@ -34,15 +34,10 @@ extern void mm_iommu_init(struct mm_struct *mm);
 extern void mm_iommu_cleanup(struct mm_struct *mm);
 extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
 		unsigned long ua, unsigned long size);
-extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
-		struct mm_struct *mm, unsigned long ua, unsigned long size);
 extern struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
 		unsigned long ua, unsigned long entries);
 extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
-extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
-extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
 extern bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
 		unsigned int pageshift, unsigned long *size);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);

arch/powerpc/include/asm/reg.h
@@ -417,7 +417,6 @@
 #define   FSCR_DSCR	__MASK(FSCR_DSCR_LG)
 #define   FSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56)	/* interrupt cause */
 #define SPRN_HFSCR	0xbe	/* HV=1 Facility Status & Control Register */
-#define   HFSCR_PREFIX	__MASK(FSCR_PREFIX_LG)
 #define   HFSCR_MSGP	__MASK(FSCR_MSGP_LG)
 #define   HFSCR_TAR	__MASK(FSCR_TAR_LG)
 #define   HFSCR_EBB	__MASK(FSCR_EBB_LG)
@@ -474,8 +473,6 @@
 #ifndef SPRN_LPID
 #define SPRN_LPID	0x13F	/* Logical Partition Identifier */
 #endif
-#define   LPID_RSVD_POWER7	0x3ff	/* Reserved LPID for partn switching */
-#define   LPID_RSVD		0xfff	/* Reserved LPID for partn switching */
 #define	SPRN_HMER	0x150	/* Hypervisor maintenance exception reg */
 #define   HMER_DEBUG_TRIG	(1ul << (63 - 17)) /* Debug trigger */
 #define	SPRN_HMEER	0x151	/* Hyp maintenance exception enable reg */

arch/powerpc/kernel/iommu.c
@@ -1064,7 +1064,7 @@ extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
 	long ret;
 	unsigned long size = 0;
 
-	ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
+	ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction);
 	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
 			(*direction == DMA_BIDIRECTIONAL)) &&
 			!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
@@ -1079,7 +1079,7 @@ void iommu_tce_kill(struct iommu_table *tbl,
 		unsigned long entry, unsigned long pages)
 {
 	if (tbl->it_ops->tce_kill)
-		tbl->it_ops->tce_kill(tbl, entry, pages, false);
+		tbl->it_ops->tce_kill(tbl, entry, pages);
 }
 EXPORT_SYMBOL_GPL(iommu_tce_kill);

arch/powerpc/kvm/Makefile
@@ -37,9 +37,6 @@ kvm-e500mc-objs := \
 	e500_emulate.o
 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 
-kvm-book3s_64-builtin-objs-$(CONFIG_SPAPR_TCE_IOMMU) := \
-	book3s_64_vio_hv.o
-
 kvm-pr-y := \
 	fpu.o \
 	emulate.o \
@@ -76,7 +73,7 @@ kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 	book3s_hv_tm.o
 
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
-	book3s_hv_rm_xics.o book3s_hv_rm_xive.o
+	book3s_hv_rm_xics.o
 
 kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 	book3s_hv_tm_builtin.o

arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -256,26 +256,34 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 
 int kvmppc_mmu_hv_init(void)
 {
-	unsigned long host_lpid, rsvd_lpid;
+	unsigned long nr_lpids;
 
 	if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
 		return -EINVAL;
 
-	host_lpid = 0;
-	if (cpu_has_feature(CPU_FTR_HVMODE))
-		host_lpid = mfspr(SPRN_LPID);
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		if (WARN_ON(mfspr(SPRN_LPID) != 0))
+			return -EINVAL;
+		nr_lpids = 1UL << mmu_lpid_bits;
+	} else {
+		nr_lpids = 1UL << KVM_MAX_NESTED_GUESTS_SHIFT;
+	}
 
-	/* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */
-	if (cpu_has_feature(CPU_FTR_ARCH_207S))
-		rsvd_lpid = LPID_RSVD;
-	else
-		rsvd_lpid = LPID_RSVD_POWER7;
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/* POWER7 has 10-bit LPIDs, POWER8 has 12-bit LPIDs */
+		if (cpu_has_feature(CPU_FTR_ARCH_207S))
+			WARN_ON(nr_lpids != 1UL << 12);
+		else
+			WARN_ON(nr_lpids != 1UL << 10);
 
-	kvmppc_init_lpid(rsvd_lpid + 1);
+		/*
+		 * Reserve the last implemented LPID use in partition
+		 * switching for POWER7 and POWER8.
+		 */
+		nr_lpids -= 1;
+	}
 
-	kvmppc_claim_lpid(host_lpid);
-	/* rsvd_lpid is reserved for use in partition switching */
-	kvmppc_claim_lpid(rsvd_lpid);
+	kvmppc_init_lpid(nr_lpids);
 
 	return 0;
 }
@@ -879,7 +887,7 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	struct revmap_entry *rev = kvm->arch.hpt.rev;
 	unsigned long head, i, j;
 	__be64 *hptep;
-	int ret = 0;
+	bool ret = false;
 	unsigned long *rmapp;
 
 	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
@@ -887,7 +895,7 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	lock_rmap(rmapp);
 	if (*rmapp & KVMPPC_RMAP_REFERENCED) {
 		*rmapp &= ~KVMPPC_RMAP_REFERENCED;
-		ret = 1;
+		ret = true;
 	}
 	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
 		unlock_rmap(rmapp);
@@ -919,7 +927,7 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 				rev[i].guest_rpte |= HPTE_R_R;
 				note_hpte_modification(kvm, &rev[i]);
 			}
-			ret = 1;
+			ret = true;
 		}
 		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
 	} while ((i = j) != head);

arch/powerpc/kvm/book3s_64_vio.c
@@ -32,6 +32,18 @@
 #include <asm/tce.h>
 #include <asm/mmu_context.h>
 
+static struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
+	unsigned long liobn)
+{
+	struct kvmppc_spapr_tce_table *stt;
+
+	list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
+		if (stt->liobn == liobn)
+			return stt;
+
+	return NULL;
+}
+
 static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
 {
 	return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
@@ -753,3 +765,34 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
+
+long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+		      unsigned long ioba)
+{
+	struct kvmppc_spapr_tce_table *stt;
+	long ret;
+	unsigned long idx;
+	struct page *page;
+	u64 *tbl;
+
+	stt = kvmppc_find_table(vcpu->kvm, liobn);
+	if (!stt)
+		return H_TOO_HARD;
+
+	ret = kvmppc_ioba_validate(stt, ioba, 1);
+	if (ret != H_SUCCESS)
+		return ret;
+
+	idx = (ioba >> stt->page_shift) - stt->offset;
+	page = stt->pages[idx / TCES_PER_PAGE];
+	if (!page) {
+		vcpu->arch.regs.gpr[4] = 0;
+		return H_SUCCESS;
+	}
+	tbl = (u64 *)page_address(page);
+
+	vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);

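The easy part of kvmppc_h_get_tce() to misread is the ioba-to-entry arithmetic. A minimal user-space sketch (not kernel code; the window numbers are invented, and only TCES_PER_PAGE mirrors the kernel's PAGE_SIZE/sizeof(u64) definition) of how the I/O bus address is decomposed:

/* Illustrative only: split an ioba into TCE-table page and in-page slot. */
#include <stdint.h>
#include <stdio.h>

#define TCES_PER_PAGE (4096 / sizeof(uint64_t))	/* 512 on a 4 KiB page */

int main(void)
{
	unsigned long ioba = 0x20003000UL;	/* hypothetical DMA address */
	unsigned int page_shift = 12;		/* 4 KiB IOMMU pages */
	unsigned long offset = 0x20000UL;	/* window start, in IOMMU pages */

	unsigned long idx = (ioba >> page_shift) - offset;	/* 3 */
	unsigned long page = idx / TCES_PER_PAGE;		/* backing page 0 */
	unsigned long slot = idx % TCES_PER_PAGE;		/* entry 3 */

	printf("idx=%lu page=%lu slot=%lu\n", idx, page, slot);
	return 0;
}
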
arch/powerpc/kvm/book3s_64_vio_hv.c (deleted, 100644 → 0, -672 lines)

The entire file is removed. It contained the real-mode TCE hcall path for HV
KVM (GPL-2.0-only; copyright 2010 Paul Mackerras, 2011 David Gibson and 2016
Alexey Kardashevskiy, IBM) — the WARN_ON_ONCE_RM() helper macro plus:

  - kvmppc_find_table()             LIOBN to TCE-table descriptor lookup
  - kvmppc_rm_tce_to_ua(), kvmppc_rm_tce_validate(), kvmppc_rm_tce_put(),
    kvmppc_rm_ioba_validate()       real-mode TCE/IOBA validation and store
  - kvmppc_page_address()           page_address() wrapper usable in real mode
  - iommu_tce_xchg_no_kill_rm(), iommu_tce_kill_rm(), kvmppc_rm_clear_tce(),
    kvmppc_rm_tce_iommu_mapped_dec(), kvmppc_rm_tce_iommu_do_unmap(),
    kvmppc_rm_tce_iommu_unmap(), kvmppc_rm_tce_iommu_do_map(),
    kvmppc_rm_tce_iommu_map()       real-mode IOMMU table manipulation
  - kvmppc_rm_ua_to_hpa()           userspace address to host-physical lookup
  - kvmppc_rm_h_put_tce(), kvmppc_rm_h_put_tce_indirect(),
    kvmppc_rm_h_stuff_tce(), kvmppc_h_get_tce()
                                    the H_PUT_TCE / H_PUT_TCE_INDIRECT /
                                    H_STUFF_TCE / H_GET_TCE real-mode handlers

Of these, only kvmppc_h_get_tce() survives, re-added in virtual mode in
book3s_64_vio.c above; the real-mode entries for these hcalls are dropped
from the hcall_real_table in book3s_hv_rmhandlers.S below.

arch/powerpc/kvm/book3s_hv.c
@@ -1327,6 +1327,12 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
 	case H_CONFER:
 	case H_REGISTER_VPA:
 	case H_SET_MODE:
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	case H_GET_TCE:
+	case H_PUT_TCE:
+	case H_PUT_TCE_INDIRECT:
+	case H_STUFF_TCE:
+#endif
 	case H_LOGICAL_CI_LOAD:
 	case H_LOGICAL_CI_STORE:
 #ifdef CONFIG_KVM_XICS
@@ -2835,7 +2841,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 	 * to trap and then we emulate them.
 	 */
 	vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
-		HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
+		HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -3968,6 +3974,7 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
 	kvmhv_save_hv_regs(vcpu, &hvregs);
 	hvregs.lpcr = lpcr;
 	hvregs.amor = ~0;
+	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
 	hvregs.version = HV_GUEST_STATE_VERSION;
 	if (vcpu->arch.nested) {
@@ -4030,6 +4037,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 			 unsigned long lpcr, u64 *tb)
 {
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
 	u64 next_timer;
 	int trap;
@@ -4049,34 +4058,61 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
 
 		/* H_CEDE has to be handled now, not later */
-		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+		if (trap == BOOK3S_INTERRUPT_SYSCALL && !nested &&
 		    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
 			kvmppc_cede(vcpu);
 			kvmppc_set_gpr(vcpu, 3, 0);
 			trap = 0;
 		}
 
-	} else {
-		struct kvm *kvm = vcpu->kvm;
+	} else if (nested) {
+		__this_cpu_write(cpu_in_guest, kvm);
+		trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+		__this_cpu_write(cpu_in_guest, NULL);
 
+	} else {
 		kvmppc_xive_push_vcpu(vcpu);
 
 		__this_cpu_write(cpu_in_guest, kvm);
 		trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
 		__this_cpu_write(cpu_in_guest, NULL);
 
-		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+		if (trap == BOOK3S_INTERRUPT_SYSCALL &&
 		    !(vcpu->arch.shregs.msr & MSR_PR)) {
 			unsigned long req = kvmppc_get_gpr(vcpu, 3);
 
-			/* H_CEDE has to be handled now, not later */
+			/*
+			 * XIVE rearm and XICS hcalls must be handled
+			 * before xive context is pulled (is this
+			 * true?)
+			 */
 			if (req == H_CEDE) {
+				/* H_CEDE has to be handled now */
 				kvmppc_cede(vcpu);
-				kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+				if (!kvmppc_xive_rearm_escalation(vcpu)) {
+					/*
+					 * Pending escalation so abort
+					 * the cede.
+					 */
+					vcpu->arch.ceded = 0;
+				}
 				kvmppc_set_gpr(vcpu, 3, 0);
 				trap = 0;
 
-			/* XICS hcalls must be handled before xive is pulled */
+			} else if (req == H_ENTER_NESTED) {
+				/*
+				 * L2 should not run with the L1
+				 * context so rearm and pull it.
+				 */
+				if (!kvmppc_xive_rearm_escalation(vcpu)) {
+					/*
+					 * Pending escalation so abort
+					 * H_ENTER_NESTED.
+					 */
+					kvmppc_set_gpr(vcpu, 3, 0);
+					trap = 0;
+				}
+
 			} else if (hcall_is_xics(req)) {
 				int ret;
@@ -4234,13 +4270,13 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 	start_wait = ktime_get();
 
 	vc->vcore_state = VCORE_SLEEPING;
-	trace_kvmppc_vcore_blocked(vc, 0);
+	trace_kvmppc_vcore_blocked(vc->runner, 0);
 	spin_unlock(&vc->lock);
 	schedule();
 	finish_rcuwait(&vc->wait);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
-	trace_kvmppc_vcore_blocked(vc, 1);
+	trace_kvmppc_vcore_blocked(vc->runner, 1);
 	++vc->runner->stat.halt_successful_wait;
 
 	cur = ktime_get();
@@ -4520,9 +4556,14 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 	if (!nested) {
 		kvmppc_core_prepare_to_enter(vcpu);
-		if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
-			     &vcpu->arch.pending_exceptions))
+		if (vcpu->arch.shregs.msr & MSR_EE) {
+			if (xive_interrupt_pending(vcpu))
+				kvmppc_inject_interrupt_hv(vcpu,
+						BOOK3S_INTERRUPT_EXTERNAL, 0);
+		} else if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
+			     &vcpu->arch.pending_exceptions)) {
 			lpcr |= LPCR_MER;
+		}
 	} else if (vcpu->arch.pending_exceptions ||
 		   vcpu->arch.doorbell_request ||
 		   xive_interrupt_pending(vcpu)) {
@@ -4620,9 +4661,9 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 			if (kvmppc_vcpu_check_block(vcpu))
 				break;
 
-			trace_kvmppc_vcore_blocked(vc, 0);
+			trace_kvmppc_vcore_blocked(vcpu, 0);
 			schedule();
-			trace_kvmppc_vcore_blocked(vc, 1);
+			trace_kvmppc_vcore_blocked(vcpu, 1);
 		}
 		finish_rcuwait(wait);
 	}
@@ -5284,6 +5325,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
 		lpcr &= LPCR_PECE | LPCR_LPES;
 	} else {
+		/*
+		 * The L2 LPES mode will be set by the L0 according to whether
+		 * or not it needs to take external interrupts in HV mode.
+		 */
 		lpcr = 0;
 	}
 	lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |

arch/powerpc/kvm/book3s_hv_builtin.c
@@ -489,70 +489,6 @@ static long kvmppc_read_one_intr(bool *again)
 	return kvmppc_check_passthru(xisr, xirr, again);
 }
 
-#ifdef CONFIG_KVM_XICS
-unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_xirr(vcpu);
-	else
-		return xics_rm_h_xirr(vcpu);
-}
-
-unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	vcpu->arch.regs.gpr[5] = get_tb();
-	if (xics_on_xive())
-		return xive_rm_h_xirr(vcpu);
-	else
-		return xics_rm_h_xirr(vcpu);
-}
-
-unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_ipoll(vcpu, server);
-	else
-		return H_TOO_HARD;
-}
-
-int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-		    unsigned long mfrr)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_ipi(vcpu, server, mfrr);
-	else
-		return xics_rm_h_ipi(vcpu, server, mfrr);
-}
-
-int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_cppr(vcpu, cppr);
-	else
-		return xics_rm_h_cppr(vcpu, cppr);
-}
-
-int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
-{
-	if (!kvmppc_xics_enabled(vcpu))
-		return H_TOO_HARD;
-	if (xics_on_xive())
-		return xive_rm_h_eoi(vcpu, xirr);
-	else
-		return xics_rm_h_eoi(vcpu, xirr);
-}
-#endif /* CONFIG_KVM_XICS */
-
 void kvmppc_bad_interrupt(struct pt_regs *regs)
 {
 	/*

arch/powerpc/kvm/book3s_hv_nested.c
@@ -261,8 +261,7 @@ static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
 	/*
 	 * Don't let L1 change LPCR bits for the L2 except these:
 	 */
-	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
-		LPCR_LPES | LPCR_MER;
+	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | LPCR_MER;
 
 	/*
 	 * Additional filtering is required depending on hardware
@@ -439,10 +438,11 @@ long kvmhv_nested_init(void)
 	if (!radix_enabled())
 		return -ENODEV;
 
-	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
-	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
-	if (ptb_order < 8)
-		ptb_order = 8;
+	/* Partition table entry is 1<<4 bytes in size, hence the 4. */
+	ptb_order = KVM_MAX_NESTED_GUESTS_SHIFT + 4;
+	/* Minimum partition table size is 1<<12 bytes */
+	if (ptb_order < 12)
+		ptb_order = 12;
 	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
 				       GFP_KERNEL);
 	if (!pseries_partition_tb) {
@@ -450,7 +450,7 @@ long kvmhv_nested_init(void)
 		return -ENOMEM;
 	}
 
-	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
+	ptcr = __pa(pseries_partition_tb) | (ptb_order - 12);
 	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
 	if (rc != H_SUCCESS) {
 		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
@@ -521,11 +521,6 @@ static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
 	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
 }
 
-void kvmhv_vm_nested_init(struct kvm *kvm)
-{
-	kvm->arch.max_nested_lpid = -1;
-}
-
 /*
  * Handle the H_SET_PARTITION_TABLE hcall.
  * r4 = guest real address of partition table + log_2(size) - 12
@@ -539,16 +534,14 @@ long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
 	long ret = H_SUCCESS;
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
-	/*
-	 * Limit the partition table to 4096 entries (because that's what
-	 * hardware supports), and check the base address.
-	 */
-	if ((ptcr & PRTS_MASK) > 12 - 8 ||
+	/* Check partition size and base address. */
+	if ((ptcr & PRTS_MASK) + 12 - 4 > KVM_MAX_NESTED_GUESTS_SHIFT ||
 	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
 		ret = H_PARAMETER;
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 	if (ret == H_SUCCESS)
 		kvm->arch.l1_ptcr = ptcr;
+
 	return ret;
 }
@@ -644,7 +637,7 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
 	ret = -EFAULT;
 	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
-	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) {
+	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4))) {
 		int srcu_idx = srcu_read_lock(&kvm->srcu);
 		ret = kvm_read_guest(kvm, ptbl_addr,
 				     &ptbl_entry, sizeof(ptbl_entry));
@@ -660,6 +653,35 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
 	kvmhv_set_nested_ptbl(gp);
 }
 
+void kvmhv_vm_nested_init(struct kvm *kvm)
+{
+	idr_init(&kvm->arch.kvm_nested_guest_idr);
+}
+
+static struct kvm_nested_guest *__find_nested(struct kvm *kvm, int lpid)
+{
+	return idr_find(&kvm->arch.kvm_nested_guest_idr, lpid);
+}
+
+static bool __prealloc_nested(struct kvm *kvm, int lpid)
+{
+	if (idr_alloc(&kvm->arch.kvm_nested_guest_idr,
+				NULL, lpid, lpid + 1, GFP_KERNEL) != lpid)
+		return false;
+	return true;
+}
+
+static void __add_nested(struct kvm *kvm, int lpid, struct kvm_nested_guest *gp)
+{
+	if (idr_replace(&kvm->arch.kvm_nested_guest_idr, gp, lpid))
+		WARN_ON(1);
+}
+
+static void __remove_nested(struct kvm *kvm, int lpid)
+{
+	idr_remove(&kvm->arch.kvm_nested_guest_idr, lpid);
+}
+
 static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
 {
 	struct kvm_nested_guest *gp;
@@ -720,13 +742,8 @@ static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
 	long ref;
 
 	spin_lock(&kvm->mmu_lock);
-	if (gp == kvm->arch.nested_guests[lpid]) {
-		kvm->arch.nested_guests[lpid] = NULL;
-		if (lpid == kvm->arch.max_nested_lpid) {
-			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
-				;
-			kvm->arch.max_nested_lpid = lpid;
-		}
+	if (gp == __find_nested(kvm, lpid)) {
+		__remove_nested(kvm, lpid);
 		--gp->refcnt;
 	}
 	ref = gp->refcnt;
@@ -743,24 +760,22 @@ static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
  */
 void kvmhv_release_all_nested(struct kvm *kvm)
 {
-	int i;
+	int lpid;
 	struct kvm_nested_guest *gp;
 	struct kvm_nested_guest *freelist = NULL;
 	struct kvm_memory_slot *memslot;
 	int srcu_idx, bkt;
 
 	spin_lock(&kvm->mmu_lock);
-	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
-		gp = kvm->arch.nested_guests[i];
-		if (!gp)
-			continue;
-		kvm->arch.nested_guests[i] = NULL;
+	idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
+		__remove_nested(kvm, lpid);
 		if (--gp->refcnt == 0) {
 			gp->next = freelist;
 			freelist = gp;
 		}
 	}
-	kvm->arch.max_nested_lpid = -1;
+	idr_destroy(&kvm->arch.kvm_nested_guest_idr);
+	/* idr is empty and may be reused at this point */
 	spin_unlock(&kvm->mmu_lock);
 	while ((gp = freelist) != NULL) {
 		freelist = gp->next;
@@ -792,12 +807,11 @@ struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
 {
 	struct kvm_nested_guest *gp, *newgp;
 
-	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
-	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
+	if (l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
 		return NULL;
 
 	spin_lock(&kvm->mmu_lock);
-	gp = kvm->arch.nested_guests[l1_lpid];
+	gp = __find_nested(kvm, l1_lpid);
 	if (gp)
 		++gp->refcnt;
 	spin_unlock(&kvm->mmu_lock);
@@ -808,17 +822,19 @@ struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
 	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
 	if (!newgp)
 		return NULL;
+
+	if (!__prealloc_nested(kvm, l1_lpid)) {
+		kvmhv_release_nested(newgp);
+		return NULL;
+	}
+
 	spin_lock(&kvm->mmu_lock);
-	if (kvm->arch.nested_guests[l1_lpid]) {
-		/* someone else beat us to it */
-		gp = kvm->arch.nested_guests[l1_lpid];
-	} else {
-		kvm->arch.nested_guests[l1_lpid] = newgp;
+	gp = __find_nested(kvm, l1_lpid);
+	if (!gp) {
+		__add_nested(kvm, l1_lpid, newgp);
 		++newgp->refcnt;
 		gp = newgp;
 		newgp = NULL;
-		if (l1_lpid > kvm->arch.max_nested_lpid)
-			kvm->arch.max_nested_lpid = l1_lpid;
 	}
 	++gp->refcnt;
 	spin_unlock(&kvm->mmu_lock);
@@ -841,20 +857,13 @@ void kvmhv_put_nested(struct kvm_nested_guest *gp)
 		kvmhv_release_nested(gp);
 }
 
-static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
-{
-	if (lpid > kvm->arch.max_nested_lpid)
-		return NULL;
-	return kvm->arch.nested_guests[lpid];
-}
-
 pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
 				 unsigned long ea, unsigned *hshift)
 {
 	struct kvm_nested_guest *gp;
 	pte_t *pte;
 
-	gp = kvmhv_find_nested(kvm, lpid);
+	gp = __find_nested(kvm, lpid);
 	if (!gp)
 		return NULL;
@@ -960,7 +969,7 @@ static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
 	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
 	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
-	gp = kvmhv_find_nested(kvm, lpid);
+	gp = __find_nested(kvm, lpid);
 	if (!gp)
 		return;
@@ -1152,17 +1161,14 @@ static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
 {
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_nested_guest *gp;
-	int i;
+	int lpid;
 
 	spin_lock(&kvm->mmu_lock);
-	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
-		gp = kvm->arch.nested_guests[i];
-		if (gp) {
-			spin_unlock(&kvm->mmu_lock);
-			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
-			spin_lock(&kvm->mmu_lock);
-		}
+	idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
+		spin_unlock(&kvm->mmu_lock);
+		kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+		spin_lock(&kvm->mmu_lock);
 	}
 	spin_unlock(&kvm->mmu_lock);
 }
@@ -1313,7 +1319,7 @@ long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
 	 * H_ENTER_NESTED call. Since we can't differentiate this case from
 	 * the invalid case, we ignore such flush requests and return success.
 	 */
-	if (!kvmhv_find_nested(vcpu->kvm, lpid))
+	if (!__find_nested(vcpu->kvm, lpid))
 		return H_SUCCESS;
 
 	/*
@@ -1657,15 +1663,12 @@ long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
 int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
 {
-	int ret = -1;
+	int ret = lpid + 1;
 
 	spin_lock(&kvm->mmu_lock);
-	while (++lpid <= kvm->arch.max_nested_lpid) {
-		if (kvm->arch.nested_guests[lpid]) {
-			ret = lpid;
-			break;
-		}
-	}
+	if (!idr_get_next(&kvm->arch.kvm_nested_guest_idr, &ret))
+		ret = -1;
 	spin_unlock(&kvm->mmu_lock);
 	return ret;
 }

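The helpers introduced above replace the fixed nested_guests[] array with a Linux IDR keyed by the L1 LPID. A minimal kernel-style sketch of the reserve-then-publish pattern that __prealloc_nested()/__add_nested() rely on (not from the patch: my_guest, my_idr, my_lock and publish_guest are made-up names; idr_alloc/idr_replace/idr_find/idr_remove and DEFINE_IDR are the real <linux/idr.h> API). The point of the split is that the possibly-sleeping idr_alloc() runs before the spinlock is taken:

#include <linux/idr.h>
#include <linux/spinlock.h>

struct my_guest {
	int lpid;
};

static DEFINE_IDR(my_idr);
static DEFINE_SPINLOCK(my_lock);

/* Reserve the slot first (may sleep), then publish under the lock. */
static int publish_guest(struct my_guest *gp, int lpid)
{
	/* Reserve exactly this id, storing NULL for now. */
	if (idr_alloc(&my_idr, NULL, lpid, lpid + 1, GFP_KERNEL) != lpid)
		return -EBUSY;

	spin_lock(&my_lock);
	/* The old value must be the NULL placeholder reserved above. */
	WARN_ON(idr_replace(&my_idr, gp, lpid) != NULL);
	spin_unlock(&my_lock);

	return 0;
}

static struct my_guest *lookup_guest(int lpid)
{
	return idr_find(&my_idr, lpid);	/* NULL if absent or not yet published */
}

static void remove_guest(int lpid)
{
	spin_lock(&my_lock);
	idr_remove(&my_idr, lpid);
	spin_unlock(&my_lock);
}

A reserved-but-unpublished id stores NULL, so readers using idr_find() simply see "not there" until the guest is fully set up, which mirrors how __find_nested() behaves between __prealloc_nested() and __add_nested().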
arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -539,8 +539,10 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
 {
 	struct kvm_nested_guest *nested = vcpu->arch.nested;
 	u32 lpid;
+	u32 pid;
 
 	lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+	pid = vcpu->arch.pid;
 
 	/*
 	 * Prior memory accesses to host PID Q3 must be completed before we
@@ -551,7 +553,7 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
 	isync();
 	mtspr(SPRN_LPID, lpid);
 	mtspr(SPRN_LPCR, lpcr);
-	mtspr(SPRN_PID, vcpu->arch.pid);
+	mtspr(SPRN_PID, pid);
 	/*
 	 * isync not required here because we are HRFID'ing to guest before
 	 * any guest context access, which is context synchronising.
@@ -561,9 +563,11 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
 static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
 {
 	u32 lpid;
+	u32 pid;
 	int i;
 
 	lpid = kvm->arch.lpid;
+	pid = vcpu->arch.pid;
 
 	/*
 	 * See switch_mmu_to_guest_radix. ptesync should not be required here
@@ -574,7 +578,7 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
 	isync();
 	mtspr(SPRN_LPID, lpid);
 	mtspr(SPRN_LPCR, lpcr);
-	mtspr(SPRN_PID, vcpu->arch.pid);
+	mtspr(SPRN_PID, pid);
 
 	for (i = 0; i < vcpu->arch.slb_max; i++)
 		mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
@@ -585,6 +589,9 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
 static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
 {
+	u32 lpid = kvm->arch.host_lpid;
+	u64 lpcr = kvm->arch.host_lpcr;
+
 	/*
 	 * The guest has exited, so guest MMU context is no longer being
 	 * non-speculatively accessed, but a hwsync is needed before the
@@ -594,8 +601,8 @@ static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
 	asm volatile("hwsync" ::: "memory");
 	isync();
 	mtspr(SPRN_PID, pid);
-	mtspr(SPRN_LPID, kvm->arch.host_lpid);
-	mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
+	mtspr(SPRN_LPID, lpid);
+	mtspr(SPRN_LPCR, lpcr);
 	/*
 	 * isync is not required after the switch, because mtmsrd with L=0
 	 * is performed after this switch, which is context synchronising.

arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -479,6 +479,11 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 	}
 }
 
+unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.regs.gpr[5] = get_tb();
+	return xics_rm_h_xirr(vcpu);
+}
+
 unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
 {
@@ -883,7 +888,7 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
 
 /* --- Non-real mode XICS-related built-in routines --- */
 
-/**
+/*
  * Host Operations poked by RM KVM
  */
 static void rm_host_ipi_action(int action, void *data)

arch/powerpc/kvm/book3s_hv_rm_xive.c (deleted, 100644 → 0)

// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/kernel_stat.h>
#include <linux/pgtable.h>

#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/synch.h>
#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
#include <asm/pnv-pci.h>
#include <asm/opal.h>
#include <asm/smp.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>

#include "book3s_xive.h"

/* XXX */
#include <asm/udbg.h>
//#define DBG(fmt...) udbg_printf(fmt)
#define DBG(fmt...) do { } while(0)

static inline void __iomem *get_tima_phys(void)
{
	return local_paca->kvm_hstate.xive_tima_phys;
}

#undef XIVE_RUNTIME_CHECKS
#define X_PFX xive_rm_
#define X_STATIC
#define X_STAT_PFX stat_rm_
#define __x_tima		get_tima_phys()
#define __x_eoi_page(xd)	((void __iomem *)((xd)->eoi_page))
#define __x_trig_page(xd)	((void __iomem *)((xd)->trig_page))
#define __x_writeb	__raw_rm_writeb
#define __x_readw	__raw_rm_readw
#define __x_readq	__raw_rm_readq
#define __x_writeq	__raw_rm_writeq

#include "book3s_xive_template.c"

arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -50,6 +50,14 @@
 #define STACK_SLOT_UAMOR	(SFS-88)
 #define STACK_SLOT_FSCR		(SFS-96)
 
+/*
+ * Use the last LPID (all implemented LPID bits = 1) for partition switching.
+ * This is reserved in the LPID allocator. POWER7 only implements 0x3ff, but
+ * we write 0xfff into the LPID SPR anyway, which seems to work and just
+ * ignores the top bits.
+ */
+#define LPID_RSVD	0xfff
+
 /*
  * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
@@ -1784,13 +1792,8 @@ hcall_real_table:
 	.long	DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
 	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table
-#ifdef CONFIG_SPAPR_TCE_IOMMU
-	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table
-#else
 	.long	0		/* 0x1c */
 	.long	0		/* 0x20 */
-#endif
 	.long	0		/* 0x24 - H_SET_SPRG0 */
 	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
 	.long	DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table
@@ -1808,11 +1811,11 @@ hcall_real_table:
 	.long	0		/* 0x5c */
 	.long	0		/* 0x60 */
 #ifdef CONFIG_KVM_XICS
-	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
+	.long	DOTSYM(xics_rm_h_eoi) - hcall_real_table
+	.long	DOTSYM(xics_rm_h_cppr) - hcall_real_table
+	.long	DOTSYM(xics_rm_h_ipi) - hcall_real_table
+	.long	0		/* 0x70 - H_IPOLL */
+	.long	DOTSYM(xics_rm_h_xirr) - hcall_real_table
 #else
 	.long	0		/* 0x64 - H_EOI */
 	.long	0		/* 0x68 - H_CPPR */
@@ -1868,13 +1871,8 @@ hcall_real_table:
 	.long	0		/* 0x12c */
 	.long	0		/* 0x130 */
 	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
-#ifdef CONFIG_SPAPR_TCE_IOMMU
-	.long	DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table
-	.long	DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
-#else
 	.long	0		/* 0x138 */
 	.long	0		/* 0x13c */
-#endif
 	.long	0		/* 0x140 */
 	.long	0		/* 0x144 */
 	.long	0		/* 0x148 */
@@ -1987,7 +1985,7 @@ hcall_real_table:
 	.long	0		/* 0x2f4 */
 	.long	0		/* 0x2f8 */
 #ifdef CONFIG_KVM_XICS
-	.long	DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
+	.long	DOTSYM(xics_rm_h_xirr_x) - hcall_real_table
 #else
 	.long	0		/* 0x2fc - H_XIRR_X*/
 #endif

arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -361,13 +361,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
 static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
 		struct kvm *kvm, unsigned long *gfn)
 {
-	struct kvmppc_uvmem_slot *p;
+	struct kvmppc_uvmem_slot *p = NULL, *iter;
 	bool ret = false;
 	unsigned long i;
 
-	list_for_each_entry(p, &kvm->arch.uvmem_pfns, list)
-		if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns)
+	list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list)
+		if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) {
+			p = iter;
 			break;
+		}
 	if (!p)
 		return ret;
 	/*

arch/powerpc/kvm/book3s_pr_papr.c
@@ -433,9 +433,12 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
 	case H_REMOVE:
 	case H_PROTECT:
 	case H_BULK_REMOVE:
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	case H_GET_TCE:
 	case H_PUT_TCE:
 	case H_PUT_TCE_INDIRECT:
 	case H_STUFF_TCE:
+#endif
 	case H_CEDE:
 	case H_LOGICAL_CI_LOAD:
 	case H_LOGICAL_CI_STORE:
@@ -464,7 +467,10 @@ static unsigned int default_hcall_list[] = {
 	H_REMOVE,
 	H_PROTECT,
 	H_BULK_REMOVE,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+	H_GET_TCE,
 	H_PUT_TCE,
+#endif
 	H_CEDE,
 	H_SET_MODE,
 #ifdef CONFIG_KVM_XICS

arch/powerpc/kvm/book3s_xive.c
浏览文件 @
b104e41c
...
...
@@ -30,27 +30,629 @@
#include "book3s_xive.h"
/*
* Virtual mode variants of the hcalls for use on radix/radix
* with AIL. They require the VCPU's VP to be "pushed"
*
* We still instantiate them here because we use some of the
* generated utility functions as well in this file.
*/
#define XIVE_RUNTIME_CHECKS
#define X_PFX xive_vm_
#define X_STATIC static
#define X_STAT_PFX stat_vm_
#define __x_tima xive_tima
#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
#define __x_writeb __raw_writeb
#define __x_readw __raw_readw
#define __x_readq __raw_readq
#define __x_writeq __raw_writeq
#include "book3s_xive_template.c"
/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
#define XICS_DUMMY 1
+
+static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
+{
+	u8 cppr;
+	u16 ack;
+
+	/*
+	 * Ensure any previous store to CPPR is ordered vs.
+	 * the subsequent loads from PIPR or ACK.
+	 */
+	eieio();
+
+	/* Perform the acknowledge OS to register cycle. */
+	ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+	/* Synchronize subsequent queue accesses */
+	mb();
+
+	/* XXX Check grouping level */
+
+	/* Anything ? */
+	if (!((ack >> 8) & TM_QW1_NSR_EO))
+		return;
+
+	/* Grab CPPR of the most favored pending interrupt */
+	cppr = ack & 0xff;
+	if (cppr < 8)
+		xc->pending |= 1 << cppr;
+
+	/* Check consistency */
+	if (cppr >= xc->hw_cppr)
+		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
+			smp_processor_id(), cppr, xc->hw_cppr);
+
+	/*
+	 * Update our image of the HW CPPR. We don't yet modify
+	 * xc->cppr, this will be done as we scan for interrupts
+	 * in the queues.
+	 */
+	xc->hw_cppr = cppr;
+}
+
+static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+{
+	u64 val;
+
+	if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+		offset |= XIVE_ESB_LD_ST_MO;
+
+	val = __raw_readq(__x_eoi_page(xd) + offset);
+#ifdef __LITTLE_ENDIAN__
+	val >>= 64-8;
+#endif
+	return (u8)val;
+}
+
+static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
+{
+	/* If the XIVE supports the new "store EOI facility, use it */
+	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+		__raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
+	else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+		/*
+		 * For LSIs the HW EOI cycle is used rather than PQ bits,
+		 * as they are automatically re-triggred in HW when still
+		 * pending.
+		 */
+		__raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
+	} else {
+		uint64_t eoi_val;
+
+		/*
+		 * Otherwise for EOI, we use the special MMIO that does
+		 * a clear of both P and Q and returns the old Q,
+		 * except for LSIs where we use the "EOI cycle" special
+		 * load.
+		 *
+		 * This allows us to then do a re-trigger if Q was set
+		 * rather than synthetizing an interrupt in software
+		 */
+		eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
+
+		/* Re-trigger if needed */
+		if ((eoi_val & 1) && __x_trig_page(xd))
+			__raw_writeq(0, __x_trig_page(xd));
+	}
+}
+
+enum {
+	scan_fetch,
+	scan_poll,
+	scan_eoi,
+};
+
+static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
+				       u8 pending, int scan_type)
+{
+	u32 hirq = 0;
+	u8 prio = 0xff;
+
+	/* Find highest pending priority */
+	while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
+		struct xive_q *q;
+		u32 idx, toggle;
+		__be32 *qpage;
+
+		/*
+		 * If pending is 0 this will return 0xff which is what
+		 * we want
+		 */
+		prio = ffs(pending) - 1;
+
+		/* Don't scan past the guest cppr */
+		if (prio >= xc->cppr || prio > 7) {
+			if (xc->mfrr < xc->cppr) {
+				prio = xc->mfrr;
+				hirq = XICS_IPI;
+			}
+			break;
+		}
+
+		/* Grab queue and pointers */
+		q = &xc->queues[prio];
+		idx = q->idx;
+		toggle = q->toggle;
+
+		/*
+		 * Snapshot the queue page. The test further down for EOI
+		 * must use the same "copy" that was used by __xive_read_eq
+		 * since qpage can be set concurrently and we don't want
+		 * to miss an EOI.
+		 */
+		qpage = READ_ONCE(q->qpage);
+
+skip_ipi:
+		/*
+		 * Try to fetch from the queue. Will return 0 for a
+		 * non-queueing priority (ie, qpage = 0).
+		 */
+		hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
+
+		/*
+		 * If this was a signal for an MFFR change done by
+		 * H_IPI we skip it. Additionally, if we were fetching
+		 * we EOI it now, thus re-enabling reception of a new
+		 * such signal.
+		 *
+		 * We also need to do that if prio is 0 and we had no
+		 * page for the queue. In this case, we have non-queued
+		 * IPI that needs to be EOId.
+		 *
+		 * This is safe because if we have another pending MFRR
+		 * change that wasn't observed above, the Q bit will have
+		 * been set and another occurrence of the IPI will trigger.
+		 */
+		if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
+			if (scan_type == scan_fetch) {
+				xive_vm_source_eoi(xc->vp_ipi,
+						       &xc->vp_ipi_data);
+				q->idx = idx;
+				q->toggle = toggle;
+			}
+			/* Loop back on same queue with updated idx/toggle */
+			WARN_ON(hirq && hirq != XICS_IPI);
+			if (hirq)
+				goto skip_ipi;
+		}
+
+		/* If it's the dummy interrupt, continue searching */
+		if (hirq == XICS_DUMMY)
+			goto skip_ipi;
+
+		/* Clear the pending bit if the queue is now empty */
+		if (!hirq) {
+			pending &= ~(1 << prio);
+
+			/*
+			 * Check if the queue count needs adjusting due to
+			 * interrupts being moved away.
+			 */
+			if (atomic_read(&q->pending_count)) {
+				int p = atomic_xchg(&q->pending_count, 0);
+
+				if (p) {
+					WARN_ON(p > atomic_read(&q->count));
+					atomic_sub(p, &q->count);
+				}
+			}
+		}
+
+		/*
+		 * If the most favoured prio we found pending is less
+		 * favored (or equal) than a pending IPI, we return
+		 * the IPI instead.
+		 */
+		if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+			prio = xc->mfrr;
+			hirq = XICS_IPI;
+			break;
+		}
+
+		/* If fetching, update queue pointers */
+		if (scan_type == scan_fetch) {
+			q->idx = idx;
+			q->toggle = toggle;
+		}
+	}
+
+	/* If we are just taking a "peek", do nothing else */
+	if (scan_type == scan_poll)
+		return hirq;
+
+	/* Update the pending bits */
+	xc->pending = pending;
+
+	/*
+	 * If this is an EOI that's it, no CPPR adjustment done here,
+	 * all we needed was cleanup the stale pending bits and check
+	 * if there's anything left.
+	 */
+	if (scan_type == scan_eoi)
+		return hirq;
+
+	/*
+	 * If we found an interrupt, adjust what the guest CPPR should
+	 * be as if we had just fetched that interrupt from HW.
+	 *
+	 * Note: This can only make xc->cppr smaller as the previous
+	 * loop will only exit with hirq != 0 if prio is lower than
+	 * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+	 * for pending IPIs.
+	 */
+	if (hirq)
+		xc->cppr = prio;
+	/*
+	 * If it was an IPI the HW CPPR might have been lowered too much
+	 * as the HW interrupt we use for IPIs is routed to priority 0.
+	 *
+	 * We re-sync it here.
+	 */
+	if (xc->cppr != xc->hw_cppr) {
+		xc->hw_cppr = xc->cppr;
+		__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+	}
+
+	return hirq;
+}
+
+static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 old_cppr;
+	u32 hirq;
+
+	pr_devel("H_XIRR\n");
+
+	xc->stat_vm_h_xirr++;
+
+	/* First collect pending bits from HW */
+	xive_vm_ack_pending(xc);
+
+	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
+		 xc->pending, xc->hw_cppr, xc->cppr);
+
+	/* Grab previous CPPR and reverse map it */
+	old_cppr = xive_prio_to_guest(xc->cppr);
+
+	/* Scan for actual interrupts */
+	hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
+
+	pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
+		 hirq, xc->hw_cppr, xc->cppr);
+
+	/* That should never hit */
+	if (hirq & 0xff000000)
+		pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
+
+	/*
+	 * XXX We could check if the interrupt is masked here and
+	 * filter it. If we chose to do so, we would need to do:
+	 *
+	 *    if (masked) {
+	 *        lock();
+	 *        if (masked) {
+	 *            old_Q = true;
+	 *            hirq = 0;
+	 *        }
+	 *        unlock();
+	 *    }
+	 */
+
+	/* Return interrupt and old CPPR in GPR4 */
+	vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);
+
+	return H_SUCCESS;
+}
+
+static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u8 pending = xc->pending;
+	u32 hirq;
+
+	pr_devel("H_IPOLL(server=%ld)\n", server);
+
+	xc->stat_vm_h_ipoll++;
+
+	/* Grab the target VCPU if not the current one */
+	if (xc->server_num != server) {
+		vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+		if (!vcpu)
+			return H_PARAMETER;
+		xc = vcpu->arch.xive_vcpu;
+
+		/* Scan all priorities */
+		pending = 0xff;
+	} else {
+		/* Grab pending interrupt if any */
+		__be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
+		u8 pipr = be64_to_cpu(qw1) & 0xff;
+
+		if (pipr < 8)
+			pending |= 1 << pipr;
+	}
+
+	hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
+
+	/* Return interrupt and old CPPR in GPR4 */
+	vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);
+
+	return H_SUCCESS;
+}
+
+static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
+{
+	u8 pending, prio;
+
+	pending = xc->pending;
+	if (xc->mfrr != 0xff) {
+		if (xc->mfrr < 8)
+			pending |= 1 << xc->mfrr;
+		else
+			pending |= 0x80;
+	}
+	if (!pending)
+		return;
+	prio = ffs(pending) - 1;
+
+	__raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
+}
+
+static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
+					       struct kvmppc_xive_vcpu *xc)
+{
+	unsigned int prio;
+
+	/* For each priority that is now masked */
+	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+		struct kvmppc_xive_irq_state *state;
+		struct kvmppc_xive_src_block *sb;
+		u32 idx, toggle, entry, irq, hw_num;
+		struct xive_irq_data *xd;
+		__be32 *qpage;
+		u16 src;
+
+		idx = q->idx;
+		toggle = q->toggle;
+		qpage = READ_ONCE(q->qpage);
+		if (!qpage)
+			continue;
+
+		/* For each interrupt in the queue */
+		for (;;) {
+			entry = be32_to_cpup(qpage + idx);
+
+			/* No more ? */
+			if ((entry >> 31) == toggle)
+				break;
+			irq = entry & 0x7fffffff;
+
+			/* Skip dummies and IPIs */
+			if (irq == XICS_DUMMY || irq == XICS_IPI)
+				goto next;
+			sb = kvmppc_xive_find_source(xive, irq, &src);
+			if (!sb)
+				goto next;
+			state = &sb->irq_state[src];
+
+			/* Has it been rerouted ? */
+			if (xc->server_num == state->act_server)
+				goto next;
+
+			/*
+			 * Allright, it *has* been re-routed, kill it from
+			 * the queue.
+			 */
+			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+			/* Find the HW interrupt */
+			kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+			/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
+			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+				xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+
+			/* EOI the source */
+			xive_vm_source_eoi(hw_num, xd);
+
+next:
+			idx = (idx + 1) & q->msk;
+			if (idx == 0)
+				toggle ^= 1;
+		}
+	}
+}
+
+static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	u8 old_cppr;
+
+	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
+
+	xc->stat_vm_h_cppr++;
+
+	/* Map CPPR */
+	cppr = xive_prio_from_guest(cppr);
+
+	/* Remember old and update SW state */
+	old_cppr = xc->cppr;
+	xc->cppr = cppr;
+
+	/*
+	 * Order the above update of xc->cppr with the subsequent
+	 * read of xc->mfrr inside push_pending_to_hw()
+	 */
+	smp_mb();
+
+	if (cppr > old_cppr) {
+		/*
+		 * We are masking less, we need to look for pending things
+		 * to deliver and set VP pending bits accordingly to trigger
+		 * a new interrupt otherwise we might miss MFRR changes for
+		 * which we have optimized out sending an IPI signal.
+		 */
+		xive_vm_push_pending_to_hw(xc);
+	} else {
+		/*
+		 * We are masking more, we need to check the queue for any
+		 * interrupt that has been routed to another CPU, take
+		 * it out (replace it with the dummy) and retrigger it.
+		 *
+		 * This is necessary since those interrupts may otherwise
+		 * never be processed, at least not until this CPU restores
+		 * its CPPR.
+		 *
+		 * This is in theory racy vs. HW adding new interrupts to
+		 * the queue. In practice this works because the interesting
+		 * cases are when the guest has done a set_xive() to move the
+		 * interrupt away, which flushes the xive, followed by the
+		 * target CPU doing a H_CPPR. So any new interrupt coming into
+		 * the queue must still be routed to us and isn't a source
+		 * of concern.
+		 */
+		xive_vm_scan_for_rerouted_irqs(xive, xc);
+	}
+
+	/* Apply new CPPR */
+	xc->hw_cppr = cppr;
+	__raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+	return H_SUCCESS;
+}
+
+static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_irq_data *xd;
+	u8 new_cppr = xirr >> 24;
+	u32 irq = xirr & 0x00ffffff, hw_num;
+	u16 src;
+	int rc = 0;
+
+	pr_devel("H_EOI(xirr=%08lx)\n", xirr);
+
+	xc->stat_vm_h_eoi++;
+
+	xc->cppr = xive_prio_from_guest(new_cppr);
+
+	/*
+	 * IPIs are synthetized from MFRR and thus don't need
+	 * any special EOI handling. The underlying interrupt
+	 * used to signal MFRR changes is EOId when fetched from
+	 * the queue.
+	 */
+	if (irq == XICS_IPI || irq == 0) {
+		/*
+		 * This barrier orders the setting of xc->cppr vs.
+		 * subsquent test of xc->mfrr done inside
+		 * scan_interrupts and push_pending_to_hw
+		 */
+		smp_mb();
+		goto bail;
+	}
+
+	/* Find interrupt source */
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb) {
+		pr_devel(" source not found !\n");
+		rc = H_PARAMETER;
+		/* Same as above */
+		smp_mb();
+		goto bail;
+	}
+	state = &sb->irq_state[src];
+	kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+	state->in_eoi = true;
+
+	/*
+	 * This barrier orders both setting of in_eoi above vs,
+	 * subsequent test of guest_priority, and the setting
+	 * of xc->cppr vs. subsquent test of xc->mfrr done inside
+	 * scan_interrupts and push_pending_to_hw
+	 */
+	smp_mb();
+
+again:
+	if (state->guest_priority == MASKED) {
+		arch_spin_lock(&sb->lock);
+		if (state->guest_priority != MASKED) {
+			arch_spin_unlock(&sb->lock);
+			goto again;
+		}
+		pr_devel(" EOI on saved P...\n");
+
+		/* Clear old_p, that will cause unmask to perform an EOI */
+		state->old_p = false;
+
+		arch_spin_unlock(&sb->lock);
+	} else {
+		pr_devel(" EOI on source...\n");
+
+		/* Perform EOI on the source */
+		xive_vm_source_eoi(hw_num, xd);
+
+		/* If it's an emulated LSI, check level and resend */
+		if (state->lsi && state->asserted)
+			__raw_writeq(0, __x_trig_page(xd));
+
+	}
+
+	/*
+	 * This barrier orders the above guest_priority check
+	 * and spin_lock/unlock with clearing in_eoi below.
+	 *
+	 * It also has to be a full mb() as it must ensure
+	 * the MMIOs done in source_eoi() are completed before
+	 * state->in_eoi is visible.
+	 */
+	mb();
+	state->in_eoi = false;
+bail:
+
+	/* Re-evaluate pending IRQs and update HW */
+	xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
+	xive_vm_push_pending_to_hw(xc);
+	pr_devel(" after scan pending=%02x\n", xc->pending);
+
+	/* Apply new CPPR */
+	xc->hw_cppr = xc->cppr;
+	__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+	return rc;
+}
+
+static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+			       unsigned long mfrr)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+	pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
+
+	xc->stat_vm_h_ipi++;
+
+	/* Find target */
+	vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+	if (!vcpu)
+		return H_PARAMETER;
+	xc = vcpu->arch.xive_vcpu;
+
+	/* Locklessly write over MFRR */
+	xc->mfrr = mfrr;
+
+	/*
+	 * The load of xc->cppr below and the subsequent MMIO store
+	 * to the IPI must happen after the above mfrr update is
+	 * globally visible so that:
+	 *
+	 * - Synchronize with another CPU doing an H_EOI or a H_CPPR
+	 *   updating xc->cppr then reading xc->mfrr.
+	 *
+	 * - The target of the IPI sees the xc->mfrr update
+	 */
+	mb();
+
+	/* Shoot the IPI if most favored than target cppr */
+	if (mfrr < xc->cppr)
+		__raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
+
+	return H_SUCCESS;
+}
+
 /*
  * We leave a gap of a couple of interrupts in the queue to
...
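The interplay between pending, mfrr and cppr above reduces to: pick the most favored pending priority with ffs(), deliver the IPI instead whenever mfrr is at least as favored, and hand the result back in GPR4 packed as hirq | (old_cppr << 24). The following self-contained user-space C model replays that selection on made-up values; XICS_IPI, the helper and the fake source number are local stand-ins, not the kernel's.

#include <stdio.h>
#include <stdint.h>
#include <strings.h>	/* ffs() */

#define DEMO_XICS_IPI 2

/* Pick what H_XIRR would fetch: lowest set bit wins, the IPI can override. */
static uint32_t demo_pick(uint8_t pending, uint8_t mfrr, uint8_t cppr, uint8_t *new_cppr)
{
	int prio = ffs(pending) - 1;	/* -1 when nothing is pending */

	if (prio < 0 || prio >= cppr) {
		/* Nothing deliverable from the queues: maybe the IPI is. */
		if (mfrr < cppr) {
			*new_cppr = mfrr;
			return DEMO_XICS_IPI;
		}
		return 0;
	}
	if (prio >= mfrr && mfrr < cppr) {
		/* A pending IPI is at least as favored: return it instead. */
		*new_cppr = mfrr;
		return DEMO_XICS_IPI;
	}
	*new_cppr = (uint8_t)prio;
	return 0x1000 + (uint32_t)prio;	/* fake source number for the demo */
}

int main(void)
{
	uint8_t cppr = 5, mfrr = 3, new_cppr = cppr;
	uint8_t pending = 1 << 4;	/* one interrupt queued at priority 4 */
	uint32_t hirq = demo_pick(pending, mfrr, cppr, &new_cppr);
	uint8_t old_cppr = cppr;

	/* Same packing as the hcall return: interrupt in the low 24 bits. */
	uint32_t xirr = hirq | ((uint32_t)old_cppr << 24);

	printf("hirq=0x%x new_cppr=%u xirr=0x%08x\n", hirq, new_cppr, xirr);
	return 0;
}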
@@ -179,12 +781,13 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
 
-void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
 {
 	void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+	bool ret = true;
 
 	if (!esc_vaddr)
-		return;
+		return ret;
 
 	/* we are using XIVE with single escalation */
...
@@ -197,7 +800,7 @@ void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
 	 * we also don't want to set xive_esc_on to 1 here in
 	 * case we race with xive_esc_irq().
 	 */
-	vcpu->arch.ceded = 0;
+	ret = false;
 
 	/*
 	 * The escalation interrupts are special as we don't EOI them.
	 * There is no need to use the load-after-store ordering offset
...
@@ -210,6 +813,8 @@ void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
 		__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
 	}
 	mb();
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
...
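With the bool return, the caller decides whether the cede request survives instead of this helper clearing vcpu->arch.ceded itself. A hedged sketch of the expected call-site shape in the HV entry path (that call site is assumed, it is not part of this hunk):

	if (vcpu->arch.ceded) {
		if (!kvmppc_xive_rearm_escalation(vcpu))
			vcpu->arch.ceded = 0;	/* something fired while re-arming, don't cede */
	}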
@@ -238,7 +843,7 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
 	vcpu->arch.irq_pending = 1;
 	smp_mb();
-	if (vcpu->arch.ceded)
+	if (vcpu->arch.ceded || vcpu->arch.nested)
 		kvmppc_fast_vcpu_kick(vcpu);
 
 	/* Since we have the no-EOI flag, the interrupt is effectively
...
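The queues walked by xive_vm_scan_for_rerouted_irqs (and read through __xive_read_eq, whose signature appears in the next hunk) are rings in which the top bit of each 32-bit entry is a generation flag: an entry is valid only while that bit differs from the consumer's toggle, and the toggle flips each time the index wraps. A small self-contained C model of that validity test, with a 4-entry ring and invented entry values, purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define RING_SHIFT 2
#define RING_SIZE  (1u << RING_SHIFT)
#define RING_MASK  (RING_SIZE - 1)

/* Consume entries until the generation bit matches the toggle (queue empty). */
static void drain(const uint32_t *ring, uint32_t *idx, uint32_t *toggle)
{
	for (;;) {
		uint32_t entry = ring[*idx];

		if ((entry >> 31) == *toggle)	/* producer hasn't written this slot yet */
			break;

		printf("irq %u at idx %u (gen %u)\n",
		       entry & 0x7fffffff, *idx, entry >> 31);

		*idx = (*idx + 1) & RING_MASK;
		if (*idx == 0)
			*toggle ^= 1;		/* wrapped: expect the other generation */
	}
}

int main(void)
{
	/* Producer writes generation-1 entries first; consumer's toggle starts at 0. */
	uint32_t ring[RING_SIZE] = {
		0x80000000 | 101,	/* valid, generation 1 */
		0x80000000 | 102,	/* valid, generation 1 */
		0x00000000,		/* still generation 0: end of queue */
		0x00000000,
	};
	uint32_t idx = 0, toggle = 0;

	drain(ring, &idx, &toggle);
	return 0;
}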
arch/powerpc/kvm/book3s_xive.h
...
@@ -285,13 +285,6 @@ static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
 	return cur & 0x7fffffff;
 }
 
-extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
-extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
-extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-			 unsigned long mfrr);
-extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
-extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
-
 /*
  * Common Xive routines for XICS-over-XIVE and XIVE native
  */
...
arch/powerpc/kvm/book3s_xive_template.c (deleted, 100644 → 0)
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
- */
-
-/* File to be included by other .c files */
-
-#define XGLUE(a,b) a##b
-#define GLUE(a,b) XGLUE(a,b)
-
-/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
-#define XICS_DUMMY	1
-
-static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
-{
-	u8 cppr;
-	u16 ack;
-
-	/*
-	 * Ensure any previous store to CPPR is ordered vs.
-	 * the subsequent loads from PIPR or ACK.
-	 */
-	eieio();
-
-	/* Perform the acknowledge OS to register cycle. */
-	ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
-
-	/* Synchronize subsequent queue accesses */
-	mb();
-
-	/* XXX Check grouping level */
-
-	/* Anything ? */
-	if (!((ack >> 8) & TM_QW1_NSR_EO))
-		return;
-
-	/* Grab CPPR of the most favored pending interrupt */
-	cppr = ack & 0xff;
-	if (cppr < 8)
-		xc->pending |= 1 << cppr;
-
-#ifdef XIVE_RUNTIME_CHECKS
-	/* Check consistency */
-	if (cppr >= xc->hw_cppr)
-		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
-			smp_processor_id(), cppr, xc->hw_cppr);
-#endif
-
-	/*
-	 * Update our image of the HW CPPR. We don't yet modify
-	 * xc->cppr, this will be done as we scan for interrupts
-	 * in the queues.
-	 */
-	xc->hw_cppr = cppr;
-}
...
[The rest of the deleted template, the GLUE(X_PFX, ...) bodies of esb_load, source_eoi, scan_interrupts, h_xirr, h_ipoll, push_pending_to_hw, scan_for_rerouted_irqs, h_cppr, h_eoi and h_ipi, matches the xive_vm_* functions added to book3s_xive.c above, except that it uses the __x_tima/__x_readw/__x_readq/__x_writeb/__x_writeq accessors, the X_STATIC/X_STAT_PFX macros, and wraps its runtime checks in #ifdef XIVE_RUNTIME_CHECKS.]
...
arch/powerpc/kvm/e500mc.c
...
@@ -399,7 +399,6 @@ static int __init kvmppc_e500mc_init(void)
 	 *  allocator.
 	 */
 	kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core);
-	kvmppc_claim_lpid(0); /* host */
 
 	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
 	if (r)
...
arch/powerpc/kvm/powerpc.c
...
@@ -2497,41 +2497,37 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	return r;
 }
 
-static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
+static DEFINE_IDA(lpid_inuse);
 static unsigned long nr_lpids;
 
 long kvmppc_alloc_lpid(void)
 {
-	long lpid;
+	int lpid;
 
-	do {
-		lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
-		if (lpid >= nr_lpids) {
+	/* The host LPID must always be 0 (allocation starts at 1) */
+	lpid = ida_alloc_range(&lpid_inuse, 1, nr_lpids - 1, GFP_KERNEL);
+	if (lpid < 0) {
+		if (lpid == -ENOMEM)
+			pr_err("%s: Out of memory\n", __func__);
+		else
 			pr_err("%s: No LPIDs free\n", __func__);
-			return -ENOMEM;
-		}
-	} while (test_and_set_bit(lpid, lpid_inuse));
+		return -ENOMEM;
+	}
 
 	return lpid;
 }
 EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
 
-void kvmppc_claim_lpid(long lpid)
-{
-	set_bit(lpid, lpid_inuse);
-}
-EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);
-
 void kvmppc_free_lpid(long lpid)
 {
-	clear_bit(lpid, lpid_inuse);
+	ida_free(&lpid_inuse, lpid);
 }
 EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
 
+/* nr_lpids_param includes the host LPID */
 void kvmppc_init_lpid(unsigned long nr_lpids_param)
 {
-	nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
-	memset(lpid_inuse, 0, sizeof(lpid_inuse));
+	nr_lpids = nr_lpids_param;
 }
 EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
...
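With the IDA-backed allocator the host no longer reserves LPID 0 by hand (hence the kvmppc_claim_lpid(0) removal in e500mc.c above): allocation simply starts at 1 and IDs are returned with kvmppc_free_lpid(). A hedged sketch of how the API is used after this change; the call sites shown here are assumed for illustration, not quoted from the diff:

	/* Platform init: tell KVM how many LPIDs exist, the host keeps LPID 0. */
	kvmppc_init_lpid(KVMPPC_NR_LPIDS / threads_per_core);

	/* Per-VM setup: IDs come back in [1, nr_lpids - 1]. */
	long lpid = kvmppc_alloc_lpid();
	if (lpid < 0)
		return lpid;		/* -ENOMEM when the range is exhausted */
	kvm->arch.lpid = lpid;

	/* Per-VM teardown. */
	kvmppc_free_lpid(kvm->arch.lpid);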
arch/powerpc/kvm/trace_hv.h
...
@@ -409,9 +409,9 @@ TRACE_EVENT(kvmppc_run_core,
 );
 
 TRACE_EVENT(kvmppc_vcore_blocked,
-	TP_PROTO(struct kvmppc_vcore *vc, int where),
+	TP_PROTO(struct kvm_vcpu *vcpu, int where),
 
-	TP_ARGS(vc, where),
+	TP_ARGS(vcpu, where),
 
 	TP_STRUCT__entry(
 		__field(int,	n_runnable)
...
@@ -421,8 +421,8 @@ TRACE_EVENT(kvmppc_vcore_blocked,
 	),
 
 	TP_fast_assign(
-		__entry->runner_vcpu = vc->runner->vcpu_id;
-		__entry->n_runnable  = vc->n_runnable;
+		__entry->runner_vcpu = vcpu->vcpu_id;
+		__entry->n_runnable  = vcpu->arch.vcore->n_runnable;
 		__entry->where       = where;
 		__entry->tgid	      = current->tgid;
 	),
...
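After this change the tracepoint takes the blocking vcpu rather than its vcore, and the vcore statistics are reached through vcpu->arch.vcore. A hedged sketch of the expected shape of the call sites in the HV run loop (those call sites are assumed, not shown in this diff):

	trace_kvmppc_vcore_blocked(vcpu, 0);	/* entering the blocked state */
	/* ... wait for an interrupt or for the vcpu to become runnable ... */
	trace_kvmppc_vcore_blocked(vcpu, 1);	/* leaving the blocked state */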
arch/powerpc/mm/book3s64/iommu_api.c
...
@@ -305,24 +305,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
 }
 EXPORT_SYMBOL_GPL(mm_iommu_lookup);
 
-struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
-		unsigned long ua, unsigned long size)
-{
-	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
-
-	list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list, next) {
-		if ((mem->ua <= ua) &&
-				(ua + size <= mem->ua +
-				 (mem->entries << PAGE_SHIFT))) {
-			ret = mem;
-			break;
-		}
-	}
-
-	return ret;
-}
-
 struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
 		unsigned long ua, unsigned long entries)
 {
...
@@ -369,56 +351,6 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 }
 EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
 
-long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
-		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
-{
-	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
-	unsigned long *pa;
-
-	if (entry >= mem->entries)
-		return -EFAULT;
-
-	if (pageshift > mem->pageshift)
-		return -EFAULT;
-
-	if (!mem->hpas) {
-		*hpa = mem->dev_hpa + (ua - mem->ua);
-		return 0;
-	}
-
-	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
-	if (!pa)
-		return -EFAULT;
-
-	*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
-
-	return 0;
-}
-
-extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
-{
-	struct mm_iommu_table_group_mem_t *mem;
-	long entry;
-	void *va;
-	unsigned long *pa;
-
-	mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
-	if (!mem)
-		return;
-
-	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
-		return;
-
-	entry = (ua - mem->ua) >> PAGE_SHIFT;
-	va = &mem->hpas[entry];
-
-	pa = (void *) vmalloc_to_phys(va);
-	if (!pa)
-		return;
-
-	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
-}
-
 bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
 		unsigned int pageshift, unsigned long *size)
 {
...
arch/powerpc/mm/init_64.c
...
@@ -372,6 +372,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 
 #ifdef CONFIG_PPC_BOOK3S_64
 unsigned int mmu_lpid_bits;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+EXPORT_SYMBOL_GPL(mmu_lpid_bits);
+#endif
 unsigned int mmu_pid_bits;
 
 static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
...
arch/powerpc/platforms/powernv/pci-ioda-tce.c
...
@@ -145,8 +145,7 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 
 #ifdef CONFIG_IOMMU_API
 int pnv_tce_xchg(struct iommu_table *tbl, long index,
-		unsigned long *hpa, enum dma_data_direction *direction,
-		bool alloc)
+		unsigned long *hpa, enum dma_data_direction *direction)
 {
 	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
 	unsigned long newtce = *hpa | proto_tce, oldtce;
...
@@ -164,7 +163,7 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index,
 	}
 
 	if (!ptce) {
-		ptce = pnv_tce(tbl, false, idx, alloc);
+		ptce = pnv_tce(tbl, false, idx, true);
 		if (!ptce)
 			return -ENOMEM;
 	}
...
arch/powerpc/platforms/powernv/pci-ioda.c
...
@@ -1268,22 +1268,20 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
 	return false;
 }
 
-static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb,
-						     bool real_mode)
+static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb)
 {
-	return real_mode ? (__be64 __iomem *)(phb->regs_phys + 0x210) :
-		(phb->regs + 0x210);
+	return phb->regs + 0x210;
 }
 
 static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl,
-		unsigned long index, unsigned long npages, bool rm)
+		unsigned long index, unsigned long npages)
 {
 	struct iommu_table_group_link *tgl = list_first_entry_or_null(
 			&tbl->it_group_list, struct iommu_table_group_link,
 			next);
 	struct pnv_ioda_pe *pe = container_of(tgl->table_group,
 			struct pnv_ioda_pe, table_group);
-	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
+	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb);
 	unsigned long start, end, inc;
 
 	start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
...
@@ -1298,11 +1296,7 @@ static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl,
 	mb(); /* Ensure above stores are visible */
 	while (start <= end) {
-		if (rm)
-			__raw_rm_writeq_be(start, invalidate);
-		else
-			__raw_writeq_be(start, invalidate);
+		__raw_writeq_be(start, invalidate);
 		start += inc;
 	}
...
@@ -1321,7 +1315,7 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
 			attrs);
 
 	if (!ret)
-		pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false);
+		pnv_pci_p7ioc_tce_invalidate(tbl, index, npages);
 
 	return ret;
 }
...
@@ -1329,10 +1323,9 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
 #ifdef CONFIG_IOMMU_API
 /* Common for IODA1 and IODA2 */
 static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index,
-		unsigned long *hpa, enum dma_data_direction *direction,
-		bool realmode)
+		unsigned long *hpa, enum dma_data_direction *direction)
 {
-	return pnv_tce_xchg(tbl, index, hpa, direction, !realmode);
+	return pnv_tce_xchg(tbl, index, hpa, direction);
 }
 #endif
...
@@ -1341,7 +1334,7 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
 {
 	pnv_tce_free(tbl, index, npages);
 
-	pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false);
+	pnv_pci_p7ioc_tce_invalidate(tbl, index, npages);
 }
 
 static struct iommu_table_ops pnv_ioda1_iommu_ops = {
...
@@ -1362,18 +1355,18 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
 static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
 {
 	/* 01xb - invalidate TCEs that match the specified PE# */
-	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb);
 	unsigned long val = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);
 
 	mb(); /* Ensure above stores are visible */
 	__raw_writeq_be(val, invalidate);
 }
 
-static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
+static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe,
 					unsigned shift, unsigned long index,
 					unsigned long npages)
 {
-	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
+	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb);
 	unsigned long start, end, inc;
 
 	/* We'll invalidate DMA address in PE scope */
...
@@ -1388,9 +1381,6 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
 	mb();
 
 	while (start <= end) {
-		if (rm)
-			__raw_rm_writeq_be(start, invalidate);
-		else
-			__raw_writeq_be(start, invalidate);
+		__raw_writeq_be(start, invalidate);
 		start += inc;
 	}
...
@@ -1408,7 +1398,7 @@ static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe)
 }
 
 static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
-		unsigned long index, unsigned long npages, bool rm)
+		unsigned long index, unsigned long npages)
 {
 	struct iommu_table_group_link *tgl;
...
@@ -1419,7 +1409,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 		unsigned int shift = tbl->it_page_shift;
 
 		if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
-			pnv_pci_phb3_tce_invalidate(pe, rm, shift,
+			pnv_pci_phb3_tce_invalidate(pe, shift,
 						    index, npages);
 		else
 			opal_pci_tce_kill(phb->opal_id,
...
@@ -1438,7 +1428,7 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
 			attrs);
 
 	if (!ret)
-		pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+		pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
 
 	return ret;
 }
...
@@ -1448,7 +1438,7 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
 {
 	pnv_tce_free(tbl, index, npages);
 
-	pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
+	pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
 }
 
 static struct iommu_table_ops pnv_ioda2_iommu_ops = {
...
@@ -2739,7 +2729,7 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
 	if (rc != OPAL_SUCCESS)
 		return;
 
-	pnv_pci_p7ioc_tce_invalidate(tbl, tbl->it_offset, tbl->it_size, false);
+	pnv_pci_p7ioc_tce_invalidate(tbl, tbl->it_offset, tbl->it_size);
 	if (pe->table_group.group) {
 		iommu_group_put(pe->table_group.group);
 		WARN_ON(pe->table_group.group);
...
arch/powerpc/platforms/powernv/pci.h
...
@@ -311,8 +311,7 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 		unsigned long attrs);
 extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
 extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
-		unsigned long *hpa, enum dma_data_direction *direction,
-		bool alloc);
+		unsigned long *hpa, enum dma_data_direction *direction);
 extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index,
 		bool alloc);
 extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
...
arch/powerpc/platforms/pseries/iommu.c
...
@@ -666,8 +666,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 
 #ifdef CONFIG_IOMMU_API
 static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
-				long *tce, enum dma_data_direction *direction,
-				bool realmode)
+				long *tce, enum dma_data_direction *direction)
 {
 	long rc;
 	unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
...
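With both the powernv and pseries backends converted, every xchg_no_kill implementation now takes just the table, index, HPA and direction, so generic callers no longer pass a real-mode flag. A hedged sketch of a VFIO-style call sequence under that assumption (the surrounding table and page lookup, locking and error handling are omitted, and tce_kill is likewise assumed to have dropped its real-mode flag):

	/* Sketch: swap one TCE and then flush it, as a generic caller would. */
	unsigned long hpa = page_to_phys(page);
	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
	long ret;

	ret = tbl->it_ops->xchg_no_kill(tbl, entry, &hpa, &dir);
	if (!ret && tbl->it_ops->tce_kill)
		tbl->it_ops->tce_kill(tbl, entry, 1);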