openeuler / Kernel

Commit a96223f1
Author: Thomas Gleixner
Date:   Feb 03, 2018

Merge branch 'msr-bitmaps' of git://git.kernel.org/pub/scm/virt/kvm/kvm into x86/pti

Pull the KVM prerequisites so the IBPB patches apply.

Parents: af189c95 904e14fb
Showing 1 changed file with 186 additions and 254 deletions.

arch/x86/kvm/vmx.c  (+186, -254)
@@ -112,6 +112,14 @@ static u64 __read_mostly host_xss;
 static bool __read_mostly enable_pml = 1;
 module_param_named(pml, enable_pml, bool, S_IRUGO);
 
+#define MSR_TYPE_R	1
+#define MSR_TYPE_W	2
+#define MSR_TYPE_RW	3
+
+#define MSR_BITMAP_MODE_X2APIC		1
+#define MSR_BITMAP_MODE_X2APIC_APICV	2
+#define MSR_BITMAP_MODE_LM		4
+
 #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
 
 /* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
@@ -186,7 +194,6 @@ module_param(ple_window_max, int, S_IRUGO);
 extern const ulong vmx_return;
 
 #define NR_AUTOLOAD_MSRS 8
-#define VMCS02_POOL_SIZE 1
 
 struct vmcs {
	u32 revision_id;
@@ -211,6 +218,7 @@ struct loaded_vmcs {
	int soft_vnmi_blocked;
	ktime_t entry_time;
	s64 vnmi_blocked_time;
+	unsigned long *msr_bitmap;
	struct list_head loaded_vmcss_on_cpu_link;
 };
@@ -227,7 +235,7 @@ struct shared_msr_entry {
  * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
  * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
  * More than one of these structures may exist, if L1 runs multiple L2 guests.
- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
+ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
  * underlying hardware which will be used to run L2.
  * This structure is packed to ensure that its layout is identical across
  * machines (necessary for live migration).
@@ -410,13 +418,6 @@ struct __packed vmcs12 {
  */
 #define VMCS12_SIZE 0x1000
 
-/* Used to remember the last vmcs02 used for some recently used vmcs12s */
-struct vmcs02_list {
-	struct list_head list;
-	gpa_t vmptr;
-	struct loaded_vmcs vmcs02;
-};
-
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -441,15 +442,15 @@ struct nested_vmx {
	 */
	bool sync_shadow_vmcs;
 
-	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
-	struct list_head vmcs02_pool;
-	int vmcs02_num;
	bool change_vmcs01_virtual_x2apic_mode;
	/* L2 must run next, and mustn't decide to exit to L1. */
	bool nested_run_pending;
+
+	struct loaded_vmcs vmcs02;
+
	/*
-	 * Guest pages referred to in vmcs02 with host-physical pointers, so
-	 * we must keep them pinned while L2 runs.
+	 * Guest pages referred to in the vmcs02 with host-physical
+	 * pointers, so we must keep them pinned while L2 runs.
	 */
	struct page *apic_access_page;
	struct page *virtual_apic_page;
@@ -458,8 +459,6 @@ struct nested_vmx {
	bool pi_pending;
	u16 posted_intr_nv;
 
-	unsigned long *msr_bitmap;
-
	struct hrtimer preemption_timer;
	bool preemption_timer_expired;
@@ -582,6 +581,7 @@ struct vcpu_vmx {
	struct kvm_vcpu       vcpu;
	unsigned long         host_rsp;
	u8                    fail;
+	u8		      msr_bitmap_mode;
	u32                   exit_intr_info;
	u32                   idt_vectoring_info;
	ulong                 rflags;
@@ -933,6 +933,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
 static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
 static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
					     u16 error_code);
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -952,12 +953,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
 enum {
	VMX_IO_BITMAP_A,
	VMX_IO_BITMAP_B,
-	VMX_MSR_BITMAP_LEGACY,
-	VMX_MSR_BITMAP_LONGMODE,
-	VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
-	VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
-	VMX_MSR_BITMAP_LEGACY_X2APIC,
-	VMX_MSR_BITMAP_LONGMODE_X2APIC,
	VMX_VMREAD_BITMAP,
	VMX_VMWRITE_BITMAP,
	VMX_BITMAP_NR
@@ -967,12 +962,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
 #define vmx_io_bitmap_a                      (vmx_bitmap[VMX_IO_BITMAP_A])
 #define vmx_io_bitmap_b                      (vmx_bitmap[VMX_IO_BITMAP_B])
-#define vmx_msr_bitmap_legacy                (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
-#define vmx_msr_bitmap_longmode              (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
-#define vmx_msr_bitmap_legacy_x2apic_apicv   (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
-#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
-#define vmx_msr_bitmap_legacy_x2apic         (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
-#define vmx_msr_bitmap_longmode_x2apic       (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
 #define vmx_vmread_bitmap                    (vmx_bitmap[VMX_VMREAD_BITMAP])
 #define vmx_vmwrite_bitmap                   (vmx_bitmap[VMX_VMWRITE_BITMAP])
@@ -2570,36 +2559,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
	vmx->guest_msrs[from] = tmp;
 }
 
-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
-{
-	unsigned long *msr_bitmap;
-
-	if (is_guest_mode(vcpu))
-		msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
-	else if (cpu_has_secondary_exec_ctrls() &&
-		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
-		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
-		if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
-			if (is_long_mode(vcpu))
-				msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
-			else
-				msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
-		} else {
-			if (is_long_mode(vcpu))
-				msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
-			else
-				msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
-		}
-	} else {
-		if (is_long_mode(vcpu))
-			msr_bitmap = vmx_msr_bitmap_longmode;
-		else
-			msr_bitmap = vmx_msr_bitmap_legacy;
-	}
-
-	vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
-}
-
 /*
  * Set up the vmcs to automatically save and restore system
  * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
@@ -2640,7 +2599,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
	vmx->save_nmsrs = save_nmsrs;
 
	if (cpu_has_vmx_msr_bitmap())
-		vmx_set_msr_bitmap(&vmx->vcpu);
+		vmx_update_msr_bitmap(&vmx->vcpu);
 }
 
 /*
@@ -3835,11 +3794,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
	return vmcs;
 }
 
-static struct vmcs *alloc_vmcs(void)
-{
-	return alloc_vmcs_cpu(raw_smp_processor_id());
-}
-
 static void free_vmcs(struct vmcs *vmcs)
 {
	free_pages((unsigned long)vmcs, vmcs_config.order);
@@ -3855,9 +3809,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
	loaded_vmcs_clear(loaded_vmcs);
	free_vmcs(loaded_vmcs->vmcs);
	loaded_vmcs->vmcs = NULL;
+	if (loaded_vmcs->msr_bitmap)
+		free_page((unsigned long)loaded_vmcs->msr_bitmap);
	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
 }
 
+static struct vmcs *alloc_vmcs(void)
+{
+	return alloc_vmcs_cpu(raw_smp_processor_id());
+}
+
+static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
+{
+	loaded_vmcs->vmcs = alloc_vmcs();
+	if (!loaded_vmcs->vmcs)
+		return -ENOMEM;
+
+	loaded_vmcs->shadow_vmcs = NULL;
+	loaded_vmcs_init(loaded_vmcs);
+
+	if (cpu_has_vmx_msr_bitmap()) {
+		loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+		if (!loaded_vmcs->msr_bitmap)
+			goto out_vmcs;
+		memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+	}
+	return 0;
+
+out_vmcs:
+	free_loaded_vmcs(loaded_vmcs);
+	return -ENOMEM;
+}
+
 static void free_kvm_area(void)
 {
	int cpu;
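Side note (not part of the patch): alloc_loaded_vmcs() above gives every loaded VMCS its own MSR bitmap page, filled with ones so that every MSR access is intercepted until individual bits are explicitly cleared. A minimal userspace sketch of that own-and-default-deny lifecycle; the struct and function names below are illustrative, not kernel types:

#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

struct loaded_vmcs_sketch {
	unsigned long *msr_bitmap;	/* one 4 KiB bitmap per loaded VMCS */
};

static int alloc_loaded_vmcs_sketch(struct loaded_vmcs_sketch *lv)
{
	lv->msr_bitmap = malloc(PAGE_SIZE);
	if (!lv->msr_bitmap)
		return -1;
	/* All bits set: intercept every MSR until a bit is cleared later. */
	memset(lv->msr_bitmap, 0xff, PAGE_SIZE);
	return 0;
}

static void free_loaded_vmcs_sketch(struct loaded_vmcs_sketch *lv)
{
	free(lv->msr_bitmap);
	lv->msr_bitmap = NULL;
}

int main(void)
{
	struct loaded_vmcs_sketch lv;

	if (alloc_loaded_vmcs_sketch(&lv))
		return 1;
	free_loaded_vmcs_sketch(&lv);
	return 0;
}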
@@ -4916,10 +4899,8 @@ static void free_vpid(int vpid)
	spin_unlock(&vmx_vpid_lock);
 }
 
-#define MSR_TYPE_R	1
-#define MSR_TYPE_W	2
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
-						u32 msr, int type)
+static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+							  u32 msr, int type)
 {
	int f = sizeof(unsigned long);
@@ -4953,6 +4934,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
	}
 }
 
+static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+							 u32 msr, int type)
+{
+	int f = sizeof(unsigned long);
+
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
+	/*
+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+	 * have the write-low and read-high bitmap offsets the wrong way round.
+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+	 */
+	if (msr <= 0x1fff) {
+		if (type & MSR_TYPE_R)
+			/* read-low */
+			__set_bit(msr, msr_bitmap + 0x000 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-low */
+			__set_bit(msr, msr_bitmap + 0x800 / f);
+
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		if (type & MSR_TYPE_R)
+			/* read-high */
+			__set_bit(msr, msr_bitmap + 0x400 / f);
+
+		if (type & MSR_TYPE_W)
+			/* write-high */
+			__set_bit(msr, msr_bitmap + 0xc00 / f);
+
+	}
+}
+
+static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+						       u32 msr, int type, bool value)
+{
+	if (value)
+		vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
+	else
+		vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
+}
+
 /*
  * If a msr is allowed by L0, we should check whether it is allowed by L1.
  * The corresponding bit will be cleared unless both of L0 and L1 allow it.
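For reference, a standalone sketch (not taken from the patch) of the bitmap layout the two helpers above rely on: the 4 KiB MSR bitmap is split into read-low (byte offset 0x000), read-high (0x400), write-low (0x800) and write-high (0xc00) regions, covering MSRs 0x0-0x1fff and 0xc0000000-0xc0001fff. The helper below is hypothetical and only reproduces the offset arithmetic for a single access type:

#include <stdio.h>

enum { MSR_TYPE_R = 1, MSR_TYPE_W = 2 };

/* Bit index of one (msr, type) pair inside the 4 KiB bitmap, or -1. */
static long msr_bitmap_bit(unsigned int msr, int type)
{
	unsigned long base;

	if (msr <= 0x1fff) {
		base = (type == MSR_TYPE_R) ? 0x000 : 0x800;	/* low range */
	} else if (msr >= 0xc0000000u && msr <= 0xc0001fffu) {
		base = (type == MSR_TYPE_R) ? 0x400 : 0xc00;	/* high range */
		msr &= 0x1fff;
	} else {
		return -1;	/* outside the two controllable ranges */
	}

	/* byte offset of the region -> bit offset, plus the MSR index */
	return base * 8 + msr;
}

int main(void)
{
	/* MSR 0xc0000102 (KERNEL_GS_BASE) lands in the write-high region. */
	printf("write bit for 0xc0000102: %ld\n",
	       msr_bitmap_bit(0xc0000102u, MSR_TYPE_W));
	/* MSR 0x808 (x2APIC TPR) lands in the read-low region. */
	printf("read bit for 0x808: %ld\n", msr_bitmap_bit(0x808u, MSR_TYPE_R));
	return 0;
}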
@@ -4999,30 +5024,70 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
	}
 }
 
-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
-{
-	if (!longmode_only)
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
-						msr, MSR_TYPE_R | MSR_TYPE_W);
-	__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
-					msr, MSR_TYPE_R | MSR_TYPE_W);
-}
-
-static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
-{
-	if (apicv_active) {
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
-				msr, type);
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
-				msr, type);
-	} else {
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-				msr, type);
-		__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-				msr, type);
-	}
-}
-
+static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
+{
+	u8 mode = 0;
+
+	if (cpu_has_secondary_exec_ctrls() &&
+	    (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+	     SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
+		mode |= MSR_BITMAP_MODE_X2APIC;
+		if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
+			mode |= MSR_BITMAP_MODE_X2APIC_APICV;
+	}
+
+	if (is_long_mode(vcpu))
+		mode |= MSR_BITMAP_MODE_LM;
+
+	return mode;
+}
+
+#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
+
+static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, u8 mode)
+{
+	int msr;
+
+	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+		unsigned word = msr / BITS_PER_LONG;
+
+		msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
+		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+	}
+
+	if (mode & MSR_BITMAP_MODE_X2APIC) {
+		/*
+		 * TPR reads and writes can be virtualized even if virtual interrupt
+		 * delivery is not in use.
+		 */
+		vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
+		if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
+			vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
+			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
+			vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
+		}
+	}
+}
+
+static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+	u8 mode = vmx_msr_bitmap_mode(vcpu);
+	u8 changed = mode ^ vmx->msr_bitmap_mode;
+
+	if (!changed)
+		return;
+
+	vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
+				  !(mode & MSR_BITMAP_MODE_LM));
+
+	if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
+		vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
+
+	vmx->msr_bitmap_mode = mode;
+}
+
 static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
 {
	return enable_apicv;
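As an aside, the new X2APIC_MSR() macro maps an xAPIC register offset to its x2APIC MSR number: the x2APIC MSR block starts at APIC_BASE_MSR (0x800) and each 16-byte xAPIC register becomes one MSR, hence the >> 4. A small standalone check; the APIC_* offset values are the usual apicdef.h constants, listed here as assumptions since the hunk itself does not show them:

#include <assert.h>

#define APIC_BASE_MSR	0x800
#define APIC_TASKPRI	0x80	/* TPR */
#define APIC_EOI	0xB0
#define APIC_TMCCT	0x390	/* current timer count */
#define APIC_SELF_IPI	0x3F0

#define X2APIC_MSR(r)	(APIC_BASE_MSR + ((r) >> 4))

int main(void)
{
	/* These are the literal MSR numbers the old hardware_setup() code used. */
	assert(X2APIC_MSR(APIC_TASKPRI) == 0x808);
	assert(X2APIC_MSR(APIC_EOI) == 0x80b);
	assert(X2APIC_MSR(APIC_TMCCT) == 0x839);
	assert(X2APIC_MSR(APIC_SELF_IPI) == 0x83f);
	return 0;
}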
@@ -5272,7 +5337,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
	}
 
	if (cpu_has_vmx_msr_bitmap())
-		vmx_set_msr_bitmap(vcpu);
+		vmx_update_msr_bitmap(vcpu);
 }
 
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
@@ -5459,7 +5524,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
		vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
	}
	if (cpu_has_vmx_msr_bitmap())
-		vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
+		vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
 
	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
@@ -6742,7 +6807,7 @@ void vmx_enable_tdp(void)
 static __init int hardware_setup(void)
 {
-	int r = -ENOMEM, i, msr;
+	int r = -ENOMEM, i;
 
	rdmsrl_safe(MSR_EFER, &host_efer);
@@ -6762,9 +6827,6 @@ static __init int hardware_setup(void)
	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
 
-	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
	if (setup_vmcs_config(&vmcs_config) < 0) {
		r = -EIO;
		goto out;
@@ -6833,42 +6895,8 @@ static __init int hardware_setup(void)
		kvm_tsc_scaling_ratio_frac_bits = 48;
	}
 
-	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-
-	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
-			vmx_msr_bitmap_legacy, PAGE_SIZE);
-	memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
-			vmx_msr_bitmap_longmode, PAGE_SIZE);
-	memcpy(vmx_msr_bitmap_legacy_x2apic,
-			vmx_msr_bitmap_legacy, PAGE_SIZE);
-	memcpy(vmx_msr_bitmap_longmode_x2apic,
-			vmx_msr_bitmap_longmode, PAGE_SIZE);
-
	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
-	for (msr = 0x800; msr <= 0x8ff; msr++) {
-		if (msr == 0x839 /* TMCCT */)
-			continue;
-		vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
-	}
-
-	/*
-	 * TPR reads and writes can be virtualized even if virtual interrupt
-	 * delivery is not in use.
-	 */
-	vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
-	vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
-
-	/* EOI */
-	vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
-	/* SELF-IPI */
-	vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
-
	if (enable_ept)
		vmx_enable_tdp();
	else
@@ -6971,94 +6999,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
	return handle_nop(vcpu);
 }
 
-/*
- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
- * We could reuse a single VMCS for all the L2 guests, but we also want the
- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
- * allows keeping them loaded on the processor, and in the future will allow
- * optimizations where prepare_vmcs02 doesn't need to set all the fields on
- * every entry if they never change.
- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
- *
- * The following functions allocate and free a vmcs02 in this pool.
- */
-
-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
-{
-	struct vmcs02_list *item;
-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
-		if (item->vmptr == vmx->nested.current_vmptr) {
-			list_move(&item->list, &vmx->nested.vmcs02_pool);
-			return &item->vmcs02;
-		}
-
-	if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
-		/* Recycle the least recently used VMCS. */
-		item = list_last_entry(&vmx->nested.vmcs02_pool,
-				       struct vmcs02_list, list);
-		item->vmptr = vmx->nested.current_vmptr;
-		list_move(&item->list, &vmx->nested.vmcs02_pool);
-		return &item->vmcs02;
-	}
-
-	/* Create a new VMCS */
-	item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
-	if (!item)
-		return NULL;
-	item->vmcs02.vmcs = alloc_vmcs();
-	item->vmcs02.shadow_vmcs = NULL;
-	if (!item->vmcs02.vmcs) {
-		kfree(item);
-		return NULL;
-	}
-	loaded_vmcs_init(&item->vmcs02);
-	item->vmptr = vmx->nested.current_vmptr;
-	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
-	vmx->nested.vmcs02_num++;
-	return &item->vmcs02;
-}
-
-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
-{
-	struct vmcs02_list *item;
-	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
-		if (item->vmptr == vmptr) {
-			free_loaded_vmcs(&item->vmcs02);
-			list_del(&item->list);
-			kfree(item);
-			vmx->nested.vmcs02_num--;
-			return;
-		}
-}
-
-/*
- * Free all VMCSs saved for this vcpu, except the one pointed by
- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
- * must be &vmx->vmcs01.
- */
-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
-{
-	struct vmcs02_list *item, *n;
-
-	WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
-	list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
-		/*
-		 * Something will leak if the above WARN triggers.  Better than
-		 * a use-after-free.
-		 */
-		if (vmx->loaded_vmcs == &item->vmcs02)
-			continue;
-
-		free_loaded_vmcs(&item->vmcs02);
-		list_del(&item->list);
-		kfree(item);
-		vmx->nested.vmcs02_num--;
-	}
-}
-
 /*
  * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
  * set the success or error code of an emulated VMX instruction, as specified
@@ -7239,13 +7179,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 {
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs *shadow_vmcs;
+	int r;
 
-	if (cpu_has_vmx_msr_bitmap()) {
-		vmx->nested.msr_bitmap =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-		if (!vmx->nested.msr_bitmap)
-			goto out_msr_bitmap;
-	}
+	r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
+	if (r < 0)
+		goto out_vmcs02;
 
	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
	if (!vmx->nested.cached_vmcs12)
@@ -7262,9 +7200,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
	}
 
-	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
-	vmx->nested.vmcs02_num = 0;
-
	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_REL_PINNED);
	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
@@ -7276,9 +7211,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
	kfree(vmx->nested.cached_vmcs12);
 
 out_cached_vmcs12:
-	free_page((unsigned long)vmx->nested.msr_bitmap);
+	free_loaded_vmcs(&vmx->nested.vmcs02);
 
-out_msr_bitmap:
+out_vmcs02:
	return -ENOMEM;
 }
@@ -7421,10 +7356,6 @@ static void free_nested(struct vcpu_vmx *vmx)
	free_vpid(vmx->nested.vpid02);
	vmx->nested.posted_intr_nv = -1;
	vmx->nested.current_vmptr = -1ull;
-	if (vmx->nested.msr_bitmap) {
-		free_page((unsigned long)vmx->nested.msr_bitmap);
-		vmx->nested.msr_bitmap = NULL;
-	}
	if (enable_shadow_vmcs) {
		vmx_disable_shadow_vmcs(vmx);
		vmcs_clear(vmx->vmcs01.shadow_vmcs);
@@ -7432,7 +7363,7 @@ static void free_nested(struct vcpu_vmx *vmx)
		vmx->vmcs01.shadow_vmcs = NULL;
	}
	kfree(vmx->nested.cached_vmcs12);
-	/* Unpin physical memory we referred to in current vmcs02 */
+	/* Unpin physical memory we referred to in the vmcs02 */
	if (vmx->nested.apic_access_page) {
		kvm_release_page_dirty(vmx->nested.apic_access_page);
		vmx->nested.apic_access_page = NULL;
@@ -7448,7 +7379,7 @@ static void free_nested(struct vcpu_vmx *vmx)
		vmx->nested.pi_desc = NULL;
	}
 
-	nested_free_all_saved_vmcss(vmx);
+	free_loaded_vmcs(&vmx->nested.vmcs02);
 }
 
 /* Emulate the VMXOFF instruction */
@@ -7491,8 +7422,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
			 vmptr + offsetof(struct vmcs12, launch_state),
			 &zero, sizeof(zero));
 
-	nested_free_vmcs02(vmx, vmptr);
-
	nested_vmx_succeed(vcpu);
	return kvm_skip_emulated_instruction(vcpu);
 }
@@ -8404,10 +8333,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
	/*
	 * The host physical addresses of some pages of guest memory
-	 * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU
-	 * may write to these pages via their host physical address while
-	 * L2 is running, bypassing any address-translation-based dirty
-	 * tracking (e.g. EPT write protection).
+	 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
+	 * Page). The CPU may write to these pages via their host
+	 * physical address while L2 is running, bypassing any
+	 * address-translation-based dirty tracking (e.g. EPT write
+	 * protection).
	 *
	 * Mark them dirty on every exit from L2 to prevent them from
	 * getting out of sync with dirty tracking.
@@ -8941,7 +8871,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
	}
	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
 
-	vmx_set_msr_bitmap(vcpu);
+	vmx_update_msr_bitmap(vcpu);
 }
 
 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
@@ -9602,6 +9532,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 {
	int err;
	struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	unsigned long *msr_bitmap;
	int cpu;
 
	if (!vmx)
@@ -9634,13 +9565,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
	if (!vmx->guest_msrs)
		goto free_pml;
 
-	vmx->loaded_vmcs = &vmx->vmcs01;
-	vmx->loaded_vmcs->vmcs = alloc_vmcs();
-	vmx->loaded_vmcs->shadow_vmcs = NULL;
-	if (!vmx->loaded_vmcs->vmcs)
+	err = alloc_loaded_vmcs(&vmx->vmcs01);
+	if (err < 0)
		goto free_msrs;
-	loaded_vmcs_init(vmx->loaded_vmcs);
 
+	msr_bitmap = vmx->vmcs01.msr_bitmap;
+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+	vmx->msr_bitmap_mode = 0;
+
+	vmx->loaded_vmcs = &vmx->vmcs01;
	cpu = get_cpu();
	vmx_vcpu_load(&vmx->vcpu, cpu);
	vmx->vcpu.cpu = cpu;
@@ -10103,7 +10041,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
	int msr;
	struct page *page;
	unsigned long *msr_bitmap_l1;
-	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
 
	/* This shortcut is ok because we support only x2APIC MSRs so far. */
	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
@@ -10680,6 +10618,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
	if (kvm_has_tsc_control)
		decache_tsc_multiplier(vmx);
 
+	if (cpu_has_vmx_msr_bitmap())
+		vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+
	if (enable_vpid) {
		/*
		 * There is no direct mapping between vpid02 and vpid12, the
@@ -10901,20 +10842,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
 {
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-	struct loaded_vmcs *vmcs02;
	u32 msr_entry_idx;
	u32 exit_qual;
 
-	vmcs02 = nested_get_current_vmcs02(vmx);
-	if (!vmcs02)
-		return -ENOMEM;
-
	enter_guest_mode(vcpu);
 
	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
		vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
-	vmx_switch_vmcs(vcpu, vmcs02);
+	vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
	vmx_segment_cache_clear(vmx);
 
	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
@@ -11483,7 +11419,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 
	if (cpu_has_vmx_msr_bitmap())
-		vmx_set_msr_bitmap(vcpu);
+		vmx_update_msr_bitmap(vcpu);
 
	if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
				vmcs12->vm_exit_msr_load_count))
@@ -11532,10 +11468,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
	vm_exit_controls_reset_shadow(vmx);
	vmx_segment_cache_clear(vmx);
 
-	/* if no vmcs02 cache requested, remove the one we used */
-	if (VMCS02_POOL_SIZE == 0)
-		nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
-
	/* Update any VMCS fields that might have changed while L2 ran */
	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);