Commit 3370b69e, authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull second batch of kvm updates from Paolo Bonzini:
 "Four changes:

   - x86: work around two nasty cases where a benign exception occurs
     while another is being delivered.  The endless stream of exceptions
     causes an infinite loop in the processor, which not even NMIs or
     SMIs can interrupt; in the virt case, there is no possibility to
     exit to the host either.

   - x86: support for Skylake per-guest TSC rate.  Long supported by
     AMD, the patches mostly move things from there to common
     arch/x86/kvm/ code.

   - generic: remove local_irq_save/restore from the guest entry and
     exit paths when context tracking is enabled.  The patches are a few
     months old, but we discussed them again at kernel summit.  Andy
     will pick up from here and, in 4.5, try to remove it from the user
     entry/exit paths.

   - PPC: Two bug fixes, see merge commit 37028975 for details"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (21 commits)
  KVM: x86: rename update_db_bp_intercept to update_bp_intercept
  KVM: svm: unconditionally intercept #DB
  KVM: x86: work around infinite loop in microcode when #AC is delivered
  context_tracking: avoid irq_save/irq_restore on guest entry and exit
  context_tracking: remove duplicate enabled check
  KVM: VMX: Dump TSC multiplier in dump_vmcs()
  KVM: VMX: Use a scaled host TSC for guest readings of MSR_IA32_TSC
  KVM: VMX: Setup TSC scaling ratio when a vcpu is loaded
  KVM: VMX: Enable and initialize VMX TSC scaling
  KVM: x86: Use the correct vcpu's TSC rate to compute time scale
  KVM: x86: Move TSC scaling logic out of call-back read_l1_tsc()
  KVM: x86: Move TSC scaling logic out of call-back adjust_tsc_offset()
  KVM: x86: Replace call-back compute_tsc_offset() with a common function
  KVM: x86: Replace call-back set_tsc_khz() with a common function
  KVM: x86: Add a common TSC scaling function
  KVM: x86: Add a common TSC scaling ratio field in kvm_vcpu_arch
  KVM: x86: Collect information for setting TSC scaling ratio
  KVM: x86: declare a few variables as __read_mostly
  KVM: x86: merge handle_mmio_page_fault and handle_mmio_page_fault_common
  KVM: PPC: Book3S HV: Don't dynamically split core when already split
  ...
...@@ -2019,7 +2019,7 @@ static bool can_split_piggybacked_subcores(struct core_info *cip) ...@@ -2019,7 +2019,7 @@ static bool can_split_piggybacked_subcores(struct core_info *cip)
return false; return false;
n_subcores += (cip->subcore_threads[sub] - 1) >> 1; n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
} }
if (n_subcores > 3 || large_sub < 0) if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2))
return false; return false;
/* /*
......
...@@ -1749,7 +1749,8 @@ kvmppc_hdsi: ...@@ -1749,7 +1749,8 @@ kvmppc_hdsi:
beq 3f beq 3f
clrrdi r0, r4, 28 clrrdi r0, r4, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
bne 1f /* if no SLB entry found */ li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
bne 7f /* if no SLB entry found */
4: std r4, VCPU_FAULT_DAR(r9) 4: std r4, VCPU_FAULT_DAR(r9)
stw r6, VCPU_FAULT_DSISR(r9) stw r6, VCPU_FAULT_DSISR(r9)
...@@ -1768,14 +1769,15 @@ kvmppc_hdsi: ...@@ -1768,14 +1769,15 @@ kvmppc_hdsi:
cmpdi r3, -2 /* MMIO emulation; need instr word */ cmpdi r3, -2 /* MMIO emulation; need instr word */
beq 2f beq 2f
/* Synthesize a DSI for the guest */ /* Synthesize a DSI (or DSegI) for the guest */
ld r4, VCPU_FAULT_DAR(r9) ld r4, VCPU_FAULT_DAR(r9)
mr r6, r3 mr r6, r3
1: mtspr SPRN_DAR, r4 1: li r0, BOOK3S_INTERRUPT_DATA_STORAGE
mtspr SPRN_DSISR, r6 mtspr SPRN_DSISR, r6
7: mtspr SPRN_DAR, r4
mtspr SPRN_SRR0, r10 mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11 mtspr SPRN_SRR1, r11
li r10, BOOK3S_INTERRUPT_DATA_STORAGE mr r10, r0
bl kvmppc_msr_interrupt bl kvmppc_msr_interrupt
fast_interrupt_c_return: fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9) 6: ld r7, VCPU_CTR(r9)
...@@ -1823,7 +1825,8 @@ kvmppc_hisi: ...@@ -1823,7 +1825,8 @@ kvmppc_hisi:
beq 3f beq 3f
clrrdi r0, r10, 28 clrrdi r0, r10, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
bne 1f /* if no SLB entry found */ li r0, BOOK3S_INTERRUPT_INST_SEGMENT
bne 7f /* if no SLB entry found */
4: 4:
/* Search the hash table. */ /* Search the hash table. */
mr r3, r9 /* vcpu pointer */ mr r3, r9 /* vcpu pointer */
...@@ -1840,11 +1843,12 @@ kvmppc_hisi: ...@@ -1840,11 +1843,12 @@ kvmppc_hisi:
cmpdi r3, -1 /* handle in kernel mode */ cmpdi r3, -1 /* handle in kernel mode */
beq guest_exit_cont beq guest_exit_cont
/* Synthesize an ISI for the guest */ /* Synthesize an ISI (or ISegI) for the guest */
mr r11, r3 mr r11, r3
1: mtspr SPRN_SRR0, r10 1: li r0, BOOK3S_INTERRUPT_INST_STORAGE
7: mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11 mtspr SPRN_SRR1, r11
li r10, BOOK3S_INTERRUPT_INST_STORAGE mr r10, r0
bl kvmppc_msr_interrupt bl kvmppc_msr_interrupt
b fast_interrupt_c_return b fast_interrupt_c_return
......
...@@ -505,6 +505,7 @@ struct kvm_vcpu_arch { ...@@ -505,6 +505,7 @@ struct kvm_vcpu_arch {
u32 virtual_tsc_mult; u32 virtual_tsc_mult;
u32 virtual_tsc_khz; u32 virtual_tsc_khz;
s64 ia32_tsc_adjust_msr; s64 ia32_tsc_adjust_msr;
u64 tsc_scaling_ratio;
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */ unsigned nmi_pending; /* NMI queued after currently running handler */
...@@ -777,7 +778,7 @@ struct kvm_x86_ops { ...@@ -777,7 +778,7 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu); void (*vcpu_put)(struct kvm_vcpu *vcpu);
void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
...@@ -844,7 +845,7 @@ struct kvm_x86_ops { ...@@ -844,7 +845,7 @@ struct kvm_x86_ops {
int (*get_lpage_level)(void); int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void); bool (*rdtscp_supported)(void);
bool (*invpcid_supported)(void); bool (*invpcid_supported)(void);
void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment);
void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
...@@ -852,11 +853,9 @@ struct kvm_x86_ops { ...@@ -852,11 +853,9 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void); bool (*has_wbinvd_exit)(void);
void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu); u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc); u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
...@@ -923,17 +922,6 @@ struct kvm_arch_async_pf { ...@@ -923,17 +922,6 @@ struct kvm_arch_async_pf {
extern struct kvm_x86_ops *kvm_x86_ops; extern struct kvm_x86_ops *kvm_x86_ops;
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false);
}
static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
{
kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true);
}
int kvm_mmu_module_init(void); int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void); void kvm_mmu_module_exit(void);
...@@ -986,10 +974,12 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); ...@@ -986,10 +974,12 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
/* control of guest tsc rate supported? */ /* control of guest tsc rate supported? */
extern bool kvm_has_tsc_control; extern bool kvm_has_tsc_control;
/* minimum supported tsc_khz for guests */
extern u32 kvm_min_guest_tsc_khz;
/* maximum supported tsc_khz for guests */ /* maximum supported tsc_khz for guests */
extern u32 kvm_max_guest_tsc_khz; extern u32 kvm_max_guest_tsc_khz;
/* number of bits of the fractional part of the TSC scaling ratio */
extern u8 kvm_tsc_scaling_ratio_frac_bits;
/* maximum allowed value of TSC scaling ratio */
extern u64 kvm_max_tsc_scaling_ratio;
enum emulation_result { enum emulation_result {
EMULATE_DONE, /* no further processing */ EMULATE_DONE, /* no further processing */
...@@ -1235,6 +1225,9 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, ...@@ -1235,6 +1225,9 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
......
...@@ -73,6 +73,7 @@ ...@@ -73,6 +73,7 @@
#define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000 #define SECONDARY_EXEC_XSAVES 0x00100000
#define SECONDARY_EXEC_PCOMMIT 0x00200000 #define SECONDARY_EXEC_PCOMMIT 0x00200000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
#define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_EXT_INTR_MASK 0x00000001
#define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_NMI_EXITING 0x00000008
...@@ -167,6 +168,8 @@ enum vmcs_field { ...@@ -167,6 +168,8 @@ enum vmcs_field {
VMWRITE_BITMAP = 0x00002028, VMWRITE_BITMAP = 0x00002028,
XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D, XSS_EXIT_BITMAP_HIGH = 0x0000202D,
TSC_MULTIPLIER = 0x00002032,
TSC_MULTIPLIER_HIGH = 0x00002033,
GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800, VMCS_LINK_POINTER = 0x00002800,
......
...@@ -100,6 +100,7 @@ ...@@ -100,6 +100,7 @@
{ SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
{ SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
{ SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
{ SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \
{ SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
{ SVM_EXIT_INTR, "interrupt" }, \ { SVM_EXIT_INTR, "interrupt" }, \
{ SVM_EXIT_NMI, "nmi" }, \ { SVM_EXIT_NMI, "nmi" }, \
......
...@@ -1250,7 +1250,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) ...@@ -1250,7 +1250,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
tsc_deadline = apic->lapic_timer.expired_tscdeadline; tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0; apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
/* __delay is delay_tsc whenever the hardware has TSC, thus always. */ /* __delay is delay_tsc whenever the hardware has TSC, thus always. */
...@@ -1318,7 +1318,7 @@ static void start_apic_timer(struct kvm_lapic *apic) ...@@ -1318,7 +1318,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
local_irq_save(flags); local_irq_save(flags);
now = apic->lapic_timer.timer.base->get_time(); now = apic->lapic_timer.timer.base->get_time();
guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
if (likely(tscdeadline > guest_tsc)) { if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL; ns = (tscdeadline - guest_tsc) * 1000000ULL;
do_div(ns, this_tsc_khz); do_div(ns, this_tsc_khz);
......
...@@ -3359,7 +3359,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) ...@@ -3359,7 +3359,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
return reserved; return reserved;
} }
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
{ {
u64 spte; u64 spte;
bool reserved; bool reserved;
...@@ -3368,7 +3368,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) ...@@ -3368,7 +3368,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
return RET_MMIO_PF_EMULATE; return RET_MMIO_PF_EMULATE;
reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
if (unlikely(reserved)) if (WARN_ON(reserved))
return RET_MMIO_PF_BUG; return RET_MMIO_PF_BUG;
if (is_mmio_spte(spte)) { if (is_mmio_spte(spte)) {
...@@ -3392,17 +3392,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) ...@@ -3392,17 +3392,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
*/ */
return RET_MMIO_PF_RETRY; return RET_MMIO_PF_RETRY;
} }
EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr,
u32 error_code, bool direct)
{
int ret;
ret = handle_mmio_page_fault_common(vcpu, addr, direct);
WARN_ON(ret == RET_MMIO_PF_BUG);
return ret;
}
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
u32 error_code, bool prefault) u32 error_code, bool prefault)
...@@ -3413,7 +3403,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, ...@@ -3413,7 +3403,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
if (unlikely(error_code & PFERR_RSVD_MASK)) { if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, gva, error_code, true); r = handle_mmio_page_fault(vcpu, gva, true);
if (likely(r != RET_MMIO_PF_INVALID)) if (likely(r != RET_MMIO_PF_INVALID))
return r; return r;
...@@ -3503,7 +3493,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, ...@@ -3503,7 +3493,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
if (unlikely(error_code & PFERR_RSVD_MASK)) { if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, gpa, error_code, true); r = handle_mmio_page_fault(vcpu, gpa, true);
if (likely(r != RET_MMIO_PF_INVALID)) if (likely(r != RET_MMIO_PF_INVALID))
return r; return r;
......
...@@ -56,13 +56,13 @@ void ...@@ -56,13 +56,13 @@ void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
/* /*
* Return values of handle_mmio_page_fault_common: * Return values of handle_mmio_page_fault:
* RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
* directly. * directly.
* RET_MMIO_PF_INVALID: invalid spte is detected then let the real page * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
* fault path update the mmio spte. * fault path update the mmio spte.
* RET_MMIO_PF_RETRY: let CPU fault again on the address. * RET_MMIO_PF_RETRY: let CPU fault again on the address.
* RET_MMIO_PF_BUG: bug is detected. * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
*/ */
enum { enum {
RET_MMIO_PF_EMULATE = 1, RET_MMIO_PF_EMULATE = 1,
...@@ -71,7 +71,7 @@ enum { ...@@ -71,7 +71,7 @@ enum {
RET_MMIO_PF_BUG = -1 RET_MMIO_PF_BUG = -1
}; };
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
......
...@@ -705,8 +705,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, ...@@ -705,8 +705,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
if (unlikely(error_code & PFERR_RSVD_MASK)) { if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, addr, error_code, r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu));
mmu_is_nested(vcpu));
if (likely(r != RET_MMIO_PF_INVALID)) if (likely(r != RET_MMIO_PF_INVALID))
return r; return r;
......
...@@ -158,8 +158,6 @@ struct vcpu_svm { ...@@ -158,8 +158,6 @@ struct vcpu_svm {
unsigned long int3_rip; unsigned long int3_rip;
u32 apf_reason; u32 apf_reason;
u64 tsc_ratio;
/* cached guest cpuid flags for faster access */ /* cached guest cpuid flags for faster access */
bool nrips_enabled : 1; bool nrips_enabled : 1;
}; };
...@@ -214,7 +212,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm); ...@@ -214,7 +212,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
bool has_error_code, u32 error_code); bool has_error_code, u32 error_code);
static u64 __scale_tsc(u64 ratio, u64 tsc);
enum { enum {
VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
...@@ -894,20 +891,9 @@ static __init int svm_hardware_setup(void) ...@@ -894,20 +891,9 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_FFXSR); kvm_enable_efer_bits(EFER_FFXSR);
if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
u64 max;
kvm_has_tsc_control = true; kvm_has_tsc_control = true;
kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
/* kvm_tsc_scaling_ratio_frac_bits = 32;
* Make sure the user can only configure tsc_khz values that
* fit into a signed integer.
* A min value is not calculated needed because it will always
* be 1 on all machines and a value of 0 is used to disable
* tsc-scaling for the vcpu.
*/
max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
kvm_max_guest_tsc_khz = max;
} }
if (nested) { if (nested) {
...@@ -971,68 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) ...@@ -971,68 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0; seg->base = 0;
} }
static u64 __scale_tsc(u64 ratio, u64 tsc)
{
u64 mult, frac, _tsc;
mult = ratio >> 32;
frac = ratio & ((1ULL << 32) - 1);
_tsc = tsc;
_tsc *= mult;
_tsc += (tsc >> 32) * frac;
_tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
return _tsc;
}
static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 _tsc = tsc;
if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
_tsc = __scale_tsc(svm->tsc_ratio, tsc);
return _tsc;
}
static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 ratio;
u64 khz;
/* Guest TSC same frequency as host TSC? */
if (!scale) {
svm->tsc_ratio = TSC_RATIO_DEFAULT;
return;
}
/* TSC scaling supported? */
if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
if (user_tsc_khz > tsc_khz) {
vcpu->arch.tsc_catchup = 1;
vcpu->arch.tsc_always_catchup = 1;
} else
WARN(1, "user requested TSC rate below hardware speed\n");
return;
}
khz = user_tsc_khz;
/* TSC scaling required - calculate ratio */
ratio = khz << 32;
do_div(ratio, tsc_khz);
if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
user_tsc_khz);
return;
}
svm->tsc_ratio = ratio;
}
static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
...@@ -1059,16 +983,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ...@@ -1059,16 +983,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS); mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
} }
static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
if (host) {
if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
WARN_ON(adjustment < 0);
adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
}
svm->vmcb->control.tsc_offset += adjustment; svm->vmcb->control.tsc_offset += adjustment;
if (is_guest_mode(vcpu)) if (is_guest_mode(vcpu))
svm->nested.hsave->control.tsc_offset += adjustment; svm->nested.hsave->control.tsc_offset += adjustment;
...@@ -1080,15 +998,6 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho ...@@ -1080,15 +998,6 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
mark_dirty(svm->vmcb, VMCB_INTERCEPTS); mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
} }
static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
u64 tsc;
tsc = svm_scale_tsc(vcpu, rdtsc());
return target_tsc - tsc;
}
static void init_vmcb(struct vcpu_svm *svm) static void init_vmcb(struct vcpu_svm *svm)
{ {
struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_control_area *control = &svm->vmcb->control;
...@@ -1110,6 +1019,8 @@ static void init_vmcb(struct vcpu_svm *svm) ...@@ -1110,6 +1019,8 @@ static void init_vmcb(struct vcpu_svm *svm)
set_exception_intercept(svm, PF_VECTOR); set_exception_intercept(svm, PF_VECTOR);
set_exception_intercept(svm, UD_VECTOR); set_exception_intercept(svm, UD_VECTOR);
set_exception_intercept(svm, MC_VECTOR); set_exception_intercept(svm, MC_VECTOR);
set_exception_intercept(svm, AC_VECTOR);
set_exception_intercept(svm, DB_VECTOR);
set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_INTR);
set_intercept(svm, INTERCEPT_NMI); set_intercept(svm, INTERCEPT_NMI);
...@@ -1235,8 +1146,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) ...@@ -1235,8 +1146,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
goto out; goto out;
} }
svm->tsc_ratio = TSC_RATIO_DEFAULT;
err = kvm_vcpu_init(&svm->vcpu, kvm, id); err = kvm_vcpu_init(&svm->vcpu, kvm, id);
if (err) if (err)
goto free_svm; goto free_svm;
...@@ -1322,10 +1231,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ...@@ -1322,10 +1231,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) { u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
__this_cpu_write(current_tsc_ratio, svm->tsc_ratio); if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); __this_cpu_write(current_tsc_ratio, tsc_ratio);
wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
}
} }
} }
...@@ -1644,20 +1555,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, ...@@ -1644,20 +1555,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
mark_dirty(svm->vmcb, VMCB_SEG); mark_dirty(svm->vmcb, VMCB_SEG);
} }
static void update_db_bp_intercept(struct kvm_vcpu *vcpu) static void update_bp_intercept(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
clr_exception_intercept(svm, DB_VECTOR);
clr_exception_intercept(svm, BP_VECTOR); clr_exception_intercept(svm, BP_VECTOR);
if (svm->nmi_singlestep)
set_exception_intercept(svm, DB_VECTOR);
if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
if (vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
set_exception_intercept(svm, DB_VECTOR);
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
set_exception_intercept(svm, BP_VECTOR); set_exception_intercept(svm, BP_VECTOR);
} else } else
...@@ -1763,7 +1667,6 @@ static int db_interception(struct vcpu_svm *svm) ...@@ -1763,7 +1667,6 @@ static int db_interception(struct vcpu_svm *svm)
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
svm->vmcb->save.rflags &= svm->vmcb->save.rflags &=
~(X86_EFLAGS_TF | X86_EFLAGS_RF); ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
update_db_bp_intercept(&svm->vcpu);
} }
if (svm->vcpu.guest_debug & if (svm->vcpu.guest_debug &
...@@ -1798,6 +1701,12 @@ static int ud_interception(struct vcpu_svm *svm) ...@@ -1798,6 +1701,12 @@ static int ud_interception(struct vcpu_svm *svm)
return 1; return 1;
} }
static int ac_interception(struct vcpu_svm *svm)
{
kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
return 1;
}
static void svm_fpu_activate(struct kvm_vcpu *vcpu) static void svm_fpu_activate(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
...@@ -3075,8 +2984,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) ...@@ -3075,8 +2984,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{ {
struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
return vmcb->control.tsc_offset + return vmcb->control.tsc_offset + host_tsc;
svm_scale_tsc(vcpu, host_tsc);
} }
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
...@@ -3086,7 +2994,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -3086,7 +2994,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) { switch (msr_info->index) {
case MSR_IA32_TSC: { case MSR_IA32_TSC: {
msr_info->data = svm->vmcb->control.tsc_offset + msr_info->data = svm->vmcb->control.tsc_offset +
svm_scale_tsc(vcpu, rdtsc()); kvm_scale_tsc(vcpu, rdtsc());
break; break;
} }
...@@ -3362,6 +3270,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { ...@@ -3362,6 +3270,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
[SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
[SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
[SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_NMI] = nmi_interception,
[SVM_EXIT_SMI] = nop_on_interception, [SVM_EXIT_SMI] = nop_on_interception,
...@@ -3745,7 +3654,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) ...@@ -3745,7 +3654,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
*/ */
svm->nmi_singlestep = true; svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
update_db_bp_intercept(vcpu);
} }
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
...@@ -4371,7 +4279,7 @@ static struct kvm_x86_ops svm_x86_ops = { ...@@ -4371,7 +4279,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.vcpu_load = svm_vcpu_load, .vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put, .vcpu_put = svm_vcpu_put,
.update_db_bp_intercept = update_db_bp_intercept, .update_bp_intercept = update_bp_intercept,
.get_msr = svm_get_msr, .get_msr = svm_get_msr,
.set_msr = svm_set_msr, .set_msr = svm_set_msr,
.get_segment_base = svm_get_segment_base, .get_segment_base = svm_get_segment_base,
...@@ -4443,11 +4351,9 @@ static struct kvm_x86_ops svm_x86_ops = { ...@@ -4443,11 +4351,9 @@ static struct kvm_x86_ops svm_x86_ops = {
.has_wbinvd_exit = svm_has_wbinvd_exit, .has_wbinvd_exit = svm_has_wbinvd_exit,
.set_tsc_khz = svm_set_tsc_khz,
.read_tsc_offset = svm_read_tsc_offset, .read_tsc_offset = svm_read_tsc_offset,
.write_tsc_offset = svm_write_tsc_offset, .write_tsc_offset = svm_write_tsc_offset,
.adjust_tsc_offset = svm_adjust_tsc_offset, .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
.compute_tsc_offset = svm_compute_tsc_offset,
.read_l1_tsc = svm_read_l1_tsc, .read_l1_tsc = svm_read_l1_tsc,
.set_tdp_cr3 = set_tdp_cr3, .set_tdp_cr3 = set_tdp_cr3,
......
...@@ -107,6 +107,8 @@ static u64 __read_mostly host_xss; ...@@ -107,6 +107,8 @@ static u64 __read_mostly host_xss;
static bool __read_mostly enable_pml = 1; static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO); module_param_named(pml, enable_pml, bool, S_IRUGO);
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \ #define KVM_VM_CR0_ALWAYS_ON \
...@@ -1172,6 +1174,12 @@ static inline bool cpu_has_vmx_pml(void) ...@@ -1172,6 +1174,12 @@ static inline bool cpu_has_vmx_pml(void)
return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML; return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
} }
static inline bool cpu_has_vmx_tsc_scaling(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_TSC_SCALING;
}
static inline bool report_flexpriority(void) static inline bool report_flexpriority(void)
{ {
return flexpriority_enabled; return flexpriority_enabled;
...@@ -1631,7 +1639,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) ...@@ -1631,7 +1639,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
u32 eb; u32 eb;
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << NM_VECTOR) | (1u << DB_VECTOR); (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug & if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
...@@ -2053,6 +2061,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ...@@ -2053,6 +2061,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
/* Setup TSC multiplier */
if (cpu_has_vmx_tsc_scaling())
vmcs_write64(TSC_MULTIPLIER,
vcpu->arch.tsc_scaling_ratio);
vmx->loaded_vmcs->cpu = cpu; vmx->loaded_vmcs->cpu = cpu;
} }
...@@ -2357,15 +2371,16 @@ static void setup_msrs(struct vcpu_vmx *vmx) ...@@ -2357,15 +2371,16 @@ static void setup_msrs(struct vcpu_vmx *vmx)
/* /*
* reads and returns guest's timestamp counter "register" * reads and returns guest's timestamp counter "register"
* guest_tsc = host_tsc + tsc_offset -- 21.3 * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset
* -- Intel TSC Scaling for Virtualization White Paper, sec 1.3
*/ */
static u64 guest_read_tsc(void) static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
{ {
u64 host_tsc, tsc_offset; u64 host_tsc, tsc_offset;
host_tsc = rdtsc(); host_tsc = rdtsc();
tsc_offset = vmcs_read64(TSC_OFFSET); tsc_offset = vmcs_read64(TSC_OFFSET);
return host_tsc + tsc_offset; return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
} }
/* /*
...@@ -2382,22 +2397,6 @@ static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) ...@@ -2382,22 +2397,6 @@ static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
return host_tsc + tsc_offset; return host_tsc + tsc_offset;
} }
/*
 * Handle a guest TSC rate that does not match the host's.
 *
 * Nothing is done unless @scale is set. When the requested rate is above
 * tsc_khz the vCPU is switched to permanent software catchup
 * (tsc_catchup and tsc_always_catchup); any other rate only triggers a
 * warning.
 */
static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
	if (!scale)
		return;

	if (user_tsc_khz <= tsc_khz) {
		WARN(1, "user requested TSC rate below hardware speed\n");
		return;
	}

	vcpu->arch.tsc_catchup = 1;
	vcpu->arch.tsc_always_catchup = 1;
}
static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu) static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
{ {
return vmcs_read64(TSC_OFFSET); return vmcs_read64(TSC_OFFSET);
...@@ -2429,7 +2428,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) ...@@ -2429,7 +2428,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
} }
} }
static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
{ {
u64 offset = vmcs_read64(TSC_OFFSET); u64 offset = vmcs_read64(TSC_OFFSET);
...@@ -2442,11 +2441,6 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho ...@@ -2442,11 +2441,6 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
offset + adjustment); offset + adjustment);
} }
/*
 * Return the offset that has to be added to the current rdtsc() reading
 * to obtain @target_tsc. @vcpu is not used.
 */
static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
	u64 host_tsc = rdtsc();

	return target_tsc - host_tsc;
}
static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
{ {
struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
...@@ -2778,7 +2772,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -2778,7 +2772,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_EFER: case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_info); return kvm_get_msr_common(vcpu, msr_info);
case MSR_IA32_TSC: case MSR_IA32_TSC:
msr_info->data = guest_read_tsc(); msr_info->data = guest_read_tsc(vcpu);
break; break;
case MSR_IA32_SYSENTER_CS: case MSR_IA32_SYSENTER_CS:
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
...@@ -3154,7 +3148,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) ...@@ -3154,7 +3148,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_SHADOW_VMCS | SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES | SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_ENABLE_PML |
SECONDARY_EXEC_PCOMMIT; SECONDARY_EXEC_PCOMMIT |
SECONDARY_EXEC_TSC_SCALING;
if (adjust_vmx_controls(min2, opt2, if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2, MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0) &_cpu_based_2nd_exec_control) < 0)
...@@ -5266,6 +5261,9 @@ static int handle_exception(struct kvm_vcpu *vcpu) ...@@ -5266,6 +5261,9 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return handle_rmode_exception(vcpu, ex_no, error_code); return handle_rmode_exception(vcpu, ex_no, error_code);
switch (ex_no) { switch (ex_no) {
case AC_VECTOR:
kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
return 1;
case DB_VECTOR: case DB_VECTOR:
dr6 = vmcs_readl(EXIT_QUALIFICATION); dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug & if (!(vcpu->guest_debug &
...@@ -5908,7 +5906,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) ...@@ -5908,7 +5906,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
return 1; return 1;
} }
ret = handle_mmio_page_fault_common(vcpu, gpa, true); ret = handle_mmio_page_fault(vcpu, gpa, true);
if (likely(ret == RET_MMIO_PF_EMULATE)) if (likely(ret == RET_MMIO_PF_EMULATE))
return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
EMULATE_DONE; EMULATE_DONE;
...@@ -6199,6 +6197,12 @@ static __init int hardware_setup(void) ...@@ -6199,6 +6197,12 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_apicv()) if (!cpu_has_vmx_apicv())
enable_apicv = 0; enable_apicv = 0;
if (cpu_has_vmx_tsc_scaling()) {
kvm_has_tsc_control = true;
kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
kvm_tsc_scaling_ratio_frac_bits = 48;
}
if (enable_apicv) if (enable_apicv)
kvm_x86_ops->update_cr8_intercept = NULL; kvm_x86_ops->update_cr8_intercept = NULL;
else { else {
...@@ -8008,6 +8012,9 @@ static void dump_vmcs(void) ...@@ -8008,6 +8012,9 @@ static void dump_vmcs(void)
vmcs_read32(IDT_VECTORING_INFO_FIELD), vmcs_read32(IDT_VECTORING_INFO_FIELD),
vmcs_read32(IDT_VECTORING_ERROR_CODE)); vmcs_read32(IDT_VECTORING_ERROR_CODE));
pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET));
if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
pr_err("TSC Multiplier = 0x%016lx\n",
vmcs_readl(TSC_MULTIPLIER));
if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
...@@ -10752,7 +10759,7 @@ static struct kvm_x86_ops vmx_x86_ops = { ...@@ -10752,7 +10759,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_load = vmx_vcpu_load, .vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put, .vcpu_put = vmx_vcpu_put,
.update_db_bp_intercept = update_exception_bitmap, .update_bp_intercept = update_exception_bitmap,
.get_msr = vmx_get_msr, .get_msr = vmx_get_msr,
.set_msr = vmx_set_msr, .set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base, .get_segment_base = vmx_get_segment_base,
...@@ -10826,11 +10833,9 @@ static struct kvm_x86_ops vmx_x86_ops = { ...@@ -10826,11 +10833,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
.set_tsc_khz = vmx_set_tsc_khz,
.read_tsc_offset = vmx_read_tsc_offset, .read_tsc_offset = vmx_read_tsc_offset,
.write_tsc_offset = vmx_write_tsc_offset, .write_tsc_offset = vmx_write_tsc_offset,
.adjust_tsc_offset = vmx_adjust_tsc_offset, .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
.compute_tsc_offset = vmx_compute_tsc_offset,
.read_l1_tsc = vmx_read_l1_tsc, .read_l1_tsc = vmx_read_l1_tsc,
.set_tdp_cr3 = vmx_set_cr3, .set_tdp_cr3 = vmx_set_cr3,
......
...@@ -93,10 +93,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); ...@@ -93,10 +93,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
struct kvm_x86_ops *kvm_x86_ops; struct kvm_x86_ops *kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops); EXPORT_SYMBOL_GPL(kvm_x86_ops);
static bool ignore_msrs = 0; static bool __read_mostly ignore_msrs = 0;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
unsigned int min_timer_period_us = 500; unsigned int min_timer_period_us = 500;
...@@ -105,20 +105,25 @@ module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); ...@@ -105,20 +105,25 @@ module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
static bool __read_mostly kvmclock_periodic_sync = true; static bool __read_mostly kvmclock_periodic_sync = true;
module_param(kvmclock_periodic_sync, bool, S_IRUGO); module_param(kvmclock_periodic_sync, bool, S_IRUGO);
bool kvm_has_tsc_control; bool __read_mostly kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control); EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz; u32 __read_mostly kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
u64 __read_mostly kvm_max_tsc_scaling_ratio;
EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
static u64 __read_mostly kvm_default_tsc_scaling_ratio;
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 tsc_tolerance_ppm = 250; static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
/* lapic timer advance (tscdeadline mode only) in nanoseconds */ /* lapic timer advance (tscdeadline mode only) in nanoseconds */
unsigned int lapic_timer_advance_ns = 0; unsigned int __read_mostly lapic_timer_advance_ns = 0;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
static bool backwards_tsc_observed = false; static bool __read_mostly backwards_tsc_observed = false;
#define KVM_NR_SHARED_MSRS 16 #define KVM_NR_SHARED_MSRS 16
...@@ -1249,14 +1254,53 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm) ...@@ -1249,14 +1254,53 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm)
return v; return v;
} }
static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
u64 ratio;
/* Guest TSC same frequency as host TSC? */
if (!scale) {
vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
return 0;
}
/* TSC scaling supported? */
if (!kvm_has_tsc_control) {
if (user_tsc_khz > tsc_khz) {
vcpu->arch.tsc_catchup = 1;
vcpu->arch.tsc_always_catchup = 1;
return 0;
} else {
WARN(1, "user requested TSC rate below hardware speed\n");
return -1;
}
}
/* TSC scaling required - calculate ratio */
ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
user_tsc_khz, tsc_khz);
if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
user_tsc_khz);
return -1;
}
vcpu->arch.tsc_scaling_ratio = ratio;
return 0;
}
static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{ {
u32 thresh_lo, thresh_hi; u32 thresh_lo, thresh_hi;
int use_scaling = 0; int use_scaling = 0;
/* tsc_khz can be zero if TSC calibration fails */ /* tsc_khz can be zero if TSC calibration fails */
if (this_tsc_khz == 0) if (this_tsc_khz == 0) {
return; /* set tsc_scaling_ratio to a safe value */
vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
return -1;
}
/* Compute a scale to convert nanoseconds in TSC cycles */ /* Compute a scale to convert nanoseconds in TSC cycles */
kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
...@@ -1276,7 +1320,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) ...@@ -1276,7 +1320,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
use_scaling = 1; use_scaling = 1;
} }
kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling); return set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
} }
static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
...@@ -1322,6 +1366,48 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) ...@@ -1322,6 +1366,48 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
} }
/*
 * Scale @tsc by the fixed-point factor @ratio.
 *
 * @ratio is a fixed-point number with N fractional bits, where N is
 * kvm_tsc_scaling_ratio_frac_bits: the most significant 64-N bits (mult)
 * hold the integral part and the remaining N bits (frac) hold the
 * fractional part, i.e. @ratio represents mult + frac * 2^(-N).
 *
 * The result is (tsc * ratio) >> N.
 */
static inline u64 __scale_tsc(u64 ratio, u64 tsc)
{
	unsigned int frac_bits = kvm_tsc_scaling_ratio_frac_bits;

	return mul_u64_u64_shr(tsc, ratio, frac_bits);
}
/*
 * Scale @tsc by the vCPU's tsc_scaling_ratio.
 *
 * When the ratio equals kvm_default_tsc_scaling_ratio the value is
 * returned untouched and the multiplication is skipped.
 */
u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
{
	u64 ratio = vcpu->arch.tsc_scaling_ratio;

	if (ratio == kvm_default_tsc_scaling_ratio)
		return tsc;

	return __scale_tsc(ratio, tsc);
}
EXPORT_SYMBOL_GPL(kvm_scale_tsc);
/*
 * Return the offset between @target_tsc and the current rdtsc() reading
 * after that reading has been passed through kvm_scale_tsc().
 */
static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
	u64 scaled_host_tsc = kvm_scale_tsc(vcpu, rdtsc());

	return target_tsc - scaled_host_tsc;
}
/*
 * Scale @host_tsc with kvm_scale_tsc() and pass the scaled value to the
 * kvm_x86_ops->read_l1_tsc hook, returning whatever the hook returns.
 */
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
	u64 scaled_tsc = kvm_scale_tsc(vcpu, host_tsc);

	return kvm_x86_ops->read_l1_tsc(vcpu, scaled_tsc);
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{ {
struct kvm *kvm = vcpu->kvm; struct kvm *kvm = vcpu->kvm;
...@@ -1333,7 +1419,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) ...@@ -1333,7 +1419,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
u64 data = msr->data; u64 data = msr->data;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); offset = kvm_compute_tsc_offset(vcpu, data);
ns = get_kernel_ns(); ns = get_kernel_ns();
elapsed = ns - kvm->arch.last_tsc_nsec; elapsed = ns - kvm->arch.last_tsc_nsec;
...@@ -1390,7 +1476,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) ...@@ -1390,7 +1476,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
} else { } else {
u64 delta = nsec_to_cycles(vcpu, elapsed); u64 delta = nsec_to_cycles(vcpu, elapsed);
data += delta; data += delta;
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); offset = kvm_compute_tsc_offset(vcpu, data);
pr_debug("kvm: adjusted tsc offset by %llu\n", delta); pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
} }
matched = true; matched = true;
...@@ -1447,6 +1533,20 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) ...@@ -1447,6 +1533,20 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
EXPORT_SYMBOL_GPL(kvm_write_tsc); EXPORT_SYMBOL_GPL(kvm_write_tsc);
/*
 * Forward a TSC offset adjustment, unmodified, to the
 * kvm_x86_ops->adjust_tsc_offset_guest hook.
 */
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
}
/*
 * Apply a TSC offset adjustment after scaling it with kvm_scale_tsc().
 * NOTE(review): presumably @adjustment is given in host TSC cycles, as the
 * function name suggests - confirm against callers.
 *
 * kvm_scale_tsc() takes an unsigned value, so a negative @adjustment is
 * flagged with WARN_ON() whenever the vCPU uses a non-default scaling
 * ratio. The scaled value is then handed to the adjust_tsc_offset_guest
 * hook.
 */
static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
{
	s64 scaled;

	WARN_ON(vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
		adjustment < 0);

	scaled = kvm_scale_tsc(vcpu, (u64) adjustment);
	kvm_x86_ops->adjust_tsc_offset_guest(vcpu, scaled);
}
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
static cycle_t read_tsc(void) static cycle_t read_tsc(void)
...@@ -1608,7 +1708,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) ...@@ -1608,7 +1708,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
static int kvm_guest_time_update(struct kvm_vcpu *v) static int kvm_guest_time_update(struct kvm_vcpu *v)
{ {
unsigned long flags, this_tsc_khz; unsigned long flags, this_tsc_khz, tgt_tsc_khz;
struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch; struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns; s64 kernel_ns;
...@@ -1645,7 +1745,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) ...@@ -1645,7 +1745,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
kernel_ns = get_kernel_ns(); kernel_ns = get_kernel_ns();
} }
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc); tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
/* /*
* We may have to catch up the TSC to match elapsed wall clock * We may have to catch up the TSC to match elapsed wall clock
...@@ -1671,7 +1771,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) ...@@ -1671,7 +1771,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
return 0; return 0;
if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, tgt_tsc_khz = kvm_has_tsc_control ?
vcpu->virtual_tsc_khz : this_tsc_khz;
kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz,
&vcpu->hv_clock.tsc_shift, &vcpu->hv_clock.tsc_shift,
&vcpu->hv_clock.tsc_to_system_mul); &vcpu->hv_clock.tsc_to_system_mul);
vcpu->hw_tsc_khz = this_tsc_khz; vcpu->hw_tsc_khz = this_tsc_khz;
...@@ -2617,7 +2719,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ...@@ -2617,7 +2719,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (tsc_delta < 0) if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC"); mark_tsc_unstable("KVM discovered backwards TSC");
if (check_tsc_unstable()) { if (check_tsc_unstable()) {
u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu, u64 offset = kvm_compute_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc); vcpu->arch.last_guest_tsc);
kvm_x86_ops->write_tsc_offset(vcpu, offset); kvm_x86_ops->write_tsc_offset(vcpu, offset);
vcpu->arch.tsc_catchup = 1; vcpu->arch.tsc_catchup = 1;
...@@ -3319,9 +3421,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, ...@@ -3319,9 +3421,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (user_tsc_khz == 0) if (user_tsc_khz == 0)
user_tsc_khz = tsc_khz; user_tsc_khz = tsc_khz;
kvm_set_tsc_khz(vcpu, user_tsc_khz); if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
r = 0; r = 0;
goto out; goto out;
} }
case KVM_GET_TSC_KHZ: { case KVM_GET_TSC_KHZ: {
...@@ -6452,8 +6554,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -6452,8 +6554,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (hw_breakpoint_active()) if (hw_breakpoint_active())
hw_breakpoint_restore(); hw_breakpoint_restore();
vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
rdtsc());
vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb(); smp_wmb();
...@@ -7015,7 +7116,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, ...@@ -7015,7 +7116,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
*/ */
kvm_set_rflags(vcpu, rflags); kvm_set_rflags(vcpu, rflags);
kvm_x86_ops->update_db_bp_intercept(vcpu); kvm_x86_ops->update_bp_intercept(vcpu);
r = 0; r = 0;
...@@ -7364,6 +7465,20 @@ int kvm_arch_hardware_setup(void) ...@@ -7364,6 +7465,20 @@ int kvm_arch_hardware_setup(void)
if (r != 0) if (r != 0)
return r; return r;
if (kvm_has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
* fit into a signed integer.
* A min value does not need to be calculated because it will
* always be 1 on all machines.
*/
u64 max = min(0x7fffffffULL,
__scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
kvm_max_guest_tsc_khz = max;
kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
}
kvm_init_msr_list(); kvm_init_msr_list();
return 0; return 0;
} }
......
...@@ -10,6 +10,10 @@ ...@@ -10,6 +10,10 @@
#ifdef CONFIG_CONTEXT_TRACKING #ifdef CONFIG_CONTEXT_TRACKING
extern void context_tracking_cpu_set(int cpu); extern void context_tracking_cpu_set(int cpu);
/* Called with interrupts disabled. */
extern void __context_tracking_enter(enum ctx_state state);
extern void __context_tracking_exit(enum ctx_state state);
extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_enter(enum ctx_state state);
extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state);
extern void context_tracking_user_enter(void); extern void context_tracking_user_enter(void);
...@@ -18,13 +22,13 @@ extern void context_tracking_user_exit(void); ...@@ -18,13 +22,13 @@ extern void context_tracking_user_exit(void);
static inline void user_enter(void) static inline void user_enter(void)
{ {
if (context_tracking_is_enabled()) if (context_tracking_is_enabled())
context_tracking_user_enter(); context_tracking_enter(CONTEXT_USER);
} }
static inline void user_exit(void) static inline void user_exit(void)
{ {
if (context_tracking_is_enabled()) if (context_tracking_is_enabled())
context_tracking_user_exit(); context_tracking_exit(CONTEXT_USER);
} }
static inline enum ctx_state exception_enter(void) static inline enum ctx_state exception_enter(void)
...@@ -88,13 +92,13 @@ static inline void guest_enter(void) ...@@ -88,13 +92,13 @@ static inline void guest_enter(void)
current->flags |= PF_VCPU; current->flags |= PF_VCPU;
if (context_tracking_is_enabled()) if (context_tracking_is_enabled())
context_tracking_enter(CONTEXT_GUEST); __context_tracking_enter(CONTEXT_GUEST);
} }
static inline void guest_exit(void) static inline void guest_exit(void)
{ {
if (context_tracking_is_enabled()) if (context_tracking_is_enabled())
context_tracking_exit(CONTEXT_GUEST); __context_tracking_exit(CONTEXT_GUEST);
if (vtime_accounting_enabled()) if (vtime_accounting_enabled())
vtime_guest_exit(current); vtime_guest_exit(current);
......
...@@ -1183,4 +1183,5 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); ...@@ -1183,4 +1183,5 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set); uint32_t guest_irq, bool set);
#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
#endif #endif
...@@ -142,6 +142,13 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) ...@@ -142,6 +142,13 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
} }
#endif /* mul_u64_u32_shr */ #endif /* mul_u64_u32_shr */
#ifndef mul_u64_u64_shr
/*
 * Multiply @a by @mul using a 128-bit intermediate product, shift the
 * product right by @shift bits and return the low 64 bits of the result.
 */
static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
{
	unsigned __int128 product = (unsigned __int128)a * mul;

	return (u64)(product >> shift);
}
#endif /* mul_u64_u64_shr */
#else #else
#ifndef mul_u64_u32_shr #ifndef mul_u64_u32_shr
...@@ -161,6 +168,79 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) ...@@ -161,6 +168,79 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
} }
#endif /* mul_u64_u32_shr */ #endif /* mul_u64_u32_shr */
#ifndef mul_u64_u64_shr
/*
 * mul_u64_u64_shr - multiply two 64-bit values and shift the product right
 * @a:     first factor
 * @b:     second factor
 * @shift: number of bits the 128-bit product is shifted right by
 *
 * This variant does not rely on a 128-bit integer type: both factors are
 * split into 32-bit halves, the four partial products are computed and
 * then summed column by column with explicit carry propagation.
 *
 * Returns the low 64 bits of (a * b) >> shift.
 */
static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
{
	/* View of a 64-bit value as two 32-bit halves, in memory order. */
	union {
		u64 ll;
		struct {
#ifdef __BIG_ENDIAN
			u32 high, low;
#else
			u32 low, high;
#endif
		} l;
	} rl, rm, rn, rh, a0, b0;
	u64 c;

	a0.ll = a;
	b0.ll = b;

	/* Partial products: low*low, low*high, high*low and high*high. */
	rl.ll = (u64)a0.l.low * b0.l.low;
	rm.ll = (u64)a0.l.low * b0.l.high;
	rn.ll = (u64)a0.l.high * b0.l.low;
	rh.ll = (u64)a0.l.high * b0.l.high;

	/*
	 * Each of these lines computes a 64-bit intermediate result into "c",
	 * starting at bits 32-95. The low 32-bits go into the result of the
	 * multiplication, the high 32-bits are carried into the next step.
	 */
	rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low;
	rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low;
	rh.l.high = (c >> 32) + rh.l.high;

	/*
	 * The 128-bit result of the multiplication is in rl.ll and rh.ll,
	 * shift it right and throw away the high part of the result.
	 */
	if (shift == 0)
		return rl.ll;	/* avoids the undefined "rh.ll << 64" below */
	if (shift < 64)
		return (rl.ll >> shift) | (rh.ll << (64 - shift));
	/* Only the high word contributes once shift reaches 64. */
	return rh.ll >> (shift & 63);
}
#endif /* mul_u64_u64_shr */
#endif
#ifndef mul_u64_u32_div
/*
 * Compute a * mul / divisor, forming the product in two 64-bit pieces so
 * that it is not truncated before the division. The low 64 bits of the
 * quotient are returned.
 *
 * NOTE(review): relies on do_div(n, base) dividing n in place and
 * returning the remainder - confirm against the do_div() definition.
 */
static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
{
/* View of a 64-bit value as two 32-bit halves, in memory order. */
union {
u64 ll;
struct {
#ifdef __BIG_ENDIAN
u32 high, low;
#else
u32 low, high;
#endif
} l;
} u, rl, rh;
u.ll = a;
/* Low half of a times mul; its upper 32 bits carry into rh below. */
rl.ll = (u64)u.l.low * mul;
/* High half of a times mul plus the carry: bits 32 and up of the product. */
rh.ll = (u64)u.l.high * mul + rl.l.high;
/* Bits 32-63 of the result will be in rh.l.low. */
/* The value do_div() returns becomes the high word of the next dividend. */
rl.l.high = do_div(rh.ll, divisor);
/* Bits 0-31 of the result will be in rl.l.low. */
do_div(rl.ll, divisor);
/* Assemble the 64-bit result from the two 32-bit quotient words. */
rl.l.high = rh.l.low;
return rl.ll;
}
#endif /* mul_u64_u32_div */
#endif /* _LINUX_MATH64_H */ #endif /* _LINUX_MATH64_H */
...@@ -58,36 +58,13 @@ static void context_tracking_recursion_exit(void) ...@@ -58,36 +58,13 @@ static void context_tracking_recursion_exit(void)
* instructions to execute won't use any RCU read side critical section * instructions to execute won't use any RCU read side critical section
* because this function sets RCU in extended quiescent state. * because this function sets RCU in extended quiescent state.
*/ */
void context_tracking_enter(enum ctx_state state) void __context_tracking_enter(enum ctx_state state)
{ {
unsigned long flags;
/*
* Repeat the user_enter() check here because some archs may be calling
* this from asm and if no CPU needs context tracking, they shouldn't
* go further. Repeat the check here until they support the inline static
* key check.
*/
if (!context_tracking_is_enabled())
return;
/*
* Some contexts may involve an exception occurring in an irq,
* leading to that nesting:
* rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
* This would mess up the dyntick_nesting count though. And rcu_irq_*()
* helpers are enough to protect RCU uses inside the exception. So
* just return immediately if we detect we are in an IRQ.
*/
if (in_interrupt())
return;
/* Kernel threads aren't supposed to go to userspace */ /* Kernel threads aren't supposed to go to userspace */
WARN_ON_ONCE(!current->mm); WARN_ON_ONCE(!current->mm);
local_irq_save(flags);
if (!context_tracking_recursion_enter()) if (!context_tracking_recursion_enter())
goto out_irq_restore; return;
if ( __this_cpu_read(context_tracking.state) != state) { if ( __this_cpu_read(context_tracking.state) != state) {
if (__this_cpu_read(context_tracking.active)) { if (__this_cpu_read(context_tracking.active)) {
...@@ -120,7 +97,27 @@ void context_tracking_enter(enum ctx_state state) ...@@ -120,7 +97,27 @@ void context_tracking_enter(enum ctx_state state)
__this_cpu_write(context_tracking.state, state); __this_cpu_write(context_tracking.state, state);
} }
context_tracking_recursion_exit(); context_tracking_recursion_exit();
out_irq_restore: }
NOKPROBE_SYMBOL(__context_tracking_enter);
EXPORT_SYMBOL_GPL(__context_tracking_enter);
void context_tracking_enter(enum ctx_state state)
{
unsigned long flags;
/*
* Some contexts may involve an exception occurring in an irq,
* leading to that nesting:
* rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
* This would mess up the dyntick_nesting count though. And rcu_irq_*()
* helpers are enough to protect RCU uses inside the exception. So
* just return immediately if we detect we are in an IRQ.
*/
if (in_interrupt())
return;
local_irq_save(flags);
__context_tracking_enter(state);
local_irq_restore(flags); local_irq_restore(flags);
} }
NOKPROBE_SYMBOL(context_tracking_enter); NOKPROBE_SYMBOL(context_tracking_enter);
...@@ -128,7 +125,7 @@ EXPORT_SYMBOL_GPL(context_tracking_enter); ...@@ -128,7 +125,7 @@ EXPORT_SYMBOL_GPL(context_tracking_enter);
void context_tracking_user_enter(void) void context_tracking_user_enter(void)
{ {
context_tracking_enter(CONTEXT_USER); user_enter();
} }
NOKPROBE_SYMBOL(context_tracking_user_enter); NOKPROBE_SYMBOL(context_tracking_user_enter);
...@@ -144,19 +141,10 @@ NOKPROBE_SYMBOL(context_tracking_user_enter); ...@@ -144,19 +141,10 @@ NOKPROBE_SYMBOL(context_tracking_user_enter);
* This call supports re-entrancy. This way it can be called from any exception * This call supports re-entrancy. This way it can be called from any exception
* handler without needing to know if we came from userspace or not. * handler without needing to know if we came from userspace or not.
*/ */
void context_tracking_exit(enum ctx_state state) void __context_tracking_exit(enum ctx_state state)
{ {
unsigned long flags;
if (!context_tracking_is_enabled())
return;
if (in_interrupt())
return;
local_irq_save(flags);
if (!context_tracking_recursion_enter()) if (!context_tracking_recursion_enter())
goto out_irq_restore; return;
if (__this_cpu_read(context_tracking.state) == state) { if (__this_cpu_read(context_tracking.state) == state) {
if (__this_cpu_read(context_tracking.active)) { if (__this_cpu_read(context_tracking.active)) {
...@@ -173,7 +161,19 @@ void context_tracking_exit(enum ctx_state state) ...@@ -173,7 +161,19 @@ void context_tracking_exit(enum ctx_state state)
__this_cpu_write(context_tracking.state, CONTEXT_KERNEL); __this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
} }
context_tracking_recursion_exit(); context_tracking_recursion_exit();
out_irq_restore: }
NOKPROBE_SYMBOL(__context_tracking_exit);
EXPORT_SYMBOL_GPL(__context_tracking_exit);
void context_tracking_exit(enum ctx_state state)
{
unsigned long flags;
if (in_interrupt())
return;
local_irq_save(flags);
__context_tracking_exit(state);
local_irq_restore(flags); local_irq_restore(flags);
} }
NOKPROBE_SYMBOL(context_tracking_exit); NOKPROBE_SYMBOL(context_tracking_exit);
...@@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(context_tracking_exit); ...@@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(context_tracking_exit);
void context_tracking_user_exit(void) void context_tracking_user_exit(void)
{ {
context_tracking_exit(CONTEXT_USER); user_exit();
} }
NOKPROBE_SYMBOL(context_tracking_user_exit); NOKPROBE_SYMBOL(context_tracking_user_exit);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册