From a40939dfe67178fee98d40292f014f39922f692f Mon Sep 17 00:00:00 2001
From: chenjiajun <chenjiajun8@huawei.com>
Date: Wed, 23 Dec 2020 09:37:16 +0800
Subject: [PATCH] kvm: debugfs: Export x86 kvm exits to vcpu_stat

virt inclusion
category: feature
bugzilla: 46853
CVE: NA

Export vcpu_stat via debugfs for x86, which contains x86 kvm exits items.
The path of the vcpu_stat is /sys/kernel/debug/kvm/vcpu_stat, and
each line of vcpu_stat is a collection of various kvm exits for a vcpu.
And through vcpu_stat, we only need to open one file to
tail performance of virtual machine, which is more convenient.

Signed-off-by: Feng Lin <linfeng23@huawei.com>
Signed-off-by: chenjiajun <chenjiajun8@huawei.com>
Reviewed-by: Xiangyou Xie <xiexiangyou@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
---
 arch/x86/include/asm/kvm_host.h | 12 ++++++
 arch/x86/kvm/vmx/vmx.c          | 10 +++++
 arch/x86/kvm/x86.c              | 65 +++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c             | 51 ++++++++++++++++++++++----
 4 files changed, 130 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index adbe88e4be12..730ffde044b4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1058,6 +1058,18 @@ struct kvm_vcpu_stat {
 	u64 req_event;
 	u64 halt_poll_success_ns;
 	u64 halt_poll_fail_ns;
+	u64 cr_exits;
+	u64 msr_rd_exits;
+	u64 msr_wr_exits;
+	u64 apic_wr_exits;
+	u64 ept_vio_exits;
+	u64 ept_mis_exits;
+	u64 pause_exits;
+	u64 steal;
+	u64 st_max;
+	u64 utime;
+	u64 stime;
+	u64 gtime;
 };
 
 struct x86_instruction_info;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c01aac2bac37..fb636f6cc55d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -364,6 +364,11 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
 static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
 							  u32 msr, int type);
 
+void kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_stat)
+{
+	vcpu_stat->st_max = 0;
+}
+
 void vmx_vmexit(void);
 
 #define vmx_insn_failed(fmt...)		\
@@ -4996,6 +5001,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 	exit_qualification = vmx_get_exit_qual(vcpu);
 	cr = exit_qualification & 15;
 	reg = (exit_qualification >> 8) & 15;
+	vcpu->stat.cr_exits++;
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
 		val = kvm_register_readl(vcpu, reg);
@@ -5240,6 +5246,7 @@ static int handle_apic_write(struct kvm_vcpu *vcpu)
 	u32 offset = exit_qualification & 0xfff;
 
 	/* APIC-write VM exit is trap-like and thus no need to adjust IP */
+	vcpu->stat.apic_wr_exits++;
 	kvm_apic_write_nodecode(vcpu, offset);
 	return 1;
 }
@@ -5308,6 +5315,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	u64 error_code;
 
 	exit_qualification = vmx_get_exit_qual(vcpu);
+	vcpu->stat.ept_vio_exits++;
 
 	/*
 	 * EPT violation happened while executing iret from NMI,
@@ -5366,6 +5374,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 	 * nGPA here instead of the required GPA.
 	 */
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+	vcpu->stat.ept_mis_exits++;
 	if (!is_guest_mode(vcpu) &&
 	    !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 		trace_kvm_fast_mmio(gpa);
@@ -5480,6 +5489,7 @@ static void vmx_enable_tdp(void)
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
+	vcpu->stat.pause_exits++;
 	if (!kvm_pause_in_guest(vcpu->kvm))
 		grow_ple_window(vcpu);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1943bb8c5403..06ecae7718bf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -242,11 +242,48 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VM_STAT("largepages", lpages, .mode = 0444),
 	VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
 	VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
+	{ "vcpu_stat", 0, KVM_STAT_DFX },
 	{ NULL }
 };
 
 /* debugfs entries of Detail For vcpu stat EXtension */
 struct dfx_kvm_stats_debugfs_item dfx_debugfs_entries[] = {
+	DFX_STAT("pid", pid),
+	DFX_STAT("pf_fixed", pf_fixed),
+	DFX_STAT("pf_guest", pf_guest),
+	DFX_STAT("tlb_flush", tlb_flush),
+	DFX_STAT("invlpg", invlpg),
+	DFX_STAT("exits", exits),
+	DFX_STAT("io_exits", io_exits),
+	DFX_STAT("mmio_exits", mmio_exits),
+	DFX_STAT("signal_exits", signal_exits),
+	DFX_STAT("irq_window", irq_window_exits),
+	DFX_STAT("nmi_window", nmi_window_exits),
+	DFX_STAT("halt_exits", halt_exits),
+	DFX_STAT("halt_successful_poll", halt_successful_poll),
+	DFX_STAT("halt_attempted_poll", halt_attempted_poll),
+	DFX_STAT("halt_wakeup", halt_wakeup),
+	DFX_STAT("request_irq", request_irq_exits),
+	DFX_STAT("irq_exits", irq_exits),
+	DFX_STAT("host_state_reload", host_state_reload),
+	DFX_STAT("fpu_reload", fpu_reload),
+	DFX_STAT("insn_emulation", insn_emulation),
+	DFX_STAT("insn_emulation_fail", insn_emulation_fail),
+	DFX_STAT("hypercalls", hypercalls),
+	DFX_STAT("irq_injections", irq_injections),
+	DFX_STAT("nmi_injections", nmi_injections),
+	DFX_STAT("cr_exits", cr_exits),
+	DFX_STAT("msr_rd_exits", msr_rd_exits),
+	DFX_STAT("msr_wr_exits", msr_wr_exits),
+	DFX_STAT("apic_wr_exits", apic_wr_exits),
+	DFX_STAT("ept_vio_exits", ept_vio_exits),
+	DFX_STAT("ept_mis_exits", ept_mis_exits),
+	DFX_STAT("pause_exits", pause_exits),
+	DFX_STAT("steal", steal),
+	DFX_STAT("st_max", st_max),
+	DFX_STAT("utime", utime),
+	DFX_STAT("stime", stime),
+	DFX_STAT("gtime", gtime),
 	{ NULL }
 };
 
@@ -1718,6 +1755,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
 	u64 data;
 	int r;
 
+	vcpu->stat.msr_rd_exits++;
 	r = kvm_get_msr(vcpu, ecx, &data);
 
 	/* MSR read failed? See if we should ask user space */
@@ -1747,6 +1785,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 	u64 data = kvm_read_edx_eax(vcpu);
 	int r;
 
+	vcpu->stat.msr_wr_exits++;
 	r = kvm_set_msr(vcpu, ecx, data);
 
 	/* MSR write failed? See if we should ask user space */
@@ -2953,11 +2992,33 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 	kvm_x86_ops.tlb_flush_guest(vcpu);
 }
 
+static u64 accumulate_stat_steal_time(u64 *last_steal)
+{
+	u64 delta;
+
+	if (*last_steal == 0)
+		delta = 0;
+	else
+		delta = current->sched_info.run_delay - *last_steal;
+
+	*last_steal = current->sched_info.run_delay;
+	return delta;
+}
+
+static void update_stat_steal_time(struct kvm_vcpu *vcpu)
+{
+	u64 delta;
+
+	delta = accumulate_stat_steal_time(&vcpu->stat.steal);
+	vcpu->stat.st_max = max(vcpu->stat.st_max, delta);
+}
+
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
 	struct kvm_host_map map;
 	struct kvm_steal_time *st;
 
+	update_stat_steal_time(vcpu);
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
@@ -9030,6 +9091,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		kvm_lapic_sync_from_vapic(vcpu);
 
 	r = kvm_x86_ops.handle_exit(vcpu, exit_fastpath);
+	vcpu->stat.utime = current->utime;
+	vcpu->stat.stime = current->stime;
+	vcpu->stat.gtime = current->gtime;
+
 	return r;
 
 cancel_injection:
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8581f3a14da9..2d55e1e0fb66 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4623,6 +4623,35 @@ void __attribute__((weak)) kvm_arch_vcpu_stat_reset(struct kvm_vcpu_stat *vcpu_s
 #define DFX_MAX_VCPU		1024
 #define DFX_MAX_VCPU_STAT_SIZE	1024
 
+/*
+ * copy of seq_buf_alloc of kernel, kernel not export it
+ */
+static void *dfx_seq_buf_alloc(unsigned long size)
+{
+	return kvmalloc(size, GFP_KERNEL_ACCOUNT);
+}
+
+static void dfx_seq_buf_free(const void *buf)
+{
+	kvfree(buf);
+}
+
+static int dfx_seq_buf_alloc_vcpu(struct seq_file *p, int vcpu_nr)
+{
+	char *buf;
+	size_t size;
+
+	size = (vcpu_nr + 1) * DFX_MAX_VCPU_STAT_SIZE;
+	buf = dfx_seq_buf_alloc(size);
+	if (!buf)
+		return -ENOMEM;
+	if (p->buf)
+		dfx_seq_buf_free(p->buf);
+	p->buf = buf;
+	p->size = size;
+	return 0;
+}
+
 static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
 {
 	struct kvm *kvm;
@@ -4634,27 +4663,35 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
 
 	mutex_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
-		kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_for_each_vcpu(i, vcpu, kvm) {
 			vcpu_nr++;
+		}
 	mutex_unlock(&kvm_lock);
-
 	vcpu_nr = min(vcpu_nr, DFX_MAX_VCPU);
+	if (!vcpu_nr) {
+		seq_putc(p, '\n');
+		return 0;
+	}
+
+	if (dfx_seq_buf_alloc_vcpu(p, vcpu_nr))
+		return -ENOMEM;
+
 	vcpu_stats = vmalloc(vcpu_nr * sizeof(struct kvm_vcpu_stat));
 	if (!vcpu_stats)
 		return -ENOMEM;
 
 	mutex_lock(&kvm_lock);
-	list_for_each_entry(kvm, &vm_list, vm_list)
+	list_for_each_entry(kvm, &vm_list, vm_list) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (index >= vcpu_nr)
 				break;
-			memcpy(vcpu_stats + index, &vcpu->stat,
+			memcpy(vcpu_stats + index, &(vcpu->stat),
 			       sizeof(struct kvm_vcpu_stat));
 			kvm_arch_vcpu_stat_reset(&vcpu->stat);
 			++index;
 		}
+	}
 	mutex_unlock(&kvm_lock);
-
 	for (i = 0; i < vcpu_nr; i++) {
 		for (dp = dfx_debugfs_entries; dp->name; ++dp) {
 			switch (dp->dfx_kind) {
@@ -4679,9 +4716,7 @@ static int __dfx_vcpu_stats_get(struct seq_file *p, void *v)
 
 static int dfx_vcpu_stats_open(struct inode *inode, struct file *file)
 {
-	size_t size = DFX_MAX_VCPU_STAT_SIZE * (DFX_MAX_VCPU + 1);
-
-	return single_open_size(file, __dfx_vcpu_stats_get, NULL, size);
+	return single_open(file, __dfx_vcpu_stats_get, NULL);
 }
 
 static const struct file_operations dfx_stat_fops = {
-- 
GitLab