From d7869d0e243014896be924648bde12ad67e1f363 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Thu, 10 Jan 2019 19:33:55 +0800 Subject: [PATCH] Revert "x86/tsc: Try to adjust TSC if sync test fails" This reverts commit cc4db26899dcd0e6ff0448c77abd8eb61b1a1333. When we hot-add and enable a vCPU, the time inside the VM jumps and then the VM gets stuck. The dmesg output looks like this: [ 48.402948] CPU2 has been hot-added [ 48.413774] smpboot: Booting Node 0 Processor 2 APIC 0x2 [ 48.415155] kvm-clock: cpu 2, msr 6b615081, secondary cpu clock [ 48.453690] TSC ADJUST compensate: CPU2 observed 139318776350 warp. Adjust: 139318776350 [ 102.060874] clocksource: timekeeping watchdog on CPU0: Marking clocksource 'tsc' as unstable because the skew is too large: [ 102.060874] clocksource: 'kvm-clock' wd_now: 1cb1cfc4bf8 wd_last: 1be9588f1fe mask: ffffffffffffffff [ 102.060874] clocksource: 'tsc' cs_now: 207d794f7e cs_last: 205a32697a mask: ffffffffffffffff [ 102.060874] tsc: Marking TSC unstable due to clocksource watchdog [ 102.070188] KVM setup async PF for cpu 2 [ 102.071461] kvm-stealtime: cpu 2, msr 13ba95000 [ 102.074530] Will online and init hotplugged CPU: 2 This is because the TSC of the newly added vCPU is initialized to 0 while the other vCPUs' TSCs are already ahead. The guest then performs TSC ADJUST compensation, which causes the time jump. Commit bd8fab39cd4f ("KVM: x86: fix maintaining of kvm_clock stability on guest CPU hotplug") can fix this problem. However, the host kernel version may be older, so do not adjust the TSC if the sync test fails; just mark it unstable. 
Signed-off-by: Jiufei Xue Reviewed-by: Joseph Qi --- arch/x86/kernel/tsc_sync.c | 83 +++----------------------------------- 1 file changed, 5 insertions(+), 78 deletions(-) diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index ec534f978867..d350fe8d559a 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -205,7 +205,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) static atomic_t start_count; static atomic_t stop_count; static atomic_t skip_test; -static atomic_t test_runs; /* * We use a raw spinlock in this exceptional case, because @@ -318,16 +317,6 @@ void check_tsc_sync_source(int cpu) if (unsynchronized_tsc()) return; - /* - * Set the maximum number of test runs to - * 1 if the CPU does not provide the TSC_ADJUST MSR - * 3 if the MSR is available, so the target can try to adjust - */ - if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST)) - atomic_set(&test_runs, 1); - else - atomic_set(&test_runs, 3); -retry: /* * Wait for the target to start or to skip the test: */ @@ -349,21 +338,7 @@ void check_tsc_sync_source(int cpu) while (atomic_read(&stop_count) != cpus-1) cpu_relax(); - /* - * If the test was successful set the number of runs to zero and - * stop. If not, decrement the number of runs an check if we can - * retry. In case of random warps no retry is attempted. 
- */ - if (!nr_warps) { - atomic_set(&test_runs, 0); - - pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", - smp_processor_id(), cpu); - - } else if (atomic_dec_and_test(&test_runs) || random_warps) { - /* Force it to 0 if random warps brought us here */ - atomic_set(&test_runs, 0); - + if (nr_warps) { pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", smp_processor_id(), cpu); pr_warning("Measured %Ld cycles TSC warp between CPUs, " @@ -371,6 +346,9 @@ void check_tsc_sync_source(int cpu) if (random_warps) pr_warning("TSC warped randomly between CPUs\n"); mark_tsc_unstable("check_tsc_sync_source failed"); + } else { + pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + smp_processor_id(), cpu); } /* @@ -386,12 +364,6 @@ void check_tsc_sync_source(int cpu) * Let the target continue with the bootup: */ atomic_inc(&stop_count); - - /* - * Retry, if there is a chance to do so. - */ - if (atomic_read(&test_runs) > 0) - goto retry; } /* @@ -399,9 +371,6 @@ void check_tsc_sync_source(int cpu) */ void check_tsc_sync_target(void) { - struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust); - unsigned int cpu = smp_processor_id(); - cycles_t cur_max_warp, gbl_max_warp; int cpus = 2; /* Also aborts if there is no TSC. */ @@ -422,7 +391,6 @@ void check_tsc_sync_target(void) return; } -retry: /* * Register this CPU's participation and wait for the * source CPU to start the measurement: @@ -431,12 +399,7 @@ void check_tsc_sync_target(void) while (atomic_read(&start_count) != cpus) cpu_relax(); - cur_max_warp = check_tsc_warp(loop_timeout(cpu)); - - /* - * Store the maximum observed warp value for a potential retry: - */ - gbl_max_warp = max_warp; + check_tsc_warp(loop_timeout(smp_processor_id())); /* * Ok, we are done: @@ -453,42 +416,6 @@ void check_tsc_sync_target(void) * Reset it for the next sync test: */ atomic_set(&stop_count, 0); - - /* - * Check the number of remaining test runs. 
If not zero, the test - * failed and a retry with adjusted TSC is possible. If zero the - * test was either successful or failed terminally. - */ - if (!atomic_read(&test_runs)) - return; - - /* - * If the warp value of this CPU is 0, then the other CPU - * observed time going backwards so this TSC was ahead and - * needs to move backwards. - */ - if (!cur_max_warp) - cur_max_warp = -gbl_max_warp; - - /* - * Add the result to the previous adjustment value. - * - * The adjustement value is slightly off by the overhead of the - * sync mechanism (observed values are ~200 TSC cycles), but this - * really depends on CPU, node distance and frequency. So - * compensating for this is hard to get right. Experiments show - * that the warp is not longer detectable when the observed warp - * value is used. In the worst case the adjustment needs to go - * through a 3rd run for fine tuning. - */ - cur->adjusted += cur_max_warp; - - pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n", - cpu, cur_max_warp, cur->adjusted); - - wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted); - goto retry; - } #endif /* CONFIG_SMP */ -- GitLab