diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 47457ab975fdb36bed1e5146eaddd2e4c83f6f49..7365dd4acffb654d4c2ab3f15a9f85ee3f0c35f0 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -9,7 +9,7 @@ #define TICK_SIZE (tick_nsec / 1000) unsigned long long native_sched_clock(void); -extern int recalibrate_cpu_khz(void); +extern void recalibrate_cpu_khz(void); extern int no_timer_check; diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 8da0efb13544dc0ed2149a8eba7720de6089a88a..cf5d53c3f9ea32434a5b0d614bad97a4ed4dd926 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -32,15 +32,22 @@ static inline cycles_t get_cycles(void) extern struct system_counterval_t convert_art_to_tsc(u64 art); +extern void tsc_early_delay_calibrate(void); extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); +extern void mark_tsc_async_resets(char *reason); extern unsigned long native_calibrate_cpu(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); extern int tsc_clocksource_reliable; +#ifdef CONFIG_X86_TSC +extern bool tsc_async_resets; +#else +# define tsc_async_resets false +#endif /* * Boot-time check whether the TSCs are synchronized across diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 9cffb44a3cf5dfedb122c7b31c2f690177e68604..036e26d63d9a020fbbfd775ac3248a4625fae276 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -776,23 +776,36 @@ static inline int uv_num_possible_blades(void) extern void uv_nmi_setup(void); extern void uv_nmi_setup_hubless(void); +/* BIOS/Kernel flags exchange MMR */ +#define UVH_BIOS_KERNEL_MMR UVH_SCRATCH5 +#define UVH_BIOS_KERNEL_MMR_ALIAS UVH_SCRATCH5_ALIAS +#define UVH_BIOS_KERNEL_MMR_ALIAS_2 UVH_SCRATCH5_ALIAS_2 + +/* 
TSC sync valid, set by BIOS */ +#define UVH_TSC_SYNC_MMR UVH_BIOS_KERNEL_MMR +#define UVH_TSC_SYNC_SHIFT 10 +#define UVH_TSC_SYNC_SHIFT_UV2K 16 /* UV2/3k have different bits */ +#define UVH_TSC_SYNC_MASK 3 /* 0011 */ +#define UVH_TSC_SYNC_VALID 3 /* 0011 */ +#define UVH_TSC_SYNC_INVALID 2 /* 0010 */ + /* BMC sets a bit this MMR non-zero before sending an NMI */ -#define UVH_NMI_MMR UVH_SCRATCH5 -#define UVH_NMI_MMR_CLEAR UVH_SCRATCH5_ALIAS +#define UVH_NMI_MMR UVH_BIOS_KERNEL_MMR +#define UVH_NMI_MMR_CLEAR UVH_BIOS_KERNEL_MMR_ALIAS #define UVH_NMI_MMR_SHIFT 63 -#define UVH_NMI_MMR_TYPE "SCRATCH5" +#define UVH_NMI_MMR_TYPE "SCRATCH5" /* Newer SMM NMI handler, not present in all systems */ #define UVH_NMI_MMRX UVH_EVENT_OCCURRED0 #define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS #define UVH_NMI_MMRX_SHIFT UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT -#define UVH_NMI_MMRX_TYPE "EXTIO_INT0" +#define UVH_NMI_MMRX_TYPE "EXTIO_INT0" /* Non-zero indicates newer SMM NMI handler present */ #define UVH_NMI_MMRX_SUPPORTED UVH_EXTIO_INT0_BROADCAST /* Indicates to BIOS that we want to use the newer SMM NMI handler */ -#define UVH_NMI_MMRX_REQ UVH_SCRATCH5_ALIAS_2 +#define UVH_NMI_MMRX_REQ UVH_BIOS_KERNEL_MMR_ALIAS_2 #define UVH_NMI_MMRX_REQ_SHIFT 62 struct uv_hub_nmi_s { diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0f7f925e8ae1b93cf3f1b860f42028b8c7fe7564..e1b8e8bf6b3c2572b30f0de39957c602abc2434d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -154,6 +154,48 @@ static int __init early_get_pnodeid(void) return pnode; } +static void __init uv_tsc_check_sync(void) +{ + u64 mmr; + int sync_state; + int mmr_shift; + char *state; + bool valid; + + /* Accommodate different UV arch BIOSes */ + mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR); + mmr_shift = + is_uv1_hub() ? 0 : + is_uv2_hub() ? 
UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT; + if (mmr_shift) + sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK; + else + sync_state = 0; + + switch (sync_state) { + case UVH_TSC_SYNC_VALID: + state = "in sync"; + valid = true; + break; + + case UVH_TSC_SYNC_INVALID: + state = "unstable"; + valid = false; + break; + default: + state = "unknown: assuming valid"; + valid = true; + break; + } + pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state); + + /* If valid, flag that TSC ADJUST != 0 is acceptable on socket 0 */ + if (valid) + mark_tsc_async_resets("UV BIOS"); + else + mark_tsc_unstable("UV BIOS"); +} + /* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ #define SMT_LEVEL 0 /* Leaf 0xb SMT level */ @@ -288,6 +330,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic); + uv_tsc_check_sync(); return uv_apic; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 47073f6aeaf244293af1a35e774edc8ee9182396..8af2e8d0c0a1d2d0290ff2026afeab056cdc59b6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -812,26 +812,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static void __init simple_udelay_calibration(void) -{ - unsigned int tsc_khz, cpu_khz; - unsigned long lpj; - - if (!boot_cpu_has(X86_FEATURE_TSC)) - return; - - cpu_khz = x86_platform.calibrate_cpu(); - tsc_khz = x86_platform.calibrate_tsc(); - - tsc_khz = tsc_khz ? : cpu_khz; - if (!tsc_khz) - return; - - lpj = tsc_khz * 1000; - do_div(lpj, HZ); - loops_per_jiffy = lpj; -} - /* * Determine if we were loaded by an EFI loader. 
If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -1039,8 +1019,6 @@ void __init setup_arch(char **cmdline_p) */ init_hypervisor_platform(); - simple_udelay_calibration(); - x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ @@ -1125,9 +1103,6 @@ void __init setup_arch(char **cmdline_p) memblock_set_current_limit(ISA_END_ADDRESS); e820__memblock_setup(); - if (!early_xdbc_setup_hardware()) - early_xdbc_register_console(); - reserve_bios_regions(); if (efi_enabled(EFI_MEMMAP)) { @@ -1233,6 +1208,10 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif + tsc_early_delay_calibrate(); + if (!early_xdbc_setup_hardware()) + early_xdbc_register_console(); + x86_init.paging.pagetable_init(); kasan_init(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ad2b925a808e7327dec37ae03c60ea2901d48352..8ea117f8142e192ac0a143f225e505803c82106a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -112,7 +112,7 @@ static void cyc2ns_data_init(struct cyc2ns_data *data) data->cyc2ns_offset = 0; } -static void cyc2ns_init(int cpu) +static void __init cyc2ns_init(int cpu) { struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); @@ -812,13 +812,13 @@ unsigned long native_calibrate_cpu(void) return tsc_pit_min; } -int recalibrate_cpu_khz(void) +void recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP unsigned long cpu_khz_old = cpu_khz; if (!boot_cpu_has(X86_FEATURE_TSC)) - return -ENODEV; + return; cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); @@ -828,10 +828,6 @@ int recalibrate_cpu_khz(void) cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); - - return 0; -#else - return -ENODEV; #endif } @@ -959,17 +955,21 @@ core_initcall(cpufreq_register_tsc_scaling); /* * If ART is present detect the numerator:denominator to convert to TSC */ -static void detect_art(void) +static void 
__init detect_art(void) { unsigned int unused[2]; if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) return; - /* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */ + /* + * Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required, + * and the TSC counter resets must not occur asynchronously. + */ if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || - !boot_cpu_has(X86_FEATURE_TSC_ADJUST)) + !boot_cpu_has(X86_FEATURE_TSC_ADJUST) || + tsc_async_resets) return; cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, @@ -1263,6 +1263,31 @@ static int __init init_tsc_clocksource(void) */ device_initcall(init_tsc_clocksource); +/* + * Seed loops_per_jiffy from the calibrated TSC frequency, falling back to + * the calibrated CPU frequency when the TSC calibration returns 0. Leaves + * loops_per_jiffy untouched when the CPU has no TSC or both values are 0. + */ +void __init tsc_early_delay_calibrate(void) +{ + u64 lpj; + + if (!boot_cpu_has(X86_FEATURE_TSC)) + return; + + cpu_khz = x86_platform.calibrate_cpu(); + tsc_khz = x86_platform.calibrate_tsc(); + + tsc_khz = tsc_khz ? : cpu_khz; + if (!tsc_khz) + return; + + /* Widen before multiplying; do_div() needs a 64-bit dividend */ + lpj = (u64)tsc_khz * 1000; + do_div(lpj, HZ); + loops_per_jiffy = lpj; +} + void __init tsc_init(void) { u64 lpj, cyc; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index e76a9881306b34fc800b4e290e1eb7d38baaad6b..ec534f978867db90e662e7ee1d82e366796d1340 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -31,6 +31,20 @@ struct tsc_adjust { static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust); +/* + * TSCs on different sockets may be reset asynchronously. + * This may cause the TSC ADJUST value on socket 0 to be NOT 0. 
+ */ +bool __read_mostly tsc_async_resets; + +void mark_tsc_async_resets(char *reason) +{ + if (tsc_async_resets) + return; + tsc_async_resets = true; + pr_info("tsc: Marking TSC async resets true due to %s\n", reason); +} + void tsc_verify_tsc_adjust(bool resume) { struct tsc_adjust *adj = this_cpu_ptr(&tsc_adjust); @@ -39,6 +53,10 @@ void tsc_verify_tsc_adjust(bool resume) if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST)) return; + /* Skip unnecessary error messages if TSC already unstable */ + if (check_tsc_unstable()) + return; + /* Rate limit the MSR check */ if (!resume && time_before(jiffies, adj->nextcheck)) return; @@ -72,12 +90,22 @@ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval, * non zero. We don't do that on non boot cpus because physical * hotplug should have set the ADJUST register to a value > 0 so * the TSC is in sync with the already running cpus. + * + * Also don't force the ADJUST value to zero if that is a valid value + * for socket 0 as determined by the system arch. This is required + * when multiple sockets are reset asynchronously with each other + * and socket 0 may not have a TSC ADJUST value of 0. 
*/ if (bootcpu && bootval != 0) { - pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu, - bootval); - wrmsrl(MSR_IA32_TSC_ADJUST, 0); - bootval = 0; + if (likely(!tsc_async_resets)) { + pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", + cpu, bootval); + wrmsrl(MSR_IA32_TSC_ADJUST, 0); + bootval = 0; + } else { + pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n", + cpu, bootval); + } } cur->adjusted = bootval; } @@ -91,6 +119,10 @@ bool __init tsc_store_and_check_tsc_adjust(bool bootcpu) if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST)) return false; + /* Skip unnecessary error messages if TSC already unstable */ + if (check_tsc_unstable()) + return false; + rdmsrl(MSR_IA32_TSC_ADJUST, bootval); cur->bootval = bootval; cur->nextcheck = jiffies + HZ; @@ -118,6 +150,13 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) cur->nextcheck = jiffies + HZ; cur->warned = false; + /* + * If a non-zero TSC ADJUST value for socket 0 may be valid then the + * default adjusted value cannot be assumed to be zero either. + */ + if (tsc_async_resets) + cur->adjusted = bootval; + /* * Check whether this CPU is the first in a package to come up. In * this case do not check the boot value against another package @@ -139,10 +178,9 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) * Compare the boot value and complain if it differs in the * package. */ - if (bootval != ref->bootval) { - pr_warn(FW_BUG "TSC ADJUST differs: Reference CPU%u: %lld CPU%u: %lld\n", - refcpu, ref->bootval, cpu, bootval); - } + if (bootval != ref->bootval) + pr_warn_once(FW_BUG "TSC ADJUST differs within socket(s), fixing all errors\n"); + /* * The TSC_ADJUST values in a package must be the same. If the boot * value on this newly upcoming CPU differs from the adjustment @@ -150,8 +188,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) * adjusted value. 
*/ if (bootval != ref->adjusted) { - pr_warn("TSC ADJUST synchronize: Reference CPU%u: %lld CPU%u: %lld\n", - refcpu, ref->adjusted, cpu, bootval); cur->adjusted = ref->adjusted; wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted); }