Commit 99306dfc authored by Linus Torvalds

Merge branch 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 timer updates from Thomas Gleixner:
 "These updates are related to TSC handling:

   - Support platforms which have synchronized TSCs but the boot CPU has
     a non-zero TSC_ADJUST value, which is considered a firmware bug on
     normal systems.

     This applies to HPE/SGI UV platforms where the platform firmware
     uses TSC_ADJUST to ensure TSC synchronization across a huge number
     of sockets, but due to power-on timings the boot CPU cannot be
     guaranteed to have a zero TSC_ADJUST register value.

   - Fix the ordering of udelay calibration and kvmclock_init()

   - Cleanup the udelay and calibration code"

* 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tsc: Mark cyc2ns_init() and detect_art() __init
  x86/platform/UV: Mark tsc_check_sync as an init function
  x86/tsc: Make CONFIG_X86_TSC=n build work again
  x86/platform/UV: Add check of TSC state set by UV BIOS
  x86/tsc: Provide a means to disable TSC ART
  x86/tsc: Drastically reduce the number of firmware bug warnings
  x86/tsc: Skip TSC test and error messages if already unstable
  x86/tsc: Add option that TSC on Socket 0 being non-zero is valid
  x86/timers: Move simple_udelay_calibration() past kvmclock_init()
  x86/timers: Make recalibrate_cpu_khz() void
  x86/timers: Move the simple udelay calibration to tsc.h
arch/x86/include/asm/timer.h

@@ -9,7 +9,7 @@
 #define TICK_SIZE (tick_nsec / 1000)
 
 unsigned long long native_sched_clock(void);
 
-extern int recalibrate_cpu_khz(void);
+extern void recalibrate_cpu_khz(void);
 
 extern int no_timer_check;
arch/x86/include/asm/tsc.h

@@ -32,15 +32,22 @@ static inline cycles_t get_cycles(void)
 
 extern struct system_counterval_t convert_art_to_tsc(u64 art);
 
+extern void tsc_early_delay_calibrate(void);
 extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
 extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
+extern void mark_tsc_async_resets(char *reason);
 extern unsigned long native_calibrate_cpu(void);
 extern unsigned long native_calibrate_tsc(void);
 extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
 
 extern int tsc_clocksource_reliable;
+#ifdef CONFIG_X86_TSC
+extern bool tsc_async_resets;
+#else
+# define tsc_async_resets false
+#endif
 
 /*
  * Boot-time check whether the TSCs are synchronized across
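A note on the tsc_async_resets declaration above: when CONFIG_X86_TSC is disabled, the flag degrades to a compile-time false macro, so callers can test it without #ifdef guards and the compiler discards the dead branches (this is what makes the CONFIG_X86_TSC=n build work again). Below is a minimal standalone sketch of that pattern, not kernel code; CONFIG_DEMO_FEATURE and feature_flag are stand-in names for CONFIG_X86_TSC and tsc_async_resets.

	/* Build with -DCONFIG_DEMO_FEATURE to get the real variable;
	 * without it, feature_flag is a constant and the branch folds away. */
	#include <stdbool.h>
	#include <stdio.h>

	#ifdef CONFIG_DEMO_FEATURE
	bool feature_flag;                  /* real variable, set at runtime */
	#else
	# define feature_flag false         /* compile-time constant */
	#endif

	int main(void)
	{
		if (feature_flag)           /* compiles identically either way */
			puts("feature active");
		else
			puts("feature compiled out or inactive");
		return 0;
	}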
arch/x86/include/asm/uv/uv_hub.h

@@ -776,23 +776,36 @@ static inline int uv_num_possible_blades(void)
 extern void uv_nmi_setup(void);
 extern void uv_nmi_setup_hubless(void);
 
+/* BIOS/Kernel flags exchange MMR */
+#define UVH_BIOS_KERNEL_MMR		UVH_SCRATCH5
+#define UVH_BIOS_KERNEL_MMR_ALIAS	UVH_SCRATCH5_ALIAS
+#define UVH_BIOS_KERNEL_MMR_ALIAS_2	UVH_SCRATCH5_ALIAS_2
+
+/* TSC sync valid, set by BIOS */
+#define UVH_TSC_SYNC_MMR	UVH_BIOS_KERNEL_MMR
+#define UVH_TSC_SYNC_SHIFT	10
+#define UVH_TSC_SYNC_SHIFT_UV2K	16	/* UV2/3k have different bits */
+#define UVH_TSC_SYNC_MASK	3	/* 0011 */
+#define UVH_TSC_SYNC_VALID	3	/* 0011 */
+#define UVH_TSC_SYNC_INVALID	2	/* 0010 */
+
 /* BMC sets a bit this MMR non-zero before sending an NMI */
-#define UVH_NMI_MMR		UVH_SCRATCH5
-#define UVH_NMI_MMR_CLEAR	UVH_SCRATCH5_ALIAS
+#define UVH_NMI_MMR		UVH_BIOS_KERNEL_MMR
+#define UVH_NMI_MMR_CLEAR	UVH_BIOS_KERNEL_MMR_ALIAS
 #define UVH_NMI_MMR_SHIFT	63
 #define UVH_NMI_MMR_TYPE	"SCRATCH5"
 
 /* Newer SMM NMI handler, not present in all systems */
 #define UVH_NMI_MMRX		UVH_EVENT_OCCURRED0
 #define UVH_NMI_MMRX_CLEAR	UVH_EVENT_OCCURRED0_ALIAS
 #define UVH_NMI_MMRX_SHIFT	UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
 #define UVH_NMI_MMRX_TYPE	"EXTIO_INT0"
 
 /* Non-zero indicates newer SMM NMI handler present */
 #define UVH_NMI_MMRX_SUPPORTED	UVH_EXTIO_INT0_BROADCAST
 
 /* Indicates to BIOS that we want to use the newer SMM NMI handler */
-#define UVH_NMI_MMRX_REQ	UVH_SCRATCH5_ALIAS_2
+#define UVH_NMI_MMRX_REQ	UVH_BIOS_KERNEL_MMR_ALIAS_2
 #define UVH_NMI_MMRX_REQ_SHIFT	62
 
 struct uv_hub_nmi_s {
arch/x86/kernel/apic/x2apic_uv_x.c

@@ -154,6 +154,48 @@ static int __init early_get_pnodeid(void)
 	return pnode;
 }
 
+static void __init uv_tsc_check_sync(void)
+{
+	u64 mmr;
+	int sync_state;
+	int mmr_shift;
+	char *state;
+	bool valid;
+
+	/* Accommodate different UV arch BIOSes */
+	mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
+	mmr_shift =
+		is_uv1_hub() ? 0 :
+		is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
+	if (mmr_shift)
+		sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK;
+	else
+		sync_state = 0;
+
+	switch (sync_state) {
+	case UVH_TSC_SYNC_VALID:
+		state = "in sync";
+		valid = true;
+		break;
+
+	case UVH_TSC_SYNC_INVALID:
+		state = "unstable";
+		valid = false;
+		break;
+	default:
+		state = "unknown: assuming valid";
+		valid = true;
+		break;
+	}
+	pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state);
+
+	/* Mark flag that says TSC != 0 is valid for socket 0 */
+	if (valid)
+		mark_tsc_async_resets("UV BIOS");
+	else
+		mark_tsc_unstable("UV BIOS");
+}
+
 /* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
 
 #define SMT_LEVEL	0	/* Leaf 0xb SMT level */

@@ -288,6 +330,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	}
 
 	pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic);
+	uv_tsc_check_sync();
 
 	return uv_apic;
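The check above reads a two-bit state field that the UV BIOS deposits in a scratch MMR: bits 10-11 on UV4 (bits 16-17 on UV2/3, per UVH_TSC_SYNC_SHIFT_UV2K), where 3 (0b11) means valid and 2 (0b10) means invalid. UV1 has no such field, so its shift of 0 makes the state fall through to the "unknown: assuming valid" default. A userspace sketch of the same decode follows; the MMR value is invented for illustration, and the TSC_SYNC_* names simply mirror the UVH_ defines above.

	#include <stdint.h>
	#include <stdio.h>

	#define TSC_SYNC_SHIFT    10   /* UV4; UV2/3 use bit 16 */
	#define TSC_SYNC_MASK     3    /* 0011 */
	#define TSC_SYNC_VALID    3    /* 0011 */
	#define TSC_SYNC_INVALID  2    /* 0010 */

	int main(void)
	{
		/* Pretend the BIOS stored "valid" in the scratch register. */
		uint64_t mmr = (uint64_t)TSC_SYNC_VALID << TSC_SYNC_SHIFT;
		int state = (mmr >> TSC_SYNC_SHIFT) & TSC_SYNC_MASK;

		if (state == TSC_SYNC_VALID)
			puts("TSC sync state: in sync");
		else if (state == TSC_SYNC_INVALID)
			puts("TSC sync state: unstable");
		else
			puts("TSC sync state: unknown, assuming valid");
		return 0;
	}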
arch/x86/kernel/setup.c

@@ -812,26 +812,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
 	return 0;
 }
 
-static void __init simple_udelay_calibration(void)
-{
-	unsigned int tsc_khz, cpu_khz;
-	unsigned long lpj;
-
-	if (!boot_cpu_has(X86_FEATURE_TSC))
-		return;
-
-	cpu_khz = x86_platform.calibrate_cpu();
-	tsc_khz = x86_platform.calibrate_tsc();
-
-	tsc_khz = tsc_khz ? : cpu_khz;
-	if (!tsc_khz)
-		return;
-
-	lpj = tsc_khz * 1000;
-	do_div(lpj, HZ);
-	loops_per_jiffy = lpj;
-}
-
 /*
  * Determine if we were loaded by an EFI loader. If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures

@@ -1039,8 +1019,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	init_hypervisor_platform();
 
-	simple_udelay_calibration();
-
 	x86_init.resources.probe_roms();
 
 	/* after parse_early_param, so could debug it */

@@ -1125,9 +1103,6 @@ void __init setup_arch(char **cmdline_p)
 	memblock_set_current_limit(ISA_END_ADDRESS);
 	e820__memblock_setup();
 
-	if (!early_xdbc_setup_hardware())
-		early_xdbc_register_console();
-
 	reserve_bios_regions();
 
 	if (efi_enabled(EFI_MEMMAP)) {

@@ -1233,6 +1208,10 @@ void __init setup_arch(char **cmdline_p)
 	kvmclock_init();
 #endif
 
+	tsc_early_delay_calibrate();
+
+	if (!early_xdbc_setup_hardware())
+		early_xdbc_register_console();
+
 	x86_init.paging.pagetable_init();
 
 	kasan_init();
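The last hunk is the ordering fix named in the pull request: kvmclock_init() replaces x86_platform.calibrate_tsc with the paravirt clock's routine, so the early udelay calibration must run after it to pick up the host-supplied frequency in a KVM guest. Below is a minimal sketch, not kernel code, of the function-pointer override that makes the ordering matter; the struct and function names (platform_ops, kvm_calibrate, kvmclock_init_demo) and the 2000000 kHz value are hypothetical.

	#include <stdio.h>

	struct platform_ops {
		unsigned long (*calibrate_tsc)(void);
	};

	static unsigned long native_calibrate(void) { return 0; }        /* hardware path; may fail in a guest */
	static unsigned long kvm_calibrate(void)    { return 2000000; }  /* host-provided kHz, hypothetical */

	static struct platform_ops x86_platform = { .calibrate_tsc = native_calibrate };

	static void kvmclock_init_demo(void)
	{
		/* Models kvmclock_init() installing its own calibration routine. */
		x86_platform.calibrate_tsc = kvm_calibrate;
	}

	int main(void)
	{
		kvmclock_init_demo();   /* must happen first ...              */
		printf("tsc_khz = %lu\n",
		       x86_platform.calibrate_tsc()); /* ... so this sees 2000000 */
		return 0;
	}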
arch/x86/kernel/tsc.c

@@ -112,7 +112,7 @@ static void cyc2ns_data_init(struct cyc2ns_data *data)
 	data->cyc2ns_offset = 0;
 }
 
-static void cyc2ns_init(int cpu)
+static void __init cyc2ns_init(int cpu)
 {
 	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);

@@ -812,13 +812,13 @@ unsigned long native_calibrate_cpu(void)
 	return tsc_pit_min;
 }
 
-int recalibrate_cpu_khz(void)
+void recalibrate_cpu_khz(void)
 {
 #ifndef CONFIG_SMP
 	unsigned long cpu_khz_old = cpu_khz;
 
 	if (!boot_cpu_has(X86_FEATURE_TSC))
-		return -ENODEV;
+		return;
 
 	cpu_khz = x86_platform.calibrate_cpu();
 	tsc_khz = x86_platform.calibrate_tsc();

@@ -828,10 +828,6 @@ int recalibrate_cpu_khz(void)
 	cpu_khz = tsc_khz;
 	cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
 						    cpu_khz_old, cpu_khz);
-	return 0;
-#else
-	return -ENODEV;
 #endif
 }

@@ -959,17 +955,21 @@ core_initcall(cpufreq_register_tsc_scaling);
 /*
  * If ART is present detect the numerator:denominator to convert to TSC
  */
-static void detect_art(void)
+static void __init detect_art(void)
 {
 	unsigned int unused[2];
 
 	if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
 		return;
 
-	/* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */
+	/*
+	 * Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required,
+	 * and the TSC counter resets must not occur asynchronously.
+	 */
 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
 	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
-	    !boot_cpu_has(X86_FEATURE_TSC_ADJUST))
+	    !boot_cpu_has(X86_FEATURE_TSC_ADJUST) ||
+	    tsc_async_resets)
 		return;
 
 	cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,

@@ -1263,6 +1263,25 @@ static int __init init_tsc_clocksource(void)
  */
 device_initcall(init_tsc_clocksource);
 
+void __init tsc_early_delay_calibrate(void)
+{
+	unsigned long lpj;
+
+	if (!boot_cpu_has(X86_FEATURE_TSC))
+		return;
+
+	cpu_khz = x86_platform.calibrate_cpu();
+	tsc_khz = x86_platform.calibrate_tsc();
+
+	tsc_khz = tsc_khz ? : cpu_khz;
+	if (!tsc_khz)
+		return;
+
+	lpj = tsc_khz * 1000;
+	do_div(lpj, HZ);
+	loops_per_jiffy = lpj;
+}
+
 void __init tsc_init(void)
 {
 	u64 lpj, cyc;
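tsc_early_delay_calibrate() seeds loops_per_jiffy arithmetically rather than by timing a delay loop: lpj is TSC cycles per second divided by timer ticks per second. For example, with a 2 GHz TSC (tsc_khz = 2,000,000) and HZ = 250, lpj = 2,000,000 × 1000 / 250 = 8,000,000 cycles per jiffy. A standalone sketch of the same arithmetic, using those hypothetical values:

	#include <stdio.h>

	int main(void)
	{
		unsigned long tsc_khz = 2000000;   /* 2 GHz TSC, hypothetical */
		unsigned long hz = 250;            /* a common CONFIG_HZ choice */
		unsigned long lpj = tsc_khz * 1000 / hz;

		/* 2,000,000 kHz * 1000 / 250 = 8,000,000 cycles per jiffy */
		printf("loops_per_jiffy = %lu\n", lpj);
		return 0;
	}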
arch/x86/kernel/tsc_sync.c

@@ -31,6 +31,20 @@ struct tsc_adjust {
 
 static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
 
+/*
+ * TSC's on different sockets may be reset asynchronously.
+ * This may cause the TSC ADJUST value on socket 0 to be NOT 0.
+ */
+bool __read_mostly tsc_async_resets;
+
+void mark_tsc_async_resets(char *reason)
+{
+	if (tsc_async_resets)
+		return;
+	tsc_async_resets = true;
+	pr_info("tsc: Marking TSC async resets true due to %s\n", reason);
+}
+
 void tsc_verify_tsc_adjust(bool resume)
 {
 	struct tsc_adjust *adj = this_cpu_ptr(&tsc_adjust);

@@ -39,6 +53,10 @@ void tsc_verify_tsc_adjust(bool resume)
 	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
 		return;
 
+	/* Skip unnecessary error messages if TSC already unstable */
+	if (check_tsc_unstable())
+		return;
+
 	/* Rate limit the MSR check */
 	if (!resume && time_before(jiffies, adj->nextcheck))
 		return;

@@ -72,12 +90,22 @@ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
 	 * non zero. We don't do that on non boot cpus because physical
 	 * hotplug should have set the ADJUST register to a value > 0 so
 	 * the TSC is in sync with the already running cpus.
+	 *
+	 * Also don't force the ADJUST value to zero if that is a valid value
+	 * for socket 0 as determined by the system arch. This is required
+	 * when multiple sockets are reset asynchronously with each other
+	 * and socket 0 may not have an TSC ADJUST value of 0.
 	 */
 	if (bootcpu && bootval != 0) {
-		pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu,
-			bootval);
-		wrmsrl(MSR_IA32_TSC_ADJUST, 0);
-		bootval = 0;
+		if (likely(!tsc_async_resets)) {
+			pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n",
+				cpu, bootval);
+			wrmsrl(MSR_IA32_TSC_ADJUST, 0);
+			bootval = 0;
+		} else {
+			pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n",
+				cpu, bootval);
+		}
 	}
 	cur->adjusted = bootval;
 }

@@ -91,6 +119,10 @@ bool __init tsc_store_and_check_tsc_adjust(bool bootcpu)
 	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
 		return false;
 
+	/* Skip unnecessary error messages if TSC already unstable */
+	if (check_tsc_unstable())
+		return false;
+
 	rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
 	cur->bootval = bootval;
 	cur->nextcheck = jiffies + HZ;

@@ -118,6 +150,13 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
 	cur->nextcheck = jiffies + HZ;
 	cur->warned = false;
 
+	/*
+	 * If a non-zero TSC value for socket 0 may be valid then the default
+	 * adjusted value cannot assumed to be zero either.
+	 */
+	if (tsc_async_resets)
+		cur->adjusted = bootval;
+
 	/*
 	 * Check whether this CPU is the first in a package to come up. In
 	 * this case do not check the boot value against another package

@@ -139,10 +178,9 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
 	 * Compare the boot value and complain if it differs in the
 	 * package.
 	 */
-	if (bootval != ref->bootval) {
-		pr_warn(FW_BUG "TSC ADJUST differs: Reference CPU%u: %lld CPU%u: %lld\n",
-			refcpu, ref->bootval, cpu, bootval);
-	}
+	if (bootval != ref->bootval)
+		printk_once(FW_BUG "TSC ADJUST differs within socket(s), fixing all errors\n");
+
 	/*
 	 * The TSC_ADJUST values in a package must be the same. If the boot
 	 * value on this newly upcoming CPU differs from the adjustment

@@ -150,8 +188,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
 	 * adjusted value.
 	 */
 	if (bootval != ref->adjusted) {
-		pr_warn("TSC ADJUST synchronize: Reference CPU%u: %lld CPU%u: %lld\n",
-			refcpu, ref->adjusted, cpu, bootval);
 		cur->adjusted = ref->adjusted;
 		wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted);
 	}
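For readers following the tsc_sync.c changes: architecturally, RDTSC returns the CPU's internal counter plus the signed TSC_ADJUST MSR, so writing TSC_ADJUST shifts the TSC a CPU observes without touching the counter itself. That is why copying the reference CPU's adjusted value onto an outlier (the last hunk) brings a package back in sync, and why forcing TSC_ADJUST to zero on the boot CPU is wrong on UV systems where a non-zero value is legitimate. A toy model of that relationship, not kernel code, with purely illustrative numbers:

	#include <stdint.h>
	#include <stdio.h>

	/* Visible TSC = internal counter + signed TSC_ADJUST. */
	static uint64_t visible_tsc(uint64_t internal_counter, int64_t tsc_adjust)
	{
		return internal_counter + (uint64_t)tsc_adjust;
	}

	int main(void)
	{
		uint64_t counter = 1000000;    /* counter shared within a package */
		int64_t cpu0_adjust = 0;       /* reference CPU */
		int64_t cpu1_adjust = -500;    /* firmware bug: differs in package */

		printf("CPU0 TSC: %llu\n",
		       (unsigned long long)visible_tsc(counter, cpu0_adjust));
		printf("CPU1 TSC: %llu\n",
		       (unsigned long long)visible_tsc(counter, cpu1_adjust));

		/* The kernel's fix: copy the reference ADJUST value to the outlier. */
		cpu1_adjust = cpu0_adjust;
		printf("CPU1 TSC after sync: %llu\n",
		       (unsigned long long)visible_tsc(counter, cpu1_adjust));
		return 0;
	}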