提交 13e091b6 编写于 作者: L Linus Torvalds

Merge branch 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 timer updates from Thomas Gleixner:
 "Early TSC based time stamping to allow better boot time analysis.

  This comes with a general cleanup of the TSC calibration code which
  grew warts and duct taping over the years and removes 250 lines of
  code. Initiated and mostly implemented by Pavel with help from various
  folks"

* 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits)
  x86/kvmclock: Mark kvm_get_preset_lpj() as __init
  x86/tsc: Consolidate init code
  sched/clock: Disable interrupts when calling generic_sched_clock_init()
  timekeeping: Prevent false warning when persistent clock is not available
  sched/clock: Close a hole in sched_clock_init()
  x86/tsc: Make use of tsc_calibrate_cpu_early()
  x86/tsc: Split native_calibrate_cpu() into early and late parts
  sched/clock: Use static key for sched_clock_running
  sched/clock: Enable sched clock early
  sched/clock: Move sched clock initialization and merge with generic clock
  x86/tsc: Use TSC as sched clock early
  x86/tsc: Initialize cyc2ns when tsc frequency is determined
  x86/tsc: Calibrate tsc only once
  ARM/time: Remove read_boot_clock64()
  s390/time: Remove read_boot_clock64()
  timekeeping: Default boot time offset to local_clock()
  timekeeping: Replace read_boot_clock64() with read_persistent_wall_and_boot_offset()
  s390/time: Add read_persistent_wall_and_boot_offset()
  x86/xen/time: Output xen sched_clock time from 0
  x86/xen/time: Initialize pv xen time in init_hypervisor_platform()
  ...
...@@ -2835,8 +2835,6 @@ ...@@ -2835,8 +2835,6 @@
nosync [HW,M68K] Disables sync negotiation for all devices. nosync [HW,M68K] Disables sync negotiation for all devices.
notsc [BUGS=X86-32] Disable Time Stamp Counter
nowatchdog [KNL] Disable both lockup detectors, i.e. nowatchdog [KNL] Disable both lockup detectors, i.e.
soft-lockup and NMI watchdog (hard-lockup). soft-lockup and NMI watchdog (hard-lockup).
......
...@@ -92,9 +92,7 @@ APICs ...@@ -92,9 +92,7 @@ APICs
Timing Timing
notsc notsc
Don't use the CPU time stamp counter to read the wall time. Deprecated, use tsc=unstable instead.
This can be used to work around timing problems on multiprocessor systems
with not properly synchronized CPUs.
nohpet nohpet
Don't use the HPET timer. Don't use the HPET timer.
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
extern void timer_tick(void); extern void timer_tick(void);
typedef void (*clock_access_fn)(struct timespec64 *); typedef void (*clock_access_fn)(struct timespec64 *);
extern int register_persistent_clock(clock_access_fn read_boot, extern int register_persistent_clock(clock_access_fn read_persistent);
clock_access_fn read_persistent);
#endif #endif
...@@ -83,29 +83,18 @@ static void dummy_clock_access(struct timespec64 *ts) ...@@ -83,29 +83,18 @@ static void dummy_clock_access(struct timespec64 *ts)
} }
static clock_access_fn __read_persistent_clock = dummy_clock_access; static clock_access_fn __read_persistent_clock = dummy_clock_access;
static clock_access_fn __read_boot_clock = dummy_clock_access;
void read_persistent_clock64(struct timespec64 *ts) void read_persistent_clock64(struct timespec64 *ts)
{ {
__read_persistent_clock(ts); __read_persistent_clock(ts);
} }
void read_boot_clock64(struct timespec64 *ts) int __init register_persistent_clock(clock_access_fn read_persistent)
{
__read_boot_clock(ts);
}
int __init register_persistent_clock(clock_access_fn read_boot,
clock_access_fn read_persistent)
{ {
/* Only allow the clockaccess functions to be registered once */ /* Only allow the clockaccess functions to be registered once */
if (__read_persistent_clock == dummy_clock_access && if (__read_persistent_clock == dummy_clock_access) {
__read_boot_clock == dummy_clock_access) {
if (read_boot)
__read_boot_clock = read_boot;
if (read_persistent) if (read_persistent)
__read_persistent_clock = read_persistent; __read_persistent_clock = read_persistent;
return 0; return 0;
} }
......
...@@ -110,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase) ...@@ -110,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
} }
sched_clock_register(omap_32k_read_sched_clock, 32, 32768); sched_clock_register(omap_32k_read_sched_clock, 32, 32768);
register_persistent_clock(NULL, omap_read_persistent_clock64); register_persistent_clock(omap_read_persistent_clock64);
pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n"); pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
return 0; return 0;
......
...@@ -221,17 +221,22 @@ void read_persistent_clock64(struct timespec64 *ts) ...@@ -221,17 +221,22 @@ void read_persistent_clock64(struct timespec64 *ts)
ext_to_timespec64(clk, ts); ext_to_timespec64(clk, ts);
} }
void read_boot_clock64(struct timespec64 *ts) void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
struct timespec64 *boot_offset)
{ {
unsigned char clk[STORE_CLOCK_EXT_SIZE]; unsigned char clk[STORE_CLOCK_EXT_SIZE];
struct timespec64 boot_time;
__u64 delta; __u64 delta;
delta = initial_leap_seconds + TOD_UNIX_EPOCH; delta = initial_leap_seconds + TOD_UNIX_EPOCH;
memcpy(clk, tod_clock_base, 16); memcpy(clk, tod_clock_base, STORE_CLOCK_EXT_SIZE);
*(__u64 *) &clk[1] -= delta; *(__u64 *)&clk[1] -= delta;
if (*(__u64 *) &clk[1] > delta) if (*(__u64 *)&clk[1] > delta)
clk[0]--; clk[0]--;
ext_to_timespec64(clk, ts); ext_to_timespec64(clk, &boot_time);
read_persistent_clock64(wall_time);
*boot_offset = timespec64_sub(*wall_time, boot_time);
} }
static u64 read_tod_clock(struct clocksource *cs) static u64 read_tod_clock(struct clocksource *cs)
......
...@@ -76,4 +76,17 @@ ...@@ -76,4 +76,17 @@
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
/* Useful macros */
#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
{ \
.vendor = X86_VENDOR_INTEL, \
.family = _family, \
.model = _model, \
.feature = X86_FEATURE_ANY, \
.driver_data = (kernel_ulong_t)&_driver_data \
}
#define INTEL_CPU_FAM6(_model, _driver_data) \
INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data)
#endif /* _ASM_X86_INTEL_FAMILY_H */ #endif /* _ASM_X86_INTEL_FAMILY_H */
...@@ -80,35 +80,6 @@ enum intel_mid_cpu_type { ...@@ -80,35 +80,6 @@ enum intel_mid_cpu_type {
extern enum intel_mid_cpu_type __intel_mid_cpu_chip; extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
/**
* struct intel_mid_ops - Interface between intel-mid & sub archs
* @arch_setup: arch_setup function to re-initialize platform
* structures (x86_init, x86_platform_init)
*
* This structure can be extended if any new interface is required
* between intel-mid & its sub arch files.
*/
struct intel_mid_ops {
void (*arch_setup)(void);
};
/* Helper APIs for INTEL_MID_OPS_INIT */
#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \
[cpuid] = get_##cpuname##_ops
/* Maximum number of CPU ops */
#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *))
/*
* For every new cpu addition, a weak get_<cpuname>_ops() function needs to be
* declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h.
*/
#define INTEL_MID_OPS_INIT { \
DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \
DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \
DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \
};
#ifdef CONFIG_X86_INTEL_MID #ifdef CONFIG_X86_INTEL_MID
static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void) static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
...@@ -136,20 +107,6 @@ enum intel_mid_timer_options { ...@@ -136,20 +107,6 @@ enum intel_mid_timer_options {
extern enum intel_mid_timer_options intel_mid_timer_options; extern enum intel_mid_timer_options intel_mid_timer_options;
/*
* Penwell uses spread spectrum clock, so the freq number is not exactly
* the same as reported by MSR based on SDM.
*/
#define FSB_FREQ_83SKU 83200
#define FSB_FREQ_100SKU 99840
#define FSB_FREQ_133SKU 133000
#define FSB_FREQ_167SKU 167000
#define FSB_FREQ_200SKU 200000
#define FSB_FREQ_267SKU 267000
#define FSB_FREQ_333SKU 333000
#define FSB_FREQ_400SKU 400000
/* Bus Select SoC Fuse value */ /* Bus Select SoC Fuse value */
#define BSEL_SOC_FUSE_MASK 0x7 #define BSEL_SOC_FUSE_MASK 0x7
/* FSB 133MHz */ /* FSB 133MHz */
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_KVM_GUEST_H
#define _ASM_X86_KVM_GUEST_H
int kvm_setup_vsyscall_timeinfo(void);
#endif /* _ASM_X86_KVM_GUEST_H */
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#include <uapi/asm/kvm_para.h> #include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void); extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
#ifdef CONFIG_KVM_GUEST #ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void); bool kvm_check_and_clear_guest_paused(void);
......
...@@ -37,5 +37,6 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); ...@@ -37,5 +37,6 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs); extern int poke_int3_handler(struct pt_regs *regs);
extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
extern int after_bootmem;
#endif /* _ASM_X86_TEXT_PATCHING_H */ #endif /* _ASM_X86_TEXT_PATCHING_H */
...@@ -33,13 +33,13 @@ static inline cycles_t get_cycles(void) ...@@ -33,13 +33,13 @@ static inline cycles_t get_cycles(void)
extern struct system_counterval_t convert_art_to_tsc(u64 art); extern struct system_counterval_t convert_art_to_tsc(u64 art);
extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
extern void tsc_early_delay_calibrate(void); extern void tsc_early_init(void);
extern void tsc_init(void); extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason); extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void); extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void); extern int check_tsc_unstable(void);
extern void mark_tsc_async_resets(char *reason); extern void mark_tsc_async_resets(char *reason);
extern unsigned long native_calibrate_cpu(void); extern unsigned long native_calibrate_cpu_early(void);
extern unsigned long native_calibrate_tsc(void); extern unsigned long native_calibrate_tsc(void);
extern unsigned long long native_sched_clock_from_tsc(u64 tsc); extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
......
...@@ -668,6 +668,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, ...@@ -668,6 +668,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
local_irq_save(flags); local_irq_save(flags);
memcpy(addr, opcode, len); memcpy(addr, opcode, len);
local_irq_restore(flags); local_irq_restore(flags);
sync_core();
/* Could also do a CLFLUSH here to speed up CPU recovery; but /* Could also do a CLFLUSH here to speed up CPU recovery; but
that causes hangs on some VIA CPUs. */ that causes hangs on some VIA CPUs. */
return addr; return addr;
...@@ -693,6 +694,12 @@ void *text_poke(void *addr, const void *opcode, size_t len) ...@@ -693,6 +694,12 @@ void *text_poke(void *addr, const void *opcode, size_t len)
struct page *pages[2]; struct page *pages[2];
int i; int i;
/*
* While boot memory allocator is running we cannot use struct
* pages as they are not yet initialized.
*/
BUG_ON(!after_bootmem);
if (!core_kernel_text((unsigned long)addr)) { if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr); pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE); pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
......
...@@ -232,8 +232,6 @@ static void init_amd_k7(struct cpuinfo_x86 *c) ...@@ -232,8 +232,6 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
} }
} }
set_cpu_cap(c, X86_FEATURE_K7);
/* calling is from identify_secondary_cpu() ? */ /* calling is from identify_secondary_cpu() ? */
if (!c->cpu_index) if (!c->cpu_index)
return; return;
...@@ -617,6 +615,14 @@ static void early_init_amd(struct cpuinfo_x86 *c) ...@@ -617,6 +615,14 @@ static void early_init_amd(struct cpuinfo_x86 *c)
early_init_amd_mc(c); early_init_amd_mc(c);
#ifdef CONFIG_X86_32
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_K7);
#endif
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
/* /*
...@@ -863,9 +869,6 @@ static void init_amd(struct cpuinfo_x86 *c) ...@@ -863,9 +869,6 @@ static void init_amd(struct cpuinfo_x86 *c)
init_amd_cacheinfo(c); init_amd_cacheinfo(c);
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
if (cpu_has(c, X86_FEATURE_XMM2)) { if (cpu_has(c, X86_FEATURE_XMM2)) {
unsigned long long val; unsigned long long val;
int ret; int ret;
......
...@@ -1018,6 +1018,24 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) ...@@ -1018,6 +1018,24 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
} }
/*
* The NOPL instruction is supposed to exist on all CPUs of family >= 6;
* unfortunately, that's not true in practice because of early VIA
* chips and (more importantly) broken virtualizers that are not easy
* to detect. In the latter case it doesn't even *fail* reliably, so
* probing for it doesn't even work. Disable it completely on 32-bit
* unless we can find a reliable way to detect all the broken cases.
* Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
*/
static void detect_nopl(void)
{
#ifdef CONFIG_X86_32
setup_clear_cpu_cap(X86_FEATURE_NOPL);
#else
setup_force_cpu_cap(X86_FEATURE_NOPL);
#endif
}
/* /*
* Do minimum CPU detection early. * Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask, * Fields really needed: vendor, cpuid_level, family, model, mask,
...@@ -1092,6 +1110,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) ...@@ -1092,6 +1110,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
*/ */
if (!pgtable_l5_enabled()) if (!pgtable_l5_enabled())
setup_clear_cpu_cap(X86_FEATURE_LA57); setup_clear_cpu_cap(X86_FEATURE_LA57);
detect_nopl();
} }
void __init early_cpu_init(void) void __init early_cpu_init(void)
...@@ -1127,24 +1147,6 @@ void __init early_cpu_init(void) ...@@ -1127,24 +1147,6 @@ void __init early_cpu_init(void)
early_identify_cpu(&boot_cpu_data); early_identify_cpu(&boot_cpu_data);
} }
/*
* The NOPL instruction is supposed to exist on all CPUs of family >= 6;
* unfortunately, that's not true in practice because of early VIA
* chips and (more importantly) broken virtualizers that are not easy
* to detect. In the latter case it doesn't even *fail* reliably, so
* probing for it doesn't even work. Disable it completely on 32-bit
* unless we can find a reliable way to detect all the broken cases.
* Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
*/
static void detect_nopl(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
clear_cpu_cap(c, X86_FEATURE_NOPL);
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
}
static void detect_null_seg_behavior(struct cpuinfo_x86 *c) static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
...@@ -1207,8 +1209,6 @@ static void generic_identify(struct cpuinfo_x86 *c) ...@@ -1207,8 +1209,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
get_model_name(c); /* Default name */ get_model_name(c); /* Default name */
detect_nopl(c);
detect_null_seg_behavior(c); detect_null_seg_behavior(c);
/* /*
......
...@@ -37,15 +37,18 @@ static void bug_at(unsigned char *ip, int line) ...@@ -37,15 +37,18 @@ static void bug_at(unsigned char *ip, int line)
BUG(); BUG();
} }
static void __jump_label_transform(struct jump_entry *entry, static void __ref __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type, enum jump_label_type type,
void *(*poker)(void *, const void *, size_t), void *(*poker)(void *, const void *, size_t),
int init) int init)
{ {
union jump_code_union code; union jump_code_union code;
const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
if (early_boot_irqs_disabled)
poker = text_poke_early;
if (type == JUMP_LABEL_JMP) { if (type == JUMP_LABEL_JMP) {
if (init) { if (init) {
/* /*
......
...@@ -45,7 +45,6 @@ ...@@ -45,7 +45,6 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/apicdef.h> #include <asm/apicdef.h>
#include <asm/hypervisor.h> #include <asm/hypervisor.h>
#include <asm/kvm_guest.h>
static int kvmapf = 1; static int kvmapf = 1;
...@@ -66,15 +65,6 @@ static int __init parse_no_stealacc(char *arg) ...@@ -66,15 +65,6 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc); early_param("no-steal-acc", parse_no_stealacc);
static int kvmclock_vsyscall = 1;
static int __init parse_no_kvmclock_vsyscall(char *arg)
{
kvmclock_vsyscall = 0;
return 0;
}
early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64); static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0; static int has_steal_clock = 0;
...@@ -560,9 +550,6 @@ static void __init kvm_guest_init(void) ...@@ -560,9 +550,6 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write); apic_set_eoi_write(kvm_guest_apic_eoi_write);
if (kvmclock_vsyscall)
kvm_setup_vsyscall_timeinfo();
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
...@@ -628,6 +615,7 @@ const __initconst struct hypervisor_x86 x86_hyper_kvm = { ...@@ -628,6 +615,7 @@ const __initconst struct hypervisor_x86 x86_hyper_kvm = {
.name = "KVM", .name = "KVM",
.detect = kvm_detect, .detect = kvm_detect,
.type = X86_HYPER_KVM, .type = X86_HYPER_KVM,
.init.init_platform = kvmclock_init,
.init.guest_late_init = kvm_guest_init, .init.guest_late_init = kvm_guest_init,
.init.x2apic_available = kvm_para_available, .init.x2apic_available = kvm_para_available,
}; };
......
...@@ -23,30 +23,56 @@ ...@@ -23,30 +23,56 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/hardirq.h> #include <linux/hardirq.h>
#include <linux/memblock.h> #include <linux/cpuhotplug.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/clock.h> #include <linux/sched/clock.h>
#include <linux/mm.h>
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h> #include <asm/mem_encrypt.h>
#include <asm/x86_init.h> #include <asm/x86_init.h>
#include <asm/reboot.h> #include <asm/reboot.h>
#include <asm/kvmclock.h> #include <asm/kvmclock.h>
static int kvmclock __ro_after_init = 1; static int kvmclock __initdata = 1;
static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; static int kvmclock_vsyscall __initdata = 1;
static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; static int msr_kvm_system_time __ro_after_init = MSR_KVM_SYSTEM_TIME;
static u64 kvm_sched_clock_offset; static int msr_kvm_wall_clock __ro_after_init = MSR_KVM_WALL_CLOCK;
static u64 kvm_sched_clock_offset __ro_after_init;
static int parse_no_kvmclock(char *arg) static int __init parse_no_kvmclock(char *arg)
{ {
kvmclock = 0; kvmclock = 0;
return 0; return 0;
} }
early_param("no-kvmclock", parse_no_kvmclock); early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */ static int __init parse_no_kvmclock_vsyscall(char *arg)
static struct pvclock_vsyscall_time_info *hv_clock; {
static struct pvclock_wall_clock *wall_clock; kvmclock_vsyscall = 0;
return 0;
}
early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
/* Aligned to page sizes to match what's mapped via vsyscalls to userspace */
#define HV_CLOCK_SIZE (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS)
#define HVC_BOOT_ARRAY_SIZE \
(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
static struct pvclock_vsyscall_time_info
hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
static struct pvclock_wall_clock wall_clock;
static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
{
return &this_cpu_read(hv_clock_per_cpu)->pvti;
}
static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void)
{
return this_cpu_read(hv_clock_per_cpu);
}
/* /*
* The wallclock is the time of day when we booted. Since then, some time may * The wallclock is the time of day when we booted. Since then, some time may
...@@ -55,21 +81,10 @@ static struct pvclock_wall_clock *wall_clock; ...@@ -55,21 +81,10 @@ static struct pvclock_wall_clock *wall_clock;
*/ */
static void kvm_get_wallclock(struct timespec64 *now) static void kvm_get_wallclock(struct timespec64 *now)
{ {
struct pvclock_vcpu_time_info *vcpu_time; wrmsrl(msr_kvm_wall_clock, slow_virt_to_phys(&wall_clock));
int low, high; preempt_disable();
int cpu; pvclock_read_wallclock(&wall_clock, this_cpu_pvti(), now);
preempt_enable();
low = (int)slow_virt_to_phys(wall_clock);
high = ((u64)slow_virt_to_phys(wall_clock) >> 32);
native_write_msr(msr_kvm_wall_clock, low, high);
cpu = get_cpu();
vcpu_time = &hv_clock[cpu].pvti;
pvclock_read_wallclock(wall_clock, vcpu_time, now);
put_cpu();
} }
static int kvm_set_wallclock(const struct timespec64 *now) static int kvm_set_wallclock(const struct timespec64 *now)
...@@ -79,14 +94,10 @@ static int kvm_set_wallclock(const struct timespec64 *now) ...@@ -79,14 +94,10 @@ static int kvm_set_wallclock(const struct timespec64 *now)
static u64 kvm_clock_read(void) static u64 kvm_clock_read(void)
{ {
struct pvclock_vcpu_time_info *src;
u64 ret; u64 ret;
int cpu;
preempt_disable_notrace(); preempt_disable_notrace();
cpu = smp_processor_id(); ret = pvclock_clocksource_read(this_cpu_pvti());
src = &hv_clock[cpu].pvti;
ret = pvclock_clocksource_read(src);
preempt_enable_notrace(); preempt_enable_notrace();
return ret; return ret;
} }
...@@ -112,11 +123,11 @@ static inline void kvm_sched_clock_init(bool stable) ...@@ -112,11 +123,11 @@ static inline void kvm_sched_clock_init(bool stable)
kvm_sched_clock_offset = kvm_clock_read(); kvm_sched_clock_offset = kvm_clock_read();
pv_time_ops.sched_clock = kvm_sched_clock_read; pv_time_ops.sched_clock = kvm_sched_clock_read;
printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", pr_info("kvm-clock: using sched offset of %llu cycles",
kvm_sched_clock_offset); kvm_sched_clock_offset);
BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) > BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time)); sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
} }
/* /*
...@@ -130,19 +141,11 @@ static inline void kvm_sched_clock_init(bool stable) ...@@ -130,19 +141,11 @@ static inline void kvm_sched_clock_init(bool stable)
*/ */
static unsigned long kvm_get_tsc_khz(void) static unsigned long kvm_get_tsc_khz(void)
{ {
struct pvclock_vcpu_time_info *src;
int cpu;
unsigned long tsc_khz;
cpu = get_cpu();
src = &hv_clock[cpu].pvti;
tsc_khz = pvclock_tsc_khz(src);
put_cpu();
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
return tsc_khz; return pvclock_tsc_khz(this_cpu_pvti());
} }
static void kvm_get_preset_lpj(void) static void __init kvm_get_preset_lpj(void)
{ {
unsigned long khz; unsigned long khz;
u64 lpj; u64 lpj;
...@@ -156,49 +159,40 @@ static void kvm_get_preset_lpj(void) ...@@ -156,49 +159,40 @@ static void kvm_get_preset_lpj(void)
bool kvm_check_and_clear_guest_paused(void) bool kvm_check_and_clear_guest_paused(void)
{ {
struct pvclock_vsyscall_time_info *src = this_cpu_hvclock();
bool ret = false; bool ret = false;
struct pvclock_vcpu_time_info *src;
int cpu = smp_processor_id();
if (!hv_clock) if (!src)
return ret; return ret;
src = &hv_clock[cpu].pvti; if ((src->pvti.flags & PVCLOCK_GUEST_STOPPED) != 0) {
if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { src->pvti.flags &= ~PVCLOCK_GUEST_STOPPED;
src->flags &= ~PVCLOCK_GUEST_STOPPED;
pvclock_touch_watchdogs(); pvclock_touch_watchdogs();
ret = true; ret = true;
} }
return ret; return ret;
} }
struct clocksource kvm_clock = { struct clocksource kvm_clock = {
.name = "kvm-clock", .name = "kvm-clock",
.read = kvm_clock_get_cycles, .read = kvm_clock_get_cycles,
.rating = 400, .rating = 400,
.mask = CLOCKSOURCE_MASK(64), .mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS, .flags = CLOCK_SOURCE_IS_CONTINUOUS,
}; };
EXPORT_SYMBOL_GPL(kvm_clock); EXPORT_SYMBOL_GPL(kvm_clock);
int kvm_register_clock(char *txt) static void kvm_register_clock(char *txt)
{ {
int cpu = smp_processor_id(); struct pvclock_vsyscall_time_info *src = this_cpu_hvclock();
int low, high, ret; u64 pa;
struct pvclock_vcpu_time_info *src;
if (!hv_clock)
return 0;
src = &hv_clock[cpu].pvti; if (!src)
low = (int)slow_virt_to_phys(src) | 1; return;
high = ((u64)slow_virt_to_phys(src) >> 32);
ret = native_write_msr_safe(msr_kvm_system_time, low, high);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt);
return ret; pa = slow_virt_to_phys(&src->pvti) | 0x01ULL;
wrmsrl(msr_kvm_system_time, pa);
pr_info("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
} }
static void kvm_save_sched_clock_state(void) static void kvm_save_sched_clock_state(void)
...@@ -213,11 +207,7 @@ static void kvm_restore_sched_clock_state(void) ...@@ -213,11 +207,7 @@ static void kvm_restore_sched_clock_state(void)
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
static void kvm_setup_secondary_clock(void) static void kvm_setup_secondary_clock(void)
{ {
/* kvm_register_clock("secondary cpu clock");
* Now that the first cpu already had this clocksource initialized,
* we shouldn't fail.
*/
WARN_ON(kvm_register_clock("secondary cpu clock"));
} }
#endif #endif
...@@ -245,100 +235,84 @@ static void kvm_shutdown(void) ...@@ -245,100 +235,84 @@ static void kvm_shutdown(void)
native_machine_shutdown(); native_machine_shutdown();
} }
static phys_addr_t __init kvm_memblock_alloc(phys_addr_t size, static int __init kvm_setup_vsyscall_timeinfo(void)
phys_addr_t align)
{ {
phys_addr_t mem; #ifdef CONFIG_X86_64
u8 flags;
mem = memblock_alloc(size, align); if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall)
if (!mem)
return 0; return 0;
if (sev_active()) { flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
if (early_set_memory_decrypted((unsigned long)__va(mem), size)) if (!(flags & PVCLOCK_TSC_STABLE_BIT))
goto e_free; return 0;
}
return mem; kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
e_free: #endif
memblock_free(mem, size);
return 0; return 0;
} }
early_initcall(kvm_setup_vsyscall_timeinfo);
static void __init kvm_memblock_free(phys_addr_t addr, phys_addr_t size) static int kvmclock_setup_percpu(unsigned int cpu)
{ {
if (sev_active()) struct pvclock_vsyscall_time_info *p = per_cpu(hv_clock_per_cpu, cpu);
early_set_memory_encrypted((unsigned long)__va(addr), size);
memblock_free(addr, size); /*
* The per cpu area setup replicates CPU0 data to all cpu
* pointers. So carefully check. CPU0 has been set up in init
* already.
*/
if (!cpu || (p && p != per_cpu(hv_clock_per_cpu, 0)))
return 0;
/* Use the static page for the first CPUs, allocate otherwise */
if (cpu < HVC_BOOT_ARRAY_SIZE)
p = &hv_clock_boot[cpu];
else
p = kzalloc(sizeof(*p), GFP_KERNEL);
per_cpu(hv_clock_per_cpu, cpu) = p;
return p ? 0 : -ENOMEM;
} }
void __init kvmclock_init(void) void __init kvmclock_init(void)
{ {
struct pvclock_vcpu_time_info *vcpu_time;
unsigned long mem, mem_wall_clock;
int size, cpu, wall_clock_size;
u8 flags; u8 flags;
size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); if (!kvm_para_available() || !kvmclock)
if (!kvm_para_available())
return; return;
if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
} else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) } else if (!kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
return;
wall_clock_size = PAGE_ALIGN(sizeof(struct pvclock_wall_clock));
mem_wall_clock = kvm_memblock_alloc(wall_clock_size, PAGE_SIZE);
if (!mem_wall_clock)
return;
wall_clock = __va(mem_wall_clock);
memset(wall_clock, 0, wall_clock_size);
mem = kvm_memblock_alloc(size, PAGE_SIZE);
if (!mem) {
kvm_memblock_free(mem_wall_clock, wall_clock_size);
wall_clock = NULL;
return; return;
} }
hv_clock = __va(mem); if (cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "kvmclock:setup_percpu",
memset(hv_clock, 0, size); kvmclock_setup_percpu, NULL) < 0) {
if (kvm_register_clock("primary cpu clock")) {
hv_clock = NULL;
kvm_memblock_free(mem, size);
kvm_memblock_free(mem_wall_clock, wall_clock_size);
wall_clock = NULL;
return; return;
} }
printk(KERN_INFO "kvm-clock: Using msrs %x and %x", pr_info("kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock); msr_kvm_system_time, msr_kvm_wall_clock);
pvclock_set_pvti_cpu0_va(hv_clock); this_cpu_write(hv_clock_per_cpu, &hv_clock_boot[0]);
kvm_register_clock("primary cpu clock");
pvclock_set_pvti_cpu0_va(hv_clock_boot);
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
cpu = get_cpu(); flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
vcpu_time = &hv_clock[cpu].pvti;
flags = pvclock_read_flags(vcpu_time);
kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT); kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
put_cpu();
x86_platform.calibrate_tsc = kvm_get_tsc_khz; x86_platform.calibrate_tsc = kvm_get_tsc_khz;
x86_platform.calibrate_cpu = kvm_get_tsc_khz; x86_platform.calibrate_cpu = kvm_get_tsc_khz;
x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.get_wallclock = kvm_get_wallclock;
x86_platform.set_wallclock = kvm_set_wallclock; x86_platform.set_wallclock = kvm_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
x86_cpuinit.early_percpu_clock_init = x86_cpuinit.early_percpu_clock_init = kvm_setup_secondary_clock;
kvm_setup_secondary_clock;
#endif #endif
x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
...@@ -350,31 +324,3 @@ void __init kvmclock_init(void) ...@@ -350,31 +324,3 @@ void __init kvmclock_init(void)
clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
pv_info.name = "KVM"; pv_info.name = "KVM";
} }
/*
 * kvm_setup_vsyscall_timeinfo - switch kvm-clock to the pvclock vclock mode
 *
 * On CONFIG_X86_64 builds, reads the pvclock flags from the hv_clock slot of
 * the CPU this runs on and, if PVCLOCK_TSC_STABLE_BIT is set, marks the
 * kvm_clock clocksource as VCLOCK_PVCLOCK.
 *
 * Return: 0 when the mode was set or there was nothing to do (no hv_clock
 * area, or a build without CONFIG_X86_64); 1 when the stable bit is missing.
 */
int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
	int cpu;
	u8 flags;
	struct pvclock_vcpu_time_info *vcpu_time;

	if (!hv_clock)
		return 0;

	/* Read the flags of the current CPU's slot between get_cpu()/put_cpu(). */
	cpu = get_cpu();
	vcpu_time = &hv_clock[cpu].pvti;
	flags = pvclock_read_flags(vcpu_time);
	put_cpu();

	if (!(flags & PVCLOCK_TSC_STABLE_BIT))
		return 1;

	kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
	return 0;
}
...@@ -866,6 +866,8 @@ void __init setup_arch(char **cmdline_p) ...@@ -866,6 +866,8 @@ void __init setup_arch(char **cmdline_p)
idt_setup_early_traps(); idt_setup_early_traps();
early_cpu_init(); early_cpu_init();
arch_init_ideal_nops();
jump_label_init();
early_ioremap_init(); early_ioremap_init();
setup_olpc_ofw_pgd(); setup_olpc_ofw_pgd();
...@@ -1012,6 +1014,7 @@ void __init setup_arch(char **cmdline_p) ...@@ -1012,6 +1014,7 @@ void __init setup_arch(char **cmdline_p)
*/ */
init_hypervisor_platform(); init_hypervisor_platform();
tsc_early_init();
x86_init.resources.probe_roms(); x86_init.resources.probe_roms();
/* after parse_early_param, so could debug it */ /* after parse_early_param, so could debug it */
...@@ -1197,11 +1200,6 @@ void __init setup_arch(char **cmdline_p) ...@@ -1197,11 +1200,6 @@ void __init setup_arch(char **cmdline_p)
memblock_find_dma_reserve(); memblock_find_dma_reserve();
#ifdef CONFIG_KVM_GUEST
kvmclock_init();
#endif
tsc_early_delay_calibrate();
if (!early_xdbc_setup_hardware()) if (!early_xdbc_setup_hardware())
early_xdbc_register_console(); early_xdbc_register_console();
...@@ -1272,8 +1270,6 @@ void __init setup_arch(char **cmdline_p) ...@@ -1272,8 +1270,6 @@ void __init setup_arch(char **cmdline_p)
mcheck_init(); mcheck_init();
arch_init_ideal_nops();
register_refined_jiffies(CLOCK_TICK_RATE); register_refined_jiffies(CLOCK_TICK_RATE);
#ifdef CONFIG_EFI #ifdef CONFIG_EFI
......
...@@ -33,16 +33,13 @@ EXPORT_SYMBOL(cpu_khz); ...@@ -33,16 +33,13 @@ EXPORT_SYMBOL(cpu_khz);
unsigned int __read_mostly tsc_khz; unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz); EXPORT_SYMBOL(tsc_khz);
#define KHZ 1000
/* /*
* TSC can be unstable due to cpufreq or due to unsynced TSCs * TSC can be unstable due to cpufreq or due to unsynced TSCs
*/ */
static int __read_mostly tsc_unstable; static int __read_mostly tsc_unstable;
/* native_sched_clock() is called before tsc_init(), so
we must start with the TSC soft disabled to prevent
erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
static int __read_mostly tsc_disabled = -1;
static DEFINE_STATIC_KEY_FALSE(__use_tsc); static DEFINE_STATIC_KEY_FALSE(__use_tsc);
int tsc_clocksource_reliable; int tsc_clocksource_reliable;
...@@ -106,23 +103,6 @@ void cyc2ns_read_end(void) ...@@ -106,23 +103,6 @@ void cyc2ns_read_end(void)
* -johnstul@us.ibm.com "math is hard, lets go shopping!" * -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/ */
/* Reset one cyc2ns conversion record: offset, shift and multiplier all zero. */
static void cyc2ns_data_init(struct cyc2ns_data *data)
{
	data->cyc2ns_offset = 0;
	data->cyc2ns_shift  = 0;
	data->cyc2ns_mul    = 0;
}
/*
 * Prepare the per-CPU cyc2ns state of @cpu: zero both data slots and
 * initialize the sequence counter that guards them.
 */
static void __init cyc2ns_init(int cpu)
{
struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
/* Both copies of the conversion data start out zeroed. */
cyc2ns_data_init(&c2n->data[0]);
cyc2ns_data_init(&c2n->data[1]);
seqcount_init(&c2n->seq);
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc) static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{ {
struct cyc2ns_data data; struct cyc2ns_data data;
...@@ -138,18 +118,11 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) ...@@ -138,18 +118,11 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
return ns; return ns;
} }
static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{ {
unsigned long long ns_now; unsigned long long ns_now;
struct cyc2ns_data data; struct cyc2ns_data data;
struct cyc2ns *c2n; struct cyc2ns *c2n;
unsigned long flags;
local_irq_save(flags);
sched_clock_idle_sleep_event();
if (!khz)
goto done;
ns_now = cycles_2_ns(tsc_now); ns_now = cycles_2_ns(tsc_now);
...@@ -181,12 +154,55 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_ ...@@ -181,12 +154,55 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_
c2n->data[0] = data; c2n->data[0] = data;
raw_write_seqcount_latch(&c2n->seq); raw_write_seqcount_latch(&c2n->seq);
c2n->data[1] = data; c2n->data[1] = data;
}
static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
unsigned long flags;
local_irq_save(flags);
sched_clock_idle_sleep_event();
if (khz)
__set_cyc2ns_scale(khz, cpu, tsc_now);
done:
sched_clock_idle_wakeup_event(); sched_clock_idle_wakeup_event();
local_irq_restore(flags); local_irq_restore(flags);
} }
/*
 * Initialize cyc2ns for boot cpu
 *
 * Initializes the sequence counter of the running CPU's cyc2ns entry and
 * then sets that entry's scale from the global tsc_khz, using the TSC value
 * read at the time of the call as the reference point.
 */
static void __init cyc2ns_init_boot_cpu(void)
{
struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
/* The sequence counter is set up before the scale data is written. */
seqcount_init(&c2n->seq);
__set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
}
/*
 * Secondary CPUs do not run through tsc_init(), so set up
 * all the scale factors for all CPUs, assuming the same
 * speed as the bootup CPU. (cpufreq notifiers will fix this
 * up if their speed diverges)
 *
 * The two cyc2ns data slots of the CPU this runs on are used as the
 * template and copied into the entry of every other possible CPU.
 */
static void __init cyc2ns_init_secondary_cpus(void)
{
	unsigned int cpu, this_cpu = smp_processor_id();
	struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
	struct cyc2ns_data *data = c2n->data;

	for_each_possible_cpu(cpu) {
		if (cpu == this_cpu)
			continue;

		/*
		 * Look up the target CPU's entry first, then initialize *its*
		 * sequence counter. Doing it the other way round would
		 * re-initialize the previously visited entry (the running
		 * CPU's own, already live, counter on the first pass) and
		 * leave the last CPU's counter untouched.
		 */
		c2n = per_cpu_ptr(&cyc2ns, cpu);
		seqcount_init(&c2n->seq);
		c2n->data[0] = data[0];
		c2n->data[1] = data[1];
	}
}
/* /*
* Scheduler clock - returns current time in nanosec units. * Scheduler clock - returns current time in nanosec units.
*/ */
...@@ -248,8 +264,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); ...@@ -248,8 +264,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
#ifdef CONFIG_X86_TSC #ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str) int __init notsc_setup(char *str)
{ {
pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); mark_tsc_unstable("boot parameter notsc");
tsc_disabled = 1;
return 1; return 1;
} }
#else #else
...@@ -665,30 +680,17 @@ static unsigned long cpu_khz_from_cpuid(void) ...@@ -665,30 +680,17 @@ static unsigned long cpu_khz_from_cpuid(void)
return eax_base_mhz * 1000; return eax_base_mhz * 1000;
} }
/** /*
* native_calibrate_cpu - calibrate the cpu on boot * calibrate cpu using pit, hpet, and ptimer methods. They are available
* later in boot after acpi is initialized.
*/ */
unsigned long native_calibrate_cpu(void) static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
{ {
u64 tsc1, tsc2, delta, ref1, ref2; u64 tsc1, tsc2, delta, ref1, ref2;
unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
unsigned long flags, latch, ms, fast_calibrate; unsigned long flags, latch, ms;
int hpet = is_hpet_enabled(), i, loopmin; int hpet = is_hpet_enabled(), i, loopmin;
fast_calibrate = cpu_khz_from_cpuid();
if (fast_calibrate)
return fast_calibrate;
fast_calibrate = cpu_khz_from_msr();
if (fast_calibrate)
return fast_calibrate;
local_irq_save(flags);
fast_calibrate = quick_pit_calibrate();
local_irq_restore(flags);
if (fast_calibrate)
return fast_calibrate;
/* /*
* Run 5 calibration loops to get the lowest frequency value * Run 5 calibration loops to get the lowest frequency value
* (the best estimate). We use two different calibration modes * (the best estimate). We use two different calibration modes
...@@ -831,6 +833,37 @@ unsigned long native_calibrate_cpu(void) ...@@ -831,6 +833,37 @@ unsigned long native_calibrate_cpu(void)
return tsc_pit_min; return tsc_pit_min;
} }
/**
 * native_calibrate_cpu_early - can calibrate the cpu early in boot
 *
 * Tries, in order, CPUID, the MSR based method and the quick PIT
 * calibration (the latter with local interrupts disabled), and stops at
 * the first method that yields a non-zero frequency.
 *
 * Return: the calibrated frequency, or 0 if every method failed.
 */
unsigned long native_calibrate_cpu_early(void)
{
	unsigned long khz, irq_flags;

	khz = cpu_khz_from_cpuid();
	if (khz)
		return khz;

	khz = cpu_khz_from_msr();
	if (khz)
		return khz;

	local_irq_save(irq_flags);
	khz = quick_pit_calibrate();
	local_irq_restore(irq_flags);

	return khz;
}
/**
 * native_calibrate_cpu - calibrate the cpu
 *
 * Uses the early calibration methods first and falls back to the
 * PIT/HPET/PM-timer based calibration only when they report 0.
 */
static unsigned long native_calibrate_cpu(void)
{
	unsigned long khz = native_calibrate_cpu_early();

	return khz ? khz : pit_hpet_ptimer_calibrate_cpu();
}
void recalibrate_cpu_khz(void) void recalibrate_cpu_khz(void)
{ {
#ifndef CONFIG_SMP #ifndef CONFIG_SMP
...@@ -1307,7 +1340,7 @@ static void tsc_refine_calibration_work(struct work_struct *work) ...@@ -1307,7 +1340,7 @@ static void tsc_refine_calibration_work(struct work_struct *work)
static int __init init_tsc_clocksource(void) static int __init init_tsc_clocksource(void)
{ {
if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz)
return 0; return 0;
if (tsc_unstable) if (tsc_unstable)
...@@ -1341,40 +1374,22 @@ static int __init init_tsc_clocksource(void) ...@@ -1341,40 +1374,22 @@ static int __init init_tsc_clocksource(void)
*/ */
device_initcall(init_tsc_clocksource); device_initcall(init_tsc_clocksource);
void __init tsc_early_delay_calibrate(void) static bool __init determine_cpu_tsc_frequencies(bool early)
{ {
unsigned long lpj; /* Make sure that cpu and tsc are not already calibrated */
WARN_ON(cpu_khz || tsc_khz);
if (!boot_cpu_has(X86_FEATURE_TSC))
return; if (early) {
cpu_khz = x86_platform.calibrate_cpu();
cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc();
tsc_khz = x86_platform.calibrate_tsc(); } else {
/* We should not be here with non-native cpu calibration */
tsc_khz = tsc_khz ? : cpu_khz; WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
if (!tsc_khz) cpu_khz = pit_hpet_ptimer_calibrate_cpu();
return;
lpj = tsc_khz * 1000;
do_div(lpj, HZ);
loops_per_jiffy = lpj;
}
void __init tsc_init(void)
{
u64 lpj, cyc;
int cpu;
if (!boot_cpu_has(X86_FEATURE_TSC)) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
} }
cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
/* /*
* Trust non-zero tsc_khz as authorative, * Trust non-zero tsc_khz as authoritative,
* and use it to sanity check cpu_khz, * and use it to sanity check cpu_khz,
* which will be off if system timer is off. * which will be off if system timer is off.
*/ */
...@@ -1383,52 +1398,78 @@ void __init tsc_init(void) ...@@ -1383,52 +1398,78 @@ void __init tsc_init(void)
else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
cpu_khz = tsc_khz; cpu_khz = tsc_khz;
if (!tsc_khz) { if (tsc_khz == 0)
mark_tsc_unstable("could not calculate TSC khz"); return false;
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
pr_info("Detected %lu.%03lu MHz processor\n", pr_info("Detected %lu.%03lu MHz processor\n",
(unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz / KHZ,
(unsigned long)cpu_khz % 1000); (unsigned long)cpu_khz % KHZ);
if (cpu_khz != tsc_khz) { if (cpu_khz != tsc_khz) {
pr_info("Detected %lu.%03lu MHz TSC", pr_info("Detected %lu.%03lu MHz TSC",
(unsigned long)tsc_khz / 1000, (unsigned long)tsc_khz / KHZ,
(unsigned long)tsc_khz % 1000); (unsigned long)tsc_khz % KHZ);
} }
return true;
}
/*
 * Derive loops-per-jiffy from the calibrated TSC frequency:
 * lpj = tsc_khz * KHZ / HZ.
 *
 * The product is formed in 64 bits. tsc_khz is an unsigned int, so a plain
 * "tsc_khz * KHZ" is a 32-bit multiplication that wraps once the TSC runs
 * faster than roughly 4.29 GHz, and do_div() is then fed a 64-bit dividend.
 */
static unsigned long __init get_loops_per_jiffy(void)
{
	u64 lpj = (u64)tsc_khz * KHZ;

	do_div(lpj, HZ);
	return lpj;
}
static void __init tsc_enable_sched_clock(void)
{
/* Sanitize TSC ADJUST before cyc2ns gets initialized */ /* Sanitize TSC ADJUST before cyc2ns gets initialized */
tsc_store_and_check_tsc_adjust(true); tsc_store_and_check_tsc_adjust(true);
cyc2ns_init_boot_cpu();
static_branch_enable(&__use_tsc);
}
/*
 * Early TSC bring-up: when the boot CPU has a TSC and its frequency can be
 * determined with the early calibration methods, seed loops_per_jiffy from
 * it and enable the TSC based sched clock. Otherwise do nothing.
 */
void __init tsc_early_init(void)
{
	if (boot_cpu_has(X86_FEATURE_TSC) &&
	    determine_cpu_tsc_frequencies(true)) {
		loops_per_jiffy = get_loops_per_jiffy();
		tsc_enable_sched_clock();
	}
}
void __init tsc_init(void)
{
/* /*
* Secondary CPUs do not run through tsc_init(), so set up * native_calibrate_cpu_early can only calibrate using methods that are
* all the scale factors for all CPUs, assuming the same * available early in boot.
* speed as the bootup CPU. (cpufreq notifiers will fix this
* up if their speed diverges)
*/ */
cyc = rdtsc(); if (x86_platform.calibrate_cpu == native_calibrate_cpu_early)
for_each_possible_cpu(cpu) { x86_platform.calibrate_cpu = native_calibrate_cpu;
cyc2ns_init(cpu);
set_cyc2ns_scale(tsc_khz, cpu, cyc);
}
if (tsc_disabled > 0) if (!boot_cpu_has(X86_FEATURE_TSC)) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return; return;
}
/* now allow native_sched_clock() to use rdtsc */ if (!tsc_khz) {
/* We failed to determine frequencies earlier, try again */
if (!determine_cpu_tsc_frequencies(false)) {
mark_tsc_unstable("could not calculate TSC khz");
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
tsc_enable_sched_clock();
}
tsc_disabled = 0; cyc2ns_init_secondary_cpus();
static_branch_enable(&__use_tsc);
if (!no_sched_irq_time) if (!no_sched_irq_time)
enable_sched_clock_irqtime(); enable_sched_clock_irqtime();
lpj = ((u64)tsc_khz * 1000); lpj_fine = get_loops_per_jiffy();
do_div(lpj, HZ);
lpj_fine = lpj;
use_tsc_delay(); use_tsc_delay();
check_system_tsc_reliable(); check_system_tsc_reliable();
...@@ -1455,7 +1496,7 @@ unsigned long calibrate_delay_is_known(void) ...@@ -1455,7 +1496,7 @@ unsigned long calibrate_delay_is_known(void)
int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
const struct cpumask *mask = topology_core_cpumask(cpu); const struct cpumask *mask = topology_core_cpumask(cpu);
if (tsc_disabled || !constant_tsc || !mask) if (!constant_tsc || !mask)
return 0; return 0;
sibling = cpumask_any_but(mask, cpu); sibling = cpumask_any_but(mask, cpu);
......
// SPDX-License-Identifier: GPL-2.0
/* /*
* tsc_msr.c - TSC frequency enumeration via MSR * TSC frequency enumeration via MSR
* *
* Copyright (C) 2013 Intel Corporation * Copyright (C) 2013, 2018 Intel Corporation
* Author: Bin Gao <bin.gao@intel.com> * Author: Bin Gao <bin.gao@intel.com>
*
* This file is released under the GPLv2.
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/msr.h>
#include <asm/param.h> #include <asm/param.h>
#include <asm/tsc.h>
#define MAX_NUM_FREQS 9 #define MAX_NUM_FREQS 9
...@@ -23,44 +25,48 @@ ...@@ -23,44 +25,48 @@
* field msr_plat does. * field msr_plat does.
*/ */
struct freq_desc { struct freq_desc {
u8 x86_family; /* CPU family */
u8 x86_model; /* model */
u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */ u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */
u32 freqs[MAX_NUM_FREQS]; u32 freqs[MAX_NUM_FREQS];
}; };
static struct freq_desc freq_desc_tables[] = { /*
/* PNW */ * Penwell and Clovertrail use spread spectrum clock,
{ 6, 0x27, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }, * so the freq number is not exactly the same as reported
/* CLV+ */ * by MSR based on SDM.
{ 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, */
/* TNG - Intel Atom processor Z3400 series */ static const struct freq_desc freq_desc_pnw = {
{ 6, 0x4a, 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } }, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 }
/* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */
{ 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } },
/* ANN - Intel Atom processor Z3500 series */
{ 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } },
/* AMT - Intel Atom processor X7-Z8000 and X5-Z8000 series */
{ 6, 0x4c, 1, { 83300, 100000, 133300, 116700,
80000, 93300, 90000, 88900, 87500 } },
}; };
static int match_cpu(u8 family, u8 model) static const struct freq_desc freq_desc_clv = {
{ 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 }
int i; };
for (i = 0; i < ARRAY_SIZE(freq_desc_tables); i++) { static const struct freq_desc freq_desc_byt = {
if ((family == freq_desc_tables[i].x86_family) && 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 }
(model == freq_desc_tables[i].x86_model)) };
return i;
}
return -1; static const struct freq_desc freq_desc_cht = {
} 1, { 83300, 100000, 133300, 116700, 80000, 93300, 90000, 88900, 87500 }
};
/* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */ static const struct freq_desc freq_desc_tng = {
#define id_to_freq(cpu_index, freq_id) \ 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 }
(freq_desc_tables[cpu_index].freqs[freq_id]) };
static const struct freq_desc freq_desc_ann = {
1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 }
};
static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw),
INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv),
INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt),
INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng),
INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann),
{}
};
/* /*
* MSR-based CPU/TSC frequency discovery for certain CPUs. * MSR-based CPU/TSC frequency discovery for certain CPUs.
...@@ -70,18 +76,17 @@ static int match_cpu(u8 family, u8 model) ...@@ -70,18 +76,17 @@ static int match_cpu(u8 family, u8 model)
*/ */
unsigned long cpu_khz_from_msr(void) unsigned long cpu_khz_from_msr(void)
{ {
u32 lo, hi, ratio, freq_id, freq; u32 lo, hi, ratio, freq;
const struct freq_desc *freq_desc;
const struct x86_cpu_id *id;
unsigned long res; unsigned long res;
int cpu_index;
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return 0;
cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); id = x86_match_cpu(tsc_msr_cpu_ids);
if (cpu_index < 0) if (!id)
return 0; return 0;
if (freq_desc_tables[cpu_index].msr_plat) { freq_desc = (struct freq_desc *)id->driver_data;
if (freq_desc->msr_plat) {
rdmsr(MSR_PLATFORM_INFO, lo, hi); rdmsr(MSR_PLATFORM_INFO, lo, hi);
ratio = (lo >> 8) & 0xff; ratio = (lo >> 8) & 0xff;
} else { } else {
...@@ -91,8 +96,9 @@ unsigned long cpu_khz_from_msr(void) ...@@ -91,8 +96,9 @@ unsigned long cpu_khz_from_msr(void)
/* Get FSB FREQ ID */ /* Get FSB FREQ ID */
rdmsr(MSR_FSB_FREQ, lo, hi); rdmsr(MSR_FSB_FREQ, lo, hi);
freq_id = lo & 0x7;
freq = id_to_freq(cpu_index, freq_id); /* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */
freq = freq_desc->freqs[lo & 0x7];
/* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
res = freq * ratio; res = freq * ratio;
......
...@@ -109,7 +109,7 @@ struct x86_cpuinit_ops x86_cpuinit = { ...@@ -109,7 +109,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
static void default_nmi_init(void) { }; static void default_nmi_init(void) { };
struct x86_platform_ops x86_platform __ro_after_init = { struct x86_platform_ops x86_platform __ro_after_init = {
.calibrate_cpu = native_calibrate_cpu, .calibrate_cpu = native_calibrate_cpu_early,
.calibrate_tsc = native_calibrate_tsc, .calibrate_tsc = native_calibrate_tsc,
.get_wallclock = mach_get_cmos_time, .get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss, .set_wallclock = mach_set_rtc_mmss,
......
obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfld.o pwr.o obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o pwr.o
# SFI specific code # SFI specific code
ifdef CONFIG_X86_INTEL_MID ifdef CONFIG_X86_INTEL_MID
......
...@@ -36,8 +36,6 @@ ...@@ -36,8 +36,6 @@
#include <asm/apb_timer.h> #include <asm/apb_timer.h>
#include <asm/reboot.h> #include <asm/reboot.h>
#include "intel_mid_weak_decls.h"
/* /*
* the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
* cmdline option x86_intel_mid_timer can be used to override the configuration * cmdline option x86_intel_mid_timer can be used to override the configuration
...@@ -61,10 +59,6 @@ ...@@ -61,10 +59,6 @@
enum intel_mid_timer_options intel_mid_timer_options; enum intel_mid_timer_options intel_mid_timer_options;
/* intel_mid_ops to store sub arch ops */
static struct intel_mid_ops *intel_mid_ops;
/* getter function for sub arch ops*/
static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT;
enum intel_mid_cpu_type __intel_mid_cpu_chip; enum intel_mid_cpu_type __intel_mid_cpu_chip;
EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip); EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip);
...@@ -82,11 +76,6 @@ static void intel_mid_reboot(void) ...@@ -82,11 +76,6 @@ static void intel_mid_reboot(void)
intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0); intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
} }
static unsigned long __init intel_mid_calibrate_tsc(void)
{
return 0;
}
static void __init intel_mid_setup_bp_timer(void) static void __init intel_mid_setup_bp_timer(void)
{ {
apbt_time_init(); apbt_time_init();
...@@ -133,6 +122,7 @@ static void intel_mid_arch_setup(void) ...@@ -133,6 +122,7 @@ static void intel_mid_arch_setup(void)
case 0x3C: case 0x3C:
case 0x4A: case 0x4A:
__intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER; __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER;
x86_platform.legacy.rtc = 1;
break; break;
case 0x27: case 0x27:
default: default:
...@@ -140,17 +130,7 @@ static void intel_mid_arch_setup(void) ...@@ -140,17 +130,7 @@ static void intel_mid_arch_setup(void)
break; break;
} }
if (__intel_mid_cpu_chip < MAX_CPU_OPS(get_intel_mid_ops))
intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
else {
intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
}
out: out:
if (intel_mid_ops->arch_setup)
intel_mid_ops->arch_setup();
/* /*
* Intel MID platforms are using explicitly defined regulators. * Intel MID platforms are using explicitly defined regulators.
* *
...@@ -191,7 +171,6 @@ void __init x86_intel_mid_early_setup(void) ...@@ -191,7 +171,6 @@ void __init x86_intel_mid_early_setup(void)
x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
x86_platform.get_nmi_reason = intel_mid_get_nmi_reason; x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
x86_init.pci.arch_init = intel_mid_pci_init; x86_init.pci.arch_init = intel_mid_pci_init;
......
/*
* intel_mid_weak_decls.h: Weak declarations of intel-mid.c
*
* (C) Copyright 2013 Intel Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
/* For every CPU addition a new get_<cpuname>_ops interface needs
* to be added.
*/
extern void *get_penwell_ops(void);
extern void *get_cloverview_ops(void);
extern void *get_tangier_ops(void);
/*
* mfld.c: Intel Medfield platform setup code
*
* (C) Copyright 2013 Intel Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/init.h>
#include <asm/apic.h>
#include <asm/intel-mid.h>
#include <asm/intel_mid_vrtc.h>
#include "intel_mid_weak_decls.h"
/*
 * Compute the TSC frequency as bus ratio times FSB frequency, using the
 * ratio field from MSR_IA32_PERF_STATUS and the FSB id from MSR_FSB_FREQ.
 * Also sets lapic_timer_frequency from the FSB frequency and forces the
 * TSC_KNOWN_FREQ and TSC_RELIABLE CPU capabilities.
 */
static unsigned long __init mfld_calibrate_tsc(void)
{
	u32 lo, hi, ratio, fsb;
	unsigned long khz;

	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
	pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);

	/* The ratio is the 5-bit field at bits 8..12 of the high word. */
	ratio = (hi >> 8) & 0x1f;
	pr_debug("ratio is %d\n", ratio);
	if (ratio == 0) {
		pr_err("read a zero ratio, should be incorrect!\n");
		pr_err("force tsc ratio to 16 ...\n");
		ratio = 16;
	}

	/* FSB id 7 selects the 83 SKU; every other id is treated as 100. */
	rdmsr(MSR_FSB_FREQ, lo, hi);
	fsb = ((lo & 0x7) == 0x7) ? FSB_FREQ_83SKU : FSB_FREQ_100SKU;

	khz = ratio * fsb;
	pr_debug("read penwell tsc %lu khz\n", khz);

	lapic_timer_frequency = fsb * 1000 / HZ;

	/*
	 * TSC on Intel Atom SoCs is reliable and of known frequency.
	 * See tsc_msr.c for details.
	 */
	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);

	return khz;
}
/* Arch setup hook for Penwell: route TSC calibration to mfld_calibrate_tsc(). */
static void __init penwell_arch_setup(void)
{
x86_platform.calibrate_tsc = mfld_calibrate_tsc;
}
/* Sub-arch ops handed out by get_penwell_ops() and get_cloverview_ops(). */
static struct intel_mid_ops penwell_ops = {
.arch_setup = penwell_arch_setup,
};
/* Return the Penwell sub-arch ops as an untyped pointer. */
void *get_penwell_ops(void)
{
return &penwell_ops;
}
/* Cloverview has no ops of its own here; it returns the Penwell ops. */
void *get_cloverview_ops(void)
{
return &penwell_ops;
}
/*
* Intel Merrifield platform specific setup code
*
* (C) Copyright 2013 Intel Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/init.h>
#include <asm/apic.h>
#include <asm/intel-mid.h>
#include "intel_mid_weak_decls.h"
/*
 * Compute the TSC frequency as bus ratio times FSB frequency, using the
 * ratio field from MSR_PLATFORM_INFO and the 3-bit bus frequency id from
 * MSR_FSB_FREQ. Also sets lapic_timer_frequency from the FSB frequency and
 * forces the TSC_KNOWN_FREQ and TSC_RELIABLE CPU capabilities.
 */
static unsigned long __init tangier_calibrate_tsc(void)
{
	unsigned long fast_calibrate;
	u32 lo, hi, ratio, fsb, bus_freq;

	/* *********************** */
	/* Compute TSC:Ratio * FSB */
	/* *********************** */

	/* Compute Ratio */
	rdmsr(MSR_PLATFORM_INFO, lo, hi);
	pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo);

	ratio = (lo >> 8) & 0xFF;
	pr_debug("ratio is %d\n", ratio);
	if (!ratio) {
		pr_err("Read a zero ratio, force tsc ratio to 4 ...\n");
		ratio = 4;
	}

	/* Compute FSB */
	rdmsr(MSR_FSB_FREQ, lo, hi);
	pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n",
		 hi, lo);

	bus_freq = lo & 0x7;
	pr_debug("bus_freq = 0x%x\n", bus_freq);

	/*
	 * bus_freq is masked to three bits, so the eight encodings below are
	 * exhaustive; no "invalid value" path is needed.
	 */
	switch (bus_freq) {
	case 2:
		fsb = FSB_FREQ_133SKU;
		break;
	case 3:
		fsb = FSB_FREQ_167SKU;
		break;
	case 4:
		fsb = FSB_FREQ_83SKU;
		break;
	case 5:
		fsb = FSB_FREQ_400SKU;
		break;
	case 6:
		fsb = FSB_FREQ_267SKU;
		break;
	case 7:
		fsb = FSB_FREQ_333SKU;
		break;
	default:
		/* Encodings 0 and 1 both select the 100 SKU. */
		fsb = FSB_FREQ_100SKU;
		break;
	}

	/* TSC = FSB Freq * Resolved HFM Ratio */
	fast_calibrate = ratio * fsb;
	pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate);

	/* ************************************ */
	/* Calculate Local APIC Timer Frequency */
	/* ************************************ */
	lapic_timer_frequency = (fsb * 1000) / HZ;

	pr_debug("Setting lapic_timer_frequency = %d\n",
		 lapic_timer_frequency);

	/*
	 * TSC on Intel Atom SoCs is reliable and of known frequency.
	 * See tsc_msr.c for details.
	 */
	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);

	return fast_calibrate;
}
/*
 * Arch setup hook for Tangier: route TSC calibration to
 * tangier_calibrate_tsc() and set the legacy RTC flag in x86_platform.
 */
static void __init tangier_arch_setup(void)
{
x86_platform.calibrate_tsc = tangier_calibrate_tsc;
x86_platform.legacy.rtc = 1;
}
/* tangier arch ops; handed out by get_tangier_ops() */
static struct intel_mid_ops tangier_ops = {
.arch_setup = tangier_arch_setup,
};
/* Return the Tangier sub-arch ops as an untyped pointer. */
void *get_tangier_ops(void)
{
return &tangier_ops;
}
...@@ -119,6 +119,27 @@ static void __init xen_banner(void) ...@@ -119,6 +119,27 @@ static void __init xen_banner(void)
version >> 16, version & 0xffff, extra.extraversion, version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
} }
/*
 * Platform init hook for Xen PV guests: map the shared info page through
 * the FIX_PARAVIRT_BOOTMAP fixmap slot, point HYPERVISOR_shared_info at it,
 * reset the vcpu_info of CPU 0 and install the Xen time ops.
 */
static void __init xen_pv_init_platform(void)
{
set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
/* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */
xen_vcpu_info_reset(0);
/* pvclock is in shared info area */
xen_init_time_ops();
}
/*
 * Late guest init hook for Xen PV. Only does work on !CONFIG_SMP builds,
 * where it performs the vcpu info placement; on SMP builds it is empty.
 */
static void __init xen_pv_guest_late_init(void)
{
#ifndef CONFIG_SMP
/* Setup shared vcpu info for non-smp configurations */
xen_setup_vcpu_info_placement();
#endif
}
/* Check if running on Xen version (major, minor) or later */ /* Check if running on Xen version (major, minor) or later */
bool bool
xen_running_on_version_or_later(unsigned int major, unsigned int minor) xen_running_on_version_or_later(unsigned int major, unsigned int minor)
...@@ -947,34 +968,8 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) ...@@ -947,34 +968,8 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
xen_write_msr_safe(msr, low, high); xen_write_msr_safe(msr, low, high);
} }
/*
 * Map the Xen shared info page via the FIX_PARAVIRT_BOOTMAP fixmap slot,
 * publish it through HYPERVISOR_shared_info and set up the mfn list list.
 * While the system is still booting, additionally do the boot-only vcpu
 * info placement (UP builds) and install the Xen time ops.
 */
void xen_setup_shared_info(void)
{
	set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
	HYPERVISOR_shared_info =
		(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);

	xen_setup_mfn_list_list();

	/* Everything below is boot-time only. */
	if (system_state != SYSTEM_BOOTING)
		return;

#ifndef CONFIG_SMP
	/*
	 * On UP, shared info placement is done here. It is restricted to
	 * boot because on restore the vcpu setup goes through
	 * xen_vcpu_restore().
	 */
	xen_setup_vcpu_info_placement();
#endif

	/*
	 * Shared info is mapped now, so routines that point into the
	 * pvclock area may be used from here on.
	 */
	xen_init_time_ops();
}
/* This is called once we have the cpu_possible_mask */ /* This is called once we have the cpu_possible_mask */
void __ref xen_setup_vcpu_info_placement(void) void __init xen_setup_vcpu_info_placement(void)
{ {
int cpu; int cpu;
...@@ -1228,6 +1223,8 @@ asmlinkage __visible void __init xen_start_kernel(void) ...@@ -1228,6 +1223,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_init.irqs.intr_mode_init = x86_init_noop; x86_init.irqs.intr_mode_init = x86_init_noop;
x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.arch_setup = xen_arch_setup;
x86_init.oem.banner = xen_banner; x86_init.oem.banner = xen_banner;
x86_init.hyper.init_platform = xen_pv_init_platform;
x86_init.hyper.guest_late_init = xen_pv_guest_late_init;
/* /*
* Set up some pagetable state before starting to set any ptes. * Set up some pagetable state before starting to set any ptes.
......
...@@ -1230,8 +1230,7 @@ static void __init xen_pagetable_p2m_free(void) ...@@ -1230,8 +1230,7 @@ static void __init xen_pagetable_p2m_free(void)
* We roundup to the PMD, which means that if anybody at this stage is * We roundup to the PMD, which means that if anybody at this stage is
* using the __ka address of xen_start_info or * using the __ka address of xen_start_info or
* xen_start_info->shared_info they are in going to crash. Fortunatly * xen_start_info->shared_info they are in going to crash. Fortunatly
* we have already revectored in xen_setup_kernel_pagetable and in * we have already revectored in xen_setup_kernel_pagetable.
* xen_setup_shared_info.
*/ */
size = roundup(size, PMD_SIZE); size = roundup(size, PMD_SIZE);
...@@ -1292,8 +1291,7 @@ static void __init xen_pagetable_init(void) ...@@ -1292,8 +1291,7 @@ static void __init xen_pagetable_init(void)
/* Remap memory freed due to conflicts with E820 map */ /* Remap memory freed due to conflicts with E820 map */
xen_remap_memory(); xen_remap_memory();
xen_setup_mfn_list_list();
xen_setup_shared_info();
} }
static void xen_write_cr2(unsigned long cr2) static void xen_write_cr2(unsigned long cr2)
{ {
......
...@@ -27,8 +27,9 @@ void xen_pv_pre_suspend(void) ...@@ -27,8 +27,9 @@ void xen_pv_pre_suspend(void)
void xen_pv_post_suspend(int suspend_cancelled) void xen_pv_post_suspend(int suspend_cancelled)
{ {
xen_build_mfn_list_list(); xen_build_mfn_list_list();
set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
xen_setup_shared_info(); HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
xen_setup_mfn_list_list();
if (suspend_cancelled) { if (suspend_cancelled) {
xen_start_info->store_mfn = xen_start_info->store_mfn =
......
...@@ -31,6 +31,8 @@ ...@@ -31,6 +31,8 @@
/* Xen may fire a timer up to this many ns early */ /* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP 100000 #define TIMER_SLOP 100000
static u64 xen_sched_clock_offset __read_mostly;
/* Get the TSC speed from Xen */ /* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void) static unsigned long xen_tsc_khz(void)
{ {
...@@ -40,7 +42,7 @@ static unsigned long xen_tsc_khz(void) ...@@ -40,7 +42,7 @@ static unsigned long xen_tsc_khz(void)
return pvclock_tsc_khz(info); return pvclock_tsc_khz(info);
} }
u64 xen_clocksource_read(void) static u64 xen_clocksource_read(void)
{ {
struct pvclock_vcpu_time_info *src; struct pvclock_vcpu_time_info *src;
u64 ret; u64 ret;
...@@ -57,6 +59,11 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs) ...@@ -57,6 +59,11 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs)
return xen_clocksource_read(); return xen_clocksource_read();
} }
/*
 * sched_clock backend installed through xen_time_ops: the Xen clocksource
 * reading, rebased by xen_sched_clock_offset (the reading sampled when the
 * time ops were set up), so the returned value counts up from that point.
 */
static u64 xen_sched_clock(void)
{
	u64 now = xen_clocksource_read();

	return now - xen_sched_clock_offset;
}
static void xen_read_wallclock(struct timespec64 *ts) static void xen_read_wallclock(struct timespec64 *ts)
{ {
struct shared_info *s = HYPERVISOR_shared_info; struct shared_info *s = HYPERVISOR_shared_info;
...@@ -367,7 +374,7 @@ void xen_timer_resume(void) ...@@ -367,7 +374,7 @@ void xen_timer_resume(void)
} }
static const struct pv_time_ops xen_time_ops __initconst = { static const struct pv_time_ops xen_time_ops __initconst = {
.sched_clock = xen_clocksource_read, .sched_clock = xen_sched_clock,
.steal_clock = xen_steal_clock, .steal_clock = xen_steal_clock,
}; };
...@@ -503,8 +510,9 @@ static void __init xen_time_init(void) ...@@ -503,8 +510,9 @@ static void __init xen_time_init(void)
pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
} }
void __ref xen_init_time_ops(void) void __init xen_init_time_ops(void)
{ {
xen_sched_clock_offset = xen_clocksource_read();
pv_time_ops = xen_time_ops; pv_time_ops = xen_time_ops;
x86_init.timers.timer_init = xen_time_init; x86_init.timers.timer_init = xen_time_init;
...@@ -542,11 +550,11 @@ void __init xen_hvm_init_time_ops(void) ...@@ -542,11 +550,11 @@ void __init xen_hvm_init_time_ops(void)
return; return;
if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
printk(KERN_INFO "Xen doesn't support pvclock on HVM," pr_info("Xen doesn't support pvclock on HVM, disable pv timer");
"disable pv timer\n");
return; return;
} }
xen_sched_clock_offset = xen_clocksource_read();
pv_time_ops = xen_time_ops; pv_time_ops = xen_time_ops;
x86_init.timers.setup_percpu_clockev = xen_time_init; x86_init.timers.setup_percpu_clockev = xen_time_init;
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
......
...@@ -31,7 +31,6 @@ extern struct shared_info xen_dummy_shared_info; ...@@ -31,7 +31,6 @@ extern struct shared_info xen_dummy_shared_info;
extern struct shared_info *HYPERVISOR_shared_info; extern struct shared_info *HYPERVISOR_shared_info;
void xen_setup_mfn_list_list(void); void xen_setup_mfn_list_list(void);
void xen_setup_shared_info(void);
void xen_build_mfn_list_list(void); void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void); void xen_setup_machphys_mapping(void);
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
...@@ -68,12 +67,11 @@ void xen_init_irq_ops(void); ...@@ -68,12 +67,11 @@ void xen_init_irq_ops(void);
void xen_setup_timer(int cpu); void xen_setup_timer(int cpu);
void xen_setup_runstate_info(int cpu); void xen_setup_runstate_info(int cpu);
void xen_teardown_timer(int cpu); void xen_teardown_timer(int cpu);
u64 xen_clocksource_read(void);
void xen_setup_cpu_clockevents(void); void xen_setup_cpu_clockevents(void);
void xen_save_time_memory_area(void); void xen_save_time_memory_area(void);
void xen_restore_time_memory_area(void); void xen_restore_time_memory_area(void);
void __ref xen_init_time_ops(void); void xen_init_time_ops(void);
void __init xen_hvm_init_time_ops(void); void xen_hvm_init_time_ops(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id); irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
......
...@@ -259,6 +259,6 @@ static int __init tegra20_init_rtc(struct device_node *np) ...@@ -259,6 +259,6 @@ static int __init tegra20_init_rtc(struct device_node *np)
else else
clk_prepare_enable(clk); clk_prepare_enable(clk);
return register_persistent_clock(NULL, tegra_read_persistent_clock64); return register_persistent_clock(tegra_read_persistent_clock64);
} }
TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
...@@ -9,17 +9,16 @@ ...@@ -9,17 +9,16 @@
#define LINUX_SCHED_CLOCK #define LINUX_SCHED_CLOCK
#ifdef CONFIG_GENERIC_SCHED_CLOCK #ifdef CONFIG_GENERIC_SCHED_CLOCK
extern void sched_clock_postinit(void); extern void generic_sched_clock_init(void);
extern void sched_clock_register(u64 (*read)(void), int bits, extern void sched_clock_register(u64 (*read)(void), int bits,
unsigned long rate); unsigned long rate);
#else #else
static inline void sched_clock_postinit(void) { } static inline void generic_sched_clock_init(void) { }
static inline void sched_clock_register(u64 (*read)(void), int bits, static inline void sched_clock_register(u64 (*read)(void), int bits,
unsigned long rate) unsigned long rate)
{ {
;
} }
#endif #endif
......
...@@ -243,7 +243,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); ...@@ -243,7 +243,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
extern int persistent_clock_is_local; extern int persistent_clock_is_local;
extern void read_persistent_clock64(struct timespec64 *ts); extern void read_persistent_clock64(struct timespec64 *ts);
extern void read_boot_clock64(struct timespec64 *ts); void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock,
struct timespec64 *boot_offset);
extern int update_persistent_clock64(struct timespec64 now); extern int update_persistent_clock64(struct timespec64 now);
/* /*
......
...@@ -79,7 +79,7 @@ ...@@ -79,7 +79,7 @@
#include <linux/pti.h> #include <linux/pti.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/elevator.h> #include <linux/elevator.h>
#include <linux/sched_clock.h> #include <linux/sched/clock.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <linux/sched/task_stack.h> #include <linux/sched/task_stack.h>
#include <linux/context_tracking.h> #include <linux/context_tracking.h>
...@@ -642,7 +642,6 @@ asmlinkage __visible void __init start_kernel(void) ...@@ -642,7 +642,6 @@ asmlinkage __visible void __init start_kernel(void)
softirq_init(); softirq_init();
timekeeping_init(); timekeeping_init();
time_init(); time_init();
sched_clock_postinit();
printk_safe_init(); printk_safe_init();
perf_event_init(); perf_event_init();
profile_init(); profile_init();
...@@ -697,6 +696,7 @@ asmlinkage __visible void __init start_kernel(void) ...@@ -697,6 +696,7 @@ asmlinkage __visible void __init start_kernel(void)
acpi_early_init(); acpi_early_init();
if (late_time_init) if (late_time_init)
late_time_init(); late_time_init();
sched_clock_init();
calibrate_delay(); calibrate_delay();
pid_idr_init(); pid_idr_init();
anon_vma_init(); anon_vma_init();
......
...@@ -53,6 +53,7 @@ ...@@ -53,6 +53,7 @@
* *
*/ */
#include "sched.h" #include "sched.h"
#include <linux/sched_clock.h>
/* /*
* Scheduler clock - returns current time in nanosec units. * Scheduler clock - returns current time in nanosec units.
...@@ -66,12 +67,7 @@ unsigned long long __weak sched_clock(void) ...@@ -66,12 +67,7 @@ unsigned long long __weak sched_clock(void)
} }
EXPORT_SYMBOL_GPL(sched_clock); EXPORT_SYMBOL_GPL(sched_clock);
__read_mostly int sched_clock_running; static DEFINE_STATIC_KEY_FALSE(sched_clock_running);
void sched_clock_init(void)
{
sched_clock_running = 1;
}
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/* /*
...@@ -195,17 +191,40 @@ void clear_sched_clock_stable(void) ...@@ -195,17 +191,40 @@ void clear_sched_clock_stable(void)
smp_mb(); /* matches sched_clock_init_late() */ smp_mb(); /* matches sched_clock_init_late() */
if (sched_clock_running == 2) if (static_key_count(&sched_clock_running.key) == 2)
__clear_sched_clock_stable(); __clear_sched_clock_stable();
} }
/*
 * Recompute __gtod_offset from this CPU's sched_clock_data.
 *
 * The per-CPU record is re-stamped first, then the offset is set to
 * (tick_raw + __sched_clock_offset) - tick_gtod. Both callers invoke this
 * with local interrupts disabled.
 */
static void __sched_clock_gtod_offset(void)
{
	struct sched_clock_data *data = this_scd();

	__scd_stamp(data);

	__gtod_offset = (data->tick_raw + __sched_clock_offset) -
			data->tick_gtod;
}
/*
 * Boot-time enabling of the scheduler clock for the
 * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK build: recompute __gtod_offset with
 * local interrupts disabled, then take the first reference on the
 * sched_clock_running static key.
 */
void __init sched_clock_init(void)
{
/*
* Set __gtod_offset such that once we mark sched_clock_running,
* sched_clock_tick() continues where sched_clock() left off.
*
* Even if TSC is buggered, we're still UP at this point so it
* can't really be out of sync.
*/
local_irq_disable();
__sched_clock_gtod_offset();
local_irq_enable();
/* First increment of the key; sched_clock_init_late() adds the second. */
static_branch_inc(&sched_clock_running);
}
/* /*
* We run this as late_initcall() such that it runs after all built-in drivers, * We run this as late_initcall() such that it runs after all built-in drivers,
* notably: acpi_processor and intel_idle, which can mark the TSC as unstable. * notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
*/ */
static int __init sched_clock_init_late(void) static int __init sched_clock_init_late(void)
{ {
sched_clock_running = 2; static_branch_inc(&sched_clock_running);
/* /*
* Ensure that it is impossible to not do a static_key update. * Ensure that it is impossible to not do a static_key update.
* *
...@@ -350,8 +369,8 @@ u64 sched_clock_cpu(int cpu) ...@@ -350,8 +369,8 @@ u64 sched_clock_cpu(int cpu)
if (sched_clock_stable()) if (sched_clock_stable())
return sched_clock() + __sched_clock_offset; return sched_clock() + __sched_clock_offset;
if (unlikely(!sched_clock_running)) if (!static_branch_unlikely(&sched_clock_running))
return 0ull; return sched_clock();
preempt_disable_notrace(); preempt_disable_notrace();
scd = cpu_sdc(cpu); scd = cpu_sdc(cpu);
...@@ -373,7 +392,7 @@ void sched_clock_tick(void) ...@@ -373,7 +392,7 @@ void sched_clock_tick(void)
if (sched_clock_stable()) if (sched_clock_stable())
return; return;
if (unlikely(!sched_clock_running)) if (!static_branch_unlikely(&sched_clock_running))
return; return;
lockdep_assert_irqs_disabled(); lockdep_assert_irqs_disabled();
...@@ -385,8 +404,6 @@ void sched_clock_tick(void) ...@@ -385,8 +404,6 @@ void sched_clock_tick(void)
void sched_clock_tick_stable(void) void sched_clock_tick_stable(void)
{ {
u64 gtod, clock;
if (!sched_clock_stable()) if (!sched_clock_stable())
return; return;
...@@ -398,9 +415,7 @@ void sched_clock_tick_stable(void) ...@@ -398,9 +415,7 @@ void sched_clock_tick_stable(void)
* TSC to be unstable, any computation will be computing crap. * TSC to be unstable, any computation will be computing crap.
*/ */
local_irq_disable(); local_irq_disable();
gtod = ktime_get_ns(); __sched_clock_gtod_offset();
clock = sched_clock();
__gtod_offset = (clock + __sched_clock_offset) - gtod;
local_irq_enable(); local_irq_enable();
} }
...@@ -434,9 +449,17 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); ...@@ -434,9 +449,17 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
/*
 * Variant built when CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is not set: enable
 * the sched_clock_running static key, then run generic_sched_clock_init()
 * with local interrupts disabled.
 */
void __init sched_clock_init(void)
{
/* From here on sched_clock_cpu() returns sched_clock() instead of 0. */
static_branch_inc(&sched_clock_running);
local_irq_disable();
/* Empty inline stub unless CONFIG_GENERIC_SCHED_CLOCK is enabled. */
generic_sched_clock_init();
local_irq_enable();
}
u64 sched_clock_cpu(int cpu) u64 sched_clock_cpu(int cpu)
{ {
if (unlikely(!sched_clock_running)) if (!static_branch_unlikely(&sched_clock_running))
return 0; return 0;
return sched_clock(); return sched_clock();
......
...@@ -5916,7 +5916,6 @@ void __init sched_init(void) ...@@ -5916,7 +5916,6 @@ void __init sched_init(void)
int i, j; int i, j;
unsigned long alloc_size = 0, ptr; unsigned long alloc_size = 0, ptr;
sched_clock_init();
wait_bit_init(); wait_bit_init();
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
......
...@@ -622,8 +622,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) ...@@ -622,8 +622,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
#undef PU #undef PU
} }
extern __read_mostly int sched_clock_running;
static void print_cpu(struct seq_file *m, int cpu) static void print_cpu(struct seq_file *m, int cpu)
{ {
struct rq *rq = cpu_rq(cpu); struct rq *rq = cpu_rq(cpu);
......
...@@ -237,7 +237,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) ...@@ -237,7 +237,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
pr_debug("Registered %pF as sched_clock source\n", read); pr_debug("Registered %pF as sched_clock source\n", read);
} }
void __init sched_clock_postinit(void) void __init generic_sched_clock_init(void)
{ {
/* /*
* If no sched_clock() function has been provided at that point, * If no sched_clock() function has been provided at that point,
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/nmi.h> #include <linux/nmi.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/loadavg.h> #include <linux/sched/loadavg.h>
#include <linux/sched/clock.h>
#include <linux/syscore_ops.h> #include <linux/syscore_ops.h>
#include <linux/clocksource.h> #include <linux/clocksource.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
...@@ -1505,18 +1506,23 @@ void __weak read_persistent_clock64(struct timespec64 *ts64) ...@@ -1505,18 +1506,23 @@ void __weak read_persistent_clock64(struct timespec64 *ts64)
} }
/** /**
* read_boot_clock64 - Return time of the system start. * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
* from the boot.
* *
* Weak dummy function for arches that do not yet support it. * Weak dummy function for arches that do not yet support it.
* Function to read the exact time the system has been started. * wall_time - current time as returned by persistent clock
* Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported. * boot_offset - offset that is defined as wall_time - boot_time
* * The default function calculates offset based on the current value of
* XXX - Do be sure to remove it once all arches implement it. * local_clock(). This way architectures that support sched_clock() but don't
* support dedicated boot time clock will provide the best estimate of the
* boot time.
*/ */
void __weak read_boot_clock64(struct timespec64 *ts) void __weak __init
read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
struct timespec64 *boot_offset)
{ {
ts->tv_sec = 0; read_persistent_clock64(wall_time);
ts->tv_nsec = 0; *boot_offset = ns_to_timespec64(local_clock());
} }
/* /*
...@@ -1542,28 +1548,29 @@ static bool persistent_clock_exists; ...@@ -1542,28 +1548,29 @@ static bool persistent_clock_exists;
*/ */
void __init timekeeping_init(void) void __init timekeeping_init(void)
{ {
struct timespec64 wall_time, boot_offset, wall_to_mono;
struct timekeeper *tk = &tk_core.timekeeper; struct timekeeper *tk = &tk_core.timekeeper;
struct clocksource *clock; struct clocksource *clock;
unsigned long flags; unsigned long flags;
struct timespec64 now, boot, tmp;
read_persistent_clock64(&now);
if (!timespec64_valid_strict(&now)) {
pr_warn("WARNING: Persistent clock returned invalid value!\n"
" Check your CMOS/BIOS settings.\n");
now.tv_sec = 0;
now.tv_nsec = 0;
} else if (now.tv_sec || now.tv_nsec)
persistent_clock_exists = true;
read_boot_clock64(&boot); read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
if (!timespec64_valid_strict(&boot)) { if (timespec64_valid_strict(&wall_time) &&
pr_warn("WARNING: Boot clock returned invalid value!\n" timespec64_to_ns(&wall_time) > 0) {
" Check your CMOS/BIOS settings.\n"); persistent_clock_exists = true;
boot.tv_sec = 0; } else if (timespec64_to_ns(&wall_time) != 0) {
boot.tv_nsec = 0; pr_warn("Persistent clock returned invalid value");
wall_time = (struct timespec64){0};
} }
if (timespec64_compare(&wall_time, &boot_offset) < 0)
boot_offset = (struct timespec64){0};
/*
* We want to set wall_to_mono, so the following is true:
* wall time + wall_to_mono = boot time
*/
wall_to_mono = timespec64_sub(boot_offset, wall_time);
raw_spin_lock_irqsave(&timekeeper_lock, flags); raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq); write_seqcount_begin(&tk_core.seq);
ntp_init(); ntp_init();
...@@ -1573,13 +1580,10 @@ void __init timekeeping_init(void) ...@@ -1573,13 +1580,10 @@ void __init timekeeping_init(void)
clock->enable(clock); clock->enable(clock);
tk_setup_internals(tk, clock); tk_setup_internals(tk, clock);
tk_set_xtime(tk, &now); tk_set_xtime(tk, &wall_time);
tk->raw_sec = 0; tk->raw_sec = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
boot = tk_xtime(tk);
set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); tk_set_wall_to_mono(tk, wall_to_mono);
tk_set_wall_to_mono(tk, tmp);
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册