提交 ec6b79f6 编写于 作者: X Xu Qiang 提交者: Zheng Zengkai

watchdog/corelockup: Optimized core lockup detection judgment rules

ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4F3V1
CVE: NA

--------------------------------

Optimized core lockup detection judgment rules to
make it easier to understand.

Core suspension detection is performed in the hrtimer
interrupt processing function. The detection condition
is that the hrtimer interrupt and NMI interrupt are not
updated for multiple consecutive times.
Signed-off-by: NXu Qiang <xuqiang36@huawei.com>
Reviewed-by: NDing Tianhong <dingtianhong@huawei.com>
Signed-off-by: NYang Yingliang <yangyingliang@huawei.com>
Reviewed-by: NDing Tianhong <dingtianhong@huawei.com>
Signed-off-by: NZheng Zengkai <zhengzengkai@huawei.com>
上级 3cd0ed4c
......@@ -15,23 +15,8 @@
#define nops(n) asm volatile(__nops(n))
#define sev() asm volatile("sev" : : : "memory")
#ifdef CONFIG_CORELOCKUP_DETECTOR
extern unsigned int close_wfi_wfe;
#define wfe() \
do { \
if (likely(close_wfi_wfe == 0)) \
asm volatile("wfe" : : : "memory"); \
} while (0)
#define wfi() \
do { \
if (likely(close_wfi_wfe == 0)) \
asm volatile("wfi" : : : "memory"); \
} while (0)
#else
#define wfe() asm volatile("wfe" : : : "memory")
#define wfi() asm volatile("wfi" : : : "memory")
#endif
#define isb() asm volatile("isb" : : : "memory")
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
......
......@@ -64,14 +64,9 @@ EXPORT_SYMBOL(arch_touch_nmi_watchdog);
*
* When using pmu events as nmi source, the pmu clock is disabled
* under wfi/wfe mode. And the nmi can't respond periodically.
* To minimize the misjudgment by wfi/wfe, we adopt a simple method
* which to disable wfi/wfe at the right time and the watchdog hrtimer
* is a good baseline.
*
* The watchdog hrtimer is based on generate timer and has high freq
* than nmi. If watchdog hrtimer not works we disable wfi/wfe mode
* then the pmu nmi should always responds as long as the cpu core
* not suspend.
* However, when the core is suspended, the hrtimer interrupt and
* NMI interrupt cannot be received. This can be used as the basis
* for determining whether the core is suspended.
*
* detector_cpu: the target cpu to detector of current cpu
* nmi_interrupts: the nmi counts of current cpu
......@@ -79,20 +74,14 @@ EXPORT_SYMBOL(arch_touch_nmi_watchdog);
* nmi_cnt_missed: the nmi consecutive miss counts of detector_cpu
* hrint_saved: saved hrtimer interrupts of detector_cpu
* hrint_missed: the hrtimer consecutive miss counts of detector_cpu
* corelockup_cpumask/close_wfi_wfe:
* the cpu mask is set if certain cpu maybe fall in suspend and close
* wfi/wfe mode if any bit is set
*/
static DEFINE_PER_CPU(unsigned int, detector_cpu);
static DEFINE_PER_CPU(unsigned long, nmi_interrupts);
static DEFINE_PER_CPU(unsigned long, nmi_cnt_saved);
static DEFINE_PER_CPU(unsigned long, nmi_cnt_missed);
static DEFINE_PER_CPU(bool, core_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrint_saved);
static DEFINE_PER_CPU(unsigned long, hrint_missed);
struct cpumask corelockup_cpumask __read_mostly;
unsigned int close_wfi_wfe;
static bool pmu_based_nmi;
static unsigned long corelockup_allcpu_dumped;
bool enable_corelockup_detector;
static int __init enable_corelockup_detector_setup(char *str)
......@@ -152,6 +141,11 @@ void watchdog_check_hrtimer(void)
{
unsigned int cpu = __this_cpu_read(detector_cpu);
unsigned long hrint = watchdog_hrtimer_interrupts(cpu);
unsigned long nmi_int = per_cpu(nmi_interrupts, cpu);
/* skip check if only one cpu online */
if (cpu == smp_processor_id())
return;
/*
* The freq of hrtimer is fast than nmi interrupts and
......@@ -161,23 +155,31 @@ void watchdog_check_hrtimer(void)
*/
watchdog_nmi_interrupts();
if (!pmu_based_nmi)
return;
if (__this_cpu_read(hrint_saved) != hrint) {
__this_cpu_write(hrint_saved, hrint);
__this_cpu_write(hrint_missed, 0);
cpumask_clear_cpu(cpu, &corelockup_cpumask);
} else {
__this_cpu_inc(hrint_missed);
if (__this_cpu_read(hrint_missed) > 2)
cpumask_set_cpu(cpu, &corelockup_cpumask);
return;
}
__this_cpu_inc(hrint_missed);
if (__this_cpu_read(nmi_cnt_saved) != nmi_int) {
__this_cpu_write(nmi_cnt_saved, nmi_int);
__this_cpu_write(nmi_cnt_missed, 0);
return;
}
__this_cpu_inc(nmi_cnt_missed);
if (likely(cpumask_empty(&corelockup_cpumask)))
close_wfi_wfe = 0;
else
close_wfi_wfe = 1;
if ((__this_cpu_read(hrint_missed) > 5) && (__this_cpu_read(nmi_cnt_missed) > 5)) {
pr_emerg("Watchdog detected core LOCKUP on cpu %d\n", cpu);
if (!test_and_set_bit(0, &corelockup_allcpu_dumped)) {
trigger_allbutself_cpu_backtrace();
panic("Core LOCKUP");
} else {
while (1)
cpu_relax();
}
}
}
/*
......@@ -208,9 +210,6 @@ void corelockup_detector_offline_cpu(unsigned int cpu)
unsigned int prev = nr_cpu_ids;
unsigned int i;
/* clear bitmap */
cpumask_clear_cpu(cpu, &corelockup_cpumask);
/* found prev cpu */
for_each_cpu_and(i, &watchdog_cpumask, cpu_online_mask) {
if (per_cpu(detector_cpu, i) == cpu) {
......@@ -225,45 +224,6 @@ void corelockup_detector_offline_cpu(unsigned int cpu)
/* prev->next */
corelockup_status_copy(cpu, prev);
}
static bool is_corelockup(unsigned int cpu)
{
unsigned long nmi_int = per_cpu(nmi_interrupts, cpu);
/* skip check if only one cpu online */
if (cpu == smp_processor_id())
return false;
if (__this_cpu_read(nmi_cnt_saved) != nmi_int) {
__this_cpu_write(nmi_cnt_saved, nmi_int);
__this_cpu_write(nmi_cnt_missed, 0);
per_cpu(core_watchdog_warn, cpu) = false;
return false;
}
__this_cpu_inc(nmi_cnt_missed);
if (__this_cpu_read(nmi_cnt_missed) > 2)
return true;
return false;
}
NOKPROBE_SYMBOL(is_corelockup);
static void watchdog_corelockup_check(struct pt_regs *regs)
{
unsigned int cpu = __this_cpu_read(detector_cpu);
if (is_corelockup(cpu)) {
if (per_cpu(core_watchdog_warn, cpu) == true)
return;
pr_emerg("Watchdog detected core LOCKUP on cpu %d\n", cpu);
if (hardlockup_panic)
nmi_panic(regs, "Core LOCKUP");
per_cpu(core_watchdog_warn, cpu) = true;
}
}
#endif
#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
......@@ -337,9 +297,6 @@ void watchdog_hardlockup_check(struct pt_regs *regs)
if (enable_corelockup_detector) {
/* Kick nmi interrupts */
watchdog_nmi_interrupts();
/* corelockup check */
watchdog_corelockup_check(regs);
}
#endif
......@@ -549,9 +506,6 @@ int __init hardlockup_detector_perf_init(void)
perf_event_release_kernel(this_cpu_read(watchdog_ev));
this_cpu_write(watchdog_ev, NULL);
}
#ifdef CONFIG_CORELOCKUP_DETECTOR
pmu_based_nmi = true;
#endif
return ret;
}
#endif /* CONFIG_HARDLOCKUP_DETECTOR_PERF */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册