提交 541048a1 编写于 作者: L Linus Torvalds

Merge branch 'x86-debug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'x86-debug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, reboot: Fix typo in nmi reboot path
  x86, NMI: Add to_cpumask() to silence compile warning
  x86, NMI: NMI selftest depends on the local apic
  x86: Add stack top margin for stack overflow checking
  x86, NMI: NMI-selftest should handle the UP case properly
  x86: Fix the 32-bit stackoverflow-debug build
  x86, NMI: Add knob to disable using NMI IPIs to stop cpus
  x86, NMI: Add NMI IPI selftest
  x86, reboot: Use NMI instead of REBOOT_VECTOR to stop cpus
  x86: Clean up the range of stack overflow checking
  x86: Panic on detection of stack overflow
  x86: Check stack overflow in detail
......@@ -1824,6 +1824,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nomfgpt [X86-32] Disable Multi-Function General Purpose
Timer usage (for AMD Geode machines).
nonmi_ipi [X86] Disable using NMI IPIs during panic/reboot to
shutdown the other cpus. Instead use the REBOOT_VECTOR
irq.
nopat [X86] Disable PAT (page attribute table extension of
pagetables) support.
......
......@@ -49,6 +49,7 @@ show up in /proc/sys/kernel:
- panic
- panic_on_oops
- panic_on_unrecovered_nmi
- panic_on_stackoverflow
- pid_max
- powersave-nap [ PPC only ]
- printk
......@@ -393,6 +394,19 @@ Controls the kernel's behaviour when an oops or BUG is encountered.
==============================================================
panic_on_stackoverflow:
Controls the kernel's behavior when detecting the overflows of
kernel, IRQ and exception stacks except a user stack.
This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
0: try to continue operation.
1: panic immediately.
==============================================================
pid_max:
PID allocation wrap value. When the kernel's next PID value
......
......@@ -63,8 +63,11 @@ config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
---help---
This option will cause messages to be printed if free stack space
drops below a certain limit.
Say Y here if you want to check the overflows of kernel, IRQ
and exception stacks. This option will cause messages of the
stacks in detail when free stack space drops below a certain
limit.
If in doubt, say "N".
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
......@@ -284,4 +287,16 @@ config DEBUG_STRICT_USER_COPY_CHECKS
If unsure, or if you run an older (pre 4.4) gcc, say N.
config DEBUG_NMI_SELFTEST
bool "NMI Selftest"
depends on DEBUG_KERNEL && X86_LOCAL_APIC
---help---
Enabling this option turns on a quick NMI selftest to verify
that the NMI behaves correctly.
This might help diagnose strange hangs that rely on NMI to
function properly.
If unsure, say N.
endmenu
......@@ -225,5 +225,11 @@ extern int hard_smp_processor_id(void);
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_DEBUG_NMI_SELFTEST
extern void nmi_selftest(void);
#else
#define nmi_selftest() do { } while (0)
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_SMP_H */
......@@ -80,6 +80,7 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o
obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
......
......@@ -28,6 +28,9 @@ DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
#ifdef CONFIG_DEBUG_STACKOVERFLOW
int sysctl_panic_on_stackoverflow __read_mostly;
/* Debugging check for stack overflow: is there less than 1KB free? */
static int check_stack_overflow(void)
{
......@@ -43,6 +46,8 @@ static void print_stack_overflow(void)
{
printk(KERN_WARNING "low stack detected by irq handler\n");
dump_stack();
if (sysctl_panic_on_stackoverflow)
panic("low stack detected by irq handler - check messages\n");
}
#else
......
......@@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
EXPORT_PER_CPU_SYMBOL(irq_regs);
int sysctl_panic_on_stackoverflow;
/*
* Probabilistic stack overflow check:
*
......@@ -36,18 +38,39 @@ EXPORT_PER_CPU_SYMBOL(irq_regs);
static inline void stack_overflow_check(struct pt_regs *regs)
{
#ifdef CONFIG_DEBUG_STACKOVERFLOW
#define STACK_TOP_MARGIN 128
struct orig_ist *oist;
u64 irq_stack_top, irq_stack_bottom;
u64 estack_top, estack_bottom;
u64 curbase = (u64)task_stack_page(current);
if (user_mode_vm(regs))
return;
WARN_ONCE(regs->sp >= curbase &&
regs->sp <= curbase + THREAD_SIZE &&
regs->sp < curbase + sizeof(struct thread_info) +
sizeof(struct pt_regs) + 128,
if (regs->sp >= curbase + sizeof(struct thread_info) +
sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
regs->sp <= curbase + THREAD_SIZE)
return;
irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) +
STACK_TOP_MARGIN;
irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr);
if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
return;
oist = &__get_cpu_var(orig_ist);
estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
return;
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
current->comm, curbase, regs->sp,
irq_stack_top, irq_stack_bottom,
estack_top, estack_bottom);
"do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
current->comm, curbase, regs->sp);
if (sysctl_panic_on_stackoverflow)
panic("low stack detected by irq handler - check messages\n");
#endif
}
......
/*
* arch/x86/kernel/nmi-selftest.c
*
* Testsuite for NMI: IPIs
*
* Started by Don Zickus:
* (using lib/locking-selftest.c as a guide)
*
* Copyright (C) 2011 Red Hat, Inc., Don Zickus <dzickus@redhat.com>
*/
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#define SUCCESS 0
#define FAILURE 1
#define TIMEOUT 2
static int nmi_fail;
/* check to see if NMI IPIs work on this machine */
static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly;
static int testcase_total;
static int testcase_successes;
static int expected_testcase_failures;
static int unexpected_testcase_failures;
static int unexpected_testcase_unknowns;
static int nmi_unk_cb(unsigned int val, struct pt_regs *regs)
{
unexpected_testcase_unknowns++;
return NMI_HANDLED;
}
static void init_nmi_testsuite(void)
{
/* trap all the unknown NMIs we may generate */
register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
}
static void cleanup_nmi_testsuite(void)
{
unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk");
}
static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
{
int cpu = raw_smp_processor_id();
if (cpumask_test_and_clear_cpu(cpu, to_cpumask(nmi_ipi_mask)))
return NMI_HANDLED;
return NMI_DONE;
}
static void test_nmi_ipi(struct cpumask *mask)
{
unsigned long timeout;
if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
NMI_FLAG_FIRST, "nmi_selftest")) {
nmi_fail = FAILURE;
return;
}
/* sync above data before sending NMI */
wmb();
apic->send_IPI_mask(mask, NMI_VECTOR);
/* Don't wait longer than a second */
timeout = USEC_PER_SEC;
while (!cpumask_empty(mask) && timeout--)
udelay(1);
/* What happens if we timeout, do we still unregister?? */
unregister_nmi_handler(NMI_LOCAL, "nmi_selftest");
if (!timeout)
nmi_fail = TIMEOUT;
return;
}
static void remote_ipi(void)
{
cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
if (!cpumask_empty(to_cpumask(nmi_ipi_mask)))
test_nmi_ipi(to_cpumask(nmi_ipi_mask));
}
static void local_ipi(void)
{
cpumask_clear(to_cpumask(nmi_ipi_mask));
cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
test_nmi_ipi(to_cpumask(nmi_ipi_mask));
}
static void reset_nmi(void)
{
nmi_fail = 0;
}
static void dotest(void (*testcase_fn)(void), int expected)
{
testcase_fn();
/*
* Filter out expected failures:
*/
if (nmi_fail != expected) {
unexpected_testcase_failures++;
if (nmi_fail == FAILURE)
printk("FAILED |");
else if (nmi_fail == TIMEOUT)
printk("TIMEOUT|");
else
printk("ERROR |");
dump_stack();
} else {
testcase_successes++;
printk(" ok |");
}
testcase_total++;
reset_nmi();
}
static inline void print_testname(const char *testname)
{
printk("%12s:", testname);
}
void nmi_selftest(void)
{
init_nmi_testsuite();
/*
* Run the testsuite:
*/
printk("----------------\n");
printk("| NMI testsuite:\n");
printk("--------------------\n");
print_testname("remote IPI");
dotest(remote_ipi, SUCCESS);
printk("\n");
print_testname("local IPI");
dotest(local_ipi, SUCCESS);
printk("\n");
cleanup_nmi_testsuite();
if (unexpected_testcase_failures) {
printk("--------------------\n");
printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
unexpected_testcase_failures, testcase_total);
printk("-----------------------------------------------------------------\n");
} else if (expected_testcase_failures && testcase_successes) {
printk("--------------------\n");
printk("%3d out of %3d testcases failed, as expected. |\n",
expected_testcase_failures, testcase_total);
printk("----------------------------------------------------\n");
} else if (expected_testcase_failures && !testcase_successes) {
printk("--------------------\n");
printk("All %3d testcases failed, as expected. |\n",
expected_testcase_failures);
printk("----------------------------------------\n");
} else {
printk("--------------------\n");
printk("Good, all %3d testcases passed! |\n",
testcase_successes);
printk("---------------------------------\n");
}
}
......@@ -29,6 +29,7 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/nmi.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
*
......@@ -148,6 +149,60 @@ void native_send_call_func_ipi(const struct cpumask *mask)
free_cpumask_var(allbutself);
}
static atomic_t stopping_cpu = ATOMIC_INIT(-1);
static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
{
/* We are registered on stopping cpu too, avoid spurious NMI */
if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
return NMI_HANDLED;
stop_this_cpu(NULL);
return NMI_HANDLED;
}
static void native_nmi_stop_other_cpus(int wait)
{
unsigned long flags;
unsigned long timeout;
if (reboot_force)
return;
/*
* Use an own vector here because smp_call_function
* does lots of things not suitable in a panic situation.
*/
if (num_online_cpus() > 1) {
/* did someone beat us here? */
if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
return;
if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
NMI_FLAG_FIRST, "smp_stop"))
/* Note: we ignore failures here */
return;
/* sync above data before sending NMI */
wmb();
apic->send_IPI_allbutself(NMI_VECTOR);
/*
* Don't wait longer than a second if the caller
* didn't ask us to wait.
*/
timeout = USEC_PER_SEC;
while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
local_irq_save(flags);
disable_local_APIC();
local_irq_restore(flags);
}
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/
......@@ -160,7 +215,7 @@ asmlinkage void smp_reboot_interrupt(void)
irq_exit();
}
static void native_stop_other_cpus(int wait)
static void native_irq_stop_other_cpus(int wait)
{
unsigned long flags;
unsigned long timeout;
......@@ -194,6 +249,11 @@ static void native_stop_other_cpus(int wait)
local_irq_restore(flags);
}
static void native_smp_disable_nmi_ipi(void)
{
smp_ops.stop_other_cpus = native_irq_stop_other_cpus;
}
/*
* Reschedule call back.
*/
......@@ -225,12 +285,20 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
irq_exit();
}
static int __init nonmi_ipi_setup(char *str)
{
native_smp_disable_nmi_ipi();
return 1;
}
__setup("nonmi_ipi", nonmi_ipi_setup);
struct smp_ops smp_ops = {
.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
.smp_prepare_cpus = native_smp_prepare_cpus,
.smp_cpus_done = native_smp_cpus_done,
.stop_other_cpus = native_stop_other_cpus,
.stop_other_cpus = native_nmi_stop_other_cpus,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
......
......@@ -1143,6 +1143,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
{
pr_debug("Boot done.\n");
nmi_selftest();
impress_friends();
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
......
......@@ -341,6 +341,7 @@ extern int panic_timeout;
extern int panic_on_oops;
extern int panic_on_unrecovered_nmi;
extern int panic_on_io_nmi;
extern int sysctl_panic_on_stackoverflow;
extern const char *print_tainted(void);
extern void add_taint(unsigned flag);
extern int test_taint(unsigned flag);
......
......@@ -803,6 +803,15 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_DEBUG_STACKOVERFLOW
{
.procname = "panic_on_stackoverflow",
.data = &sysctl_panic_on_stackoverflow,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#endif
{
.procname = "bootloader_type",
.data = &bootloader_type,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册