dumpstack.c 7.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 */
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/ftrace.h>
#include <linux/kexec.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>

#include <asm/stacktrace.h>
#include <asm/unwind.h>
21 22

/*
 * NMI panic policy flags.  They are only defined here; nothing in this part
 * of the file reads them.
 * NOTE(review): presumably consumed by the NMI handling code - confirm.
 */
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;

/* Set by the "code_bytes=" boot parameter; defaults to 64. */
unsigned int code_bytes = 64;

/* Set by the "kstack" early parameter; defaults to three lines of slots. */
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;

/* Incremented by __die() and printed as the "[#N]" oops sequence number. */
static int die_counter;

28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
/*
 * Check whether @stack points into @task's stack area.
 *
 * The area is the THREAD_SIZE bytes starting at task_stack_page(@task).
 * When @stack lies inside it, @info is filled in to describe the task stack
 * (type, bounds, and no next stack) and true is returned.  Otherwise @info
 * is left untouched and false is returned.
 */
bool in_task_stack(unsigned long *stack, struct task_struct *task,
		   struct stack_info *info)
{
	void *base = task_stack_page(task);
	unsigned long *first = base;
	/* byte arithmetic on the void pointer, not unsigned-long scaling */
	unsigned long *limit = base + THREAD_SIZE;
	bool inside = stack >= first && stack < limit;

	if (inside) {
		info->type	= STACK_TYPE_TASK;
		info->begin	= first;
		info->end	= limit;
		info->next_sp	= NULL;
	}

	return inside;
}

45
/*
 * Print one stack-trace entry.
 *
 * @address:  code address, printed raw with %p and symbolized with %pB
 * @reliable: zero prefixes the symbol with "? "; non-zero prints no prefix
 * @log_lvl:  string emitted at the start of the line
 */
static void printk_stack_address(unsigned long address, int reliable,
				 char *log_lvl)
{
	/*
	 * Runs once per printed entry.
	 * NOTE(review): presumably so a long trace does not trip the NMI
	 * watchdog - confirm.
	 */
	touch_nmi_watchdog();
	printk("%s [<%p>] %s%pB\n",
		log_lvl, (void *)address, reliable ? "" : "? ",
		(void *)address);
}

54 55 56 57 58
/*
 * Continue the current log line with @address, printed first as a raw
 * pointer in "[<...>]" and then symbolized with %pS, followed by a newline.
 */
void printk_address(unsigned long address)
{
	void *ptr = (void *)address;

	pr_cont(" [<%p>] %pS\n", ptr, ptr);
}

59 60
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
			unsigned long *stack, char *log_lvl)
61
{
62 63 64 65
	struct unwind_state state;
	struct stack_info stack_info = {0};
	unsigned long visit_mask = 0;
	int graph_idx = 0;
66

67
	printk("%sCall Trace:\n", log_lvl);
68

69
	unwind_start(&state, task, regs, stack);
70

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
	/*
	 * Iterate through the stacks, starting with the current stack pointer.
	 * Each stack has a pointer to the next one.
	 *
	 * x86-64 can have several stacks:
	 * - task stack
	 * - interrupt stack
	 * - HW exception stacks (double fault, nmi, debug, mce)
	 *
	 * x86-32 can have up to three stacks:
	 * - task stack
	 * - softirq stack
	 * - hardirq stack
	 */
	for (; stack; stack = stack_info.next_sp) {
		const char *str_begin, *str_end;

		/*
		 * If we overflowed the task stack into a guard page, jump back
		 * to the bottom of the usable stack.
		 */
		if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
			stack = task_stack_page(task);

		if (get_stack_info(stack, task, &stack_info, &visit_mask))
			break;

		stack_type_str(stack_info.type, &str_begin, &str_end);
		if (str_begin)
			printk("%s <%s> ", log_lvl, str_begin);

		/*
		 * Scan the stack, printing any text addresses we find.  At the
		 * same time, follow proper stack frames with the unwinder.
		 *
		 * Addresses found during the scan which are not reported by
		 * the unwinder are considered to be additional clues which are
		 * sometimes useful for debugging and are prefixed with '?'.
		 * This also serves as a failsafe option in case the unwinder
		 * goes off in the weeds.
		 */
		for (; stack < stack_info.end; stack++) {
			unsigned long real_addr;
			int reliable = 0;
115
			unsigned long addr = READ_ONCE_NOCHECK(*stack);
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
			unsigned long *ret_addr_p =
				unwind_get_return_address_ptr(&state);

			if (!__kernel_text_address(addr))
				continue;

			if (stack == ret_addr_p)
				reliable = 1;

			/*
			 * When function graph tracing is enabled for a
			 * function, its return address on the stack is
			 * replaced with the address of an ftrace handler
			 * (return_to_handler).  In that case, before printing
			 * the "real" address, we want to print the handler
			 * address as an "unreliable" hint that function graph
			 * tracing was involved.
			 */
			real_addr = ftrace_graph_ret_addr(task, &graph_idx,
							  addr, stack);
			if (real_addr != addr)
				printk_stack_address(addr, 0, log_lvl);
			printk_stack_address(real_addr, reliable, log_lvl);

			if (!reliable)
				continue;

			/*
			 * Get the next frame from the unwinder.  No need to
			 * check for an error: if anything goes wrong, the rest
			 * of the addresses will just be printed as unreliable.
			 */
			unwind_next_frame(&state);
		}

		if (str_end)
			printk("%s <%s> ", log_lvl, str_end);
	}
154 155 156 157
}

void show_stack(struct task_struct *task, unsigned long *sp)
{
158 159
	task = task ? : current;

160 161 162 163
	/*
	 * Stack frames below this one aren't interesting.  Don't show them
	 * if we're printing for %current.
	 */
164
	if (!sp && task == current)
165
		sp = get_stack_pointer(current, NULL);
166

167
	show_stack_log_lvl(task, NULL, sp, "");
168 169
}

170 171
void show_stack_regs(struct pt_regs *regs)
{
172
	show_stack_log_lvl(current, regs, NULL, "");
173 174
}

175
/* Taken in oops_begin(); released in oops_end() once the nest count is 0. */
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/* CPU number recorded by oops_begin(); reset to -1 by oops_end(). */
static int die_owner = -1;
/* Incremented by each oops_begin(), decremented by each oops_end(). */
static unsigned int die_nest_count;

179
unsigned long oops_begin(void)
180 181 182 183 184 185 186 187 188
{
	int cpu;
	unsigned long flags;

	oops_enter();

	/* racy, but better than risking deadlock. */
	raw_local_irq_save(flags);
	cpu = smp_processor_id();
189
	if (!arch_spin_trylock(&die_lock)) {
190 191 192
		if (cpu == die_owner)
			/* nested oops. should stop eventually */;
		else
193
			arch_spin_lock(&die_lock);
194 195 196 197 198 199 200
	}
	die_nest_count++;
	die_owner = cpu;
	console_verbose();
	bust_spinlocks(1);
	return flags;
}
201
EXPORT_SYMBOL_GPL(oops_begin);
202
NOKPROBE_SYMBOL(oops_begin);
203

204 205
/* Defined outside this file; called at the end of oops_end() and never returns. */
void __noreturn rewind_stack_do_exit(int signr);

206
void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
207 208 209 210 211 212
{
	if (regs && kexec_should_crash(current))
		crash_kexec(regs);

	bust_spinlocks(0);
	die_owner = -1;
213
	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
214 215 216
	die_nest_count--;
	if (!die_nest_count)
		/* Nest count reaches zero, release the lock. */
217
		arch_spin_unlock(&die_lock);
218 219 220 221 222 223 224 225 226
	raw_local_irq_restore(flags);
	oops_exit();

	if (!signr)
		return;
	if (in_interrupt())
		panic("Fatal exception in interrupt");
	if (panic_on_oops)
		panic("Fatal exception");
227 228 229 230 231 232 233

	/*
	 * We're not going to return, but we might be on an IST stack or
	 * have very little stack space left.  Rewind the stack and kill
	 * the task.
	 */
	rewind_stack_do_exit(signr);
234
}
235
NOKPROBE_SYMBOL(oops_end);
236

237
int __die(const char *str, struct pt_regs *regs, long err)
238 239 240 241 242
{
#ifdef CONFIG_X86_32
	unsigned short ss;
	unsigned long sp;
#endif
243
	printk(KERN_DEFAULT
244 245 246 247 248 249
	       "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
	       IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
	       debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
	       IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");

250
	if (notify_die(DIE_OOPS, str, regs, err,
251
			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
252 253
		return 1;

254
	print_modules();
255
	show_regs(regs);
256
#ifdef CONFIG_X86_32
257
	if (user_mode(regs)) {
258 259
		sp = regs->sp;
		ss = regs->ss & 0xffff;
260 261 262
	} else {
		sp = kernel_stack_pointer(regs);
		savesegment(ss, ss);
263 264 265 266 267 268 269
	}
	printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
	print_symbol("%s", regs->ip);
	printk(" SS:ESP %04x:%08lx\n", ss, sp);
#else
	/* Executive summary in case the oops scrolled away */
	printk(KERN_ALERT "RIP ");
270
	printk_address(regs->ip);
271 272 273 274
	printk(" RSP <%016lx>\n", regs->sp);
#endif
	return 0;
}
275
NOKPROBE_SYMBOL(__die);
276 277 278 279 280 281 282 283 284 285

/*
 * This is gone through when something in the kernel has done something bad
 * and is about to be terminated:
 */
void die(const char *str, struct pt_regs *regs, long err)
{
	unsigned long flags = oops_begin();
	int sig = SIGSEGV;

286
	if (!user_mode(regs))
287 288 289 290 291 292 293 294 295
		report_bug(regs->ip, regs);

	if (__die(str, regs, err))
		sig = 0;
	oops_end(flags, regs, sig);
}

/*
 * Handler for the "kstack" early parameter: parse @s as an unsigned number
 * (base auto-detected) and store it in kstack_depth_to_print.
 *
 * Returns 0 on success, -EINVAL when no value was supplied, -ERANGE when
 * the value does not fit in an int, or the kstrtoul() error code.
 */
static int __init kstack_setup(char *s)
{
	unsigned long val;
	int ret;

	if (!s)
		return -EINVAL;

	ret = kstrtoul(s, 0, &val);
	if (ret)
		return ret;

	/*
	 * kstack_depth_to_print is an int; storing a larger value would
	 * silently wrap it to a negative depth.
	 */
	if (val > INT_MAX)
		return -ERANGE;

	kstack_depth_to_print = val;
	return 0;
}
early_param("kstack", kstack_setup);

/*
 * Handler for the "code_bytes=" boot parameter: parse @s as an unsigned
 * number (base auto-detected) and store it in code_bytes, capped at 8192.
 *
 * Returns 1 on success, -EINVAL when no value was supplied, or the
 * kstrtoul() error code.
 */
static int __init code_bytes_setup(char *s)
{
	unsigned long val;
	int ret;

	if (!s)
		return -EINVAL;

	ret = kstrtoul(s, 0, &val);
	if (ret)
		return ret;

	/*
	 * Clamp before narrowing to unsigned int, so that values wider than
	 * 32 bits are capped instead of being truncated to a small number.
	 */
	if (val > 8192)
		val = 8192;
	code_bytes = val;

	return 1;
}
__setup("code_bytes=", code_bytes_setup);