trace.c 179.3 KB
Newer Older
1 2 3
/*
 * ring buffer based function tracer
 *
4
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 6 7 8 9 10 11
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
12
 *  Copyright (C) 2004 Nadia Yvette Chambers
13
 */
14
#include <linux/ring_buffer.h>
15
#include <generated/utsrelease.h>
16 17
#include <linux/stacktrace.h>
#include <linux/writeback.h>
18 19
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
20
#include <linux/notifier.h>
21
#include <linux/irqflags.h>
22
#include <linux/debugfs.h>
23
#include <linux/tracefs.h>
24
#include <linux/pagemap.h>
25 26 27
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
28
#include <linux/kprobes.h>
29 30 31
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
32
#include <linux/splice.h>
33
#include <linux/kdebug.h>
34
#include <linux/string.h>
35
#include <linux/mount.h>
36
#include <linux/rwsem.h>
37
#include <linux/slab.h>
38 39
#include <linux/ctype.h>
#include <linux/init.h>
40
#include <linux/poll.h>
41
#include <linux/nmi.h>
42
#include <linux/fs.h>
43
#include <linux/sched/rt.h>
I
Ingo Molnar 已提交
44

45
#include "trace.h"
46
#include "trace_output.h"
47

48 49 50 51
/*
 * On boot up, the ring buffer is set to the minimum size, so that
 * we do not waste memory on systems that are not using tracing.
 */
52
bool ring_buffer_expanded;
53

54 55
/*
 * We need to change this state when a selftest is running.
56 57
 * A selftest will lurk into the ring-buffer to count the
 * entries inserted during the selftest although some concurrent
58
 * insertions into the ring-buffer such as trace_printk could occurred
59 60
 * at the same time, giving false positive or negative results.
 */
61
static bool __read_mostly tracing_selftest_running;
62

63 64 65
/*
 * If a tracer is running, we do not want to run SELFTEST.
 */
66
bool __read_mostly tracing_selftest_disabled;
67

68 69 70 71
/* Pipe tracepoints to printk */
struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;

72 73 74 75 76
/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
	{ }
};

77 78
static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 80 81
{
	return 0;
}
82

83 84 85 86 87 88 89
/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
static DEFINE_PER_CPU(bool, trace_cmdline_save);

90 91 92 93 94 95
/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
96
static int tracing_disabled = 1;
97

98
cpumask_var_t __read_mostly	tracing_buffer_mask;
99

100 101 102 103 104 105 106 107 108 109 110
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputing it to a
 * serial console.
 *
 * It is default off, but you can enable it with either specifying
 * "ftrace_dump_on_oops" in the kernel command line, or setting
111 112 113
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
114
 */
115 116

enum ftrace_dump_mode ftrace_dump_on_oops;
117

118 119 120
/* When set, tracing will stop when a WARN*() is hit */
int __disable_trace_on_warning;

121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
#ifdef CONFIG_TRACE_ENUM_MAP_FILE
/* Map of enums to their values, for "enum_map" file */
struct trace_enum_map_head {
	struct module			*mod;
	unsigned long			length;
};

union trace_enum_map_item;

struct trace_enum_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "enum_string"
	 */
	union trace_enum_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_enum_mutex);

/*
 * The trace_enum_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved enum_map items.
 */
union trace_enum_map_item {
	struct trace_enum_map		map;
	struct trace_enum_map_head	head;
	struct trace_enum_map_tail	tail;
};

static union trace_enum_map_item *trace_enum_maps;
#endif /* CONFIG_TRACE_ENUM_MAP_FILE */

157
static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158

L
Li Zefan 已提交
159 160
#define MAX_TRACER_SIZE		100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161
static char *default_bootup_tracer;
162

163 164
static bool allocate_snapshot;

165
static int __init set_cmdline_ftrace(char *str)
166
{
167
	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168
	default_bootup_tracer = bootup_tracer_buf;
169
	/* We are using ftrace early, expand it */
170
	ring_buffer_expanded = true;
171 172
	return 1;
}
173
__setup("ftrace=", set_cmdline_ftrace);
174

175 176
static int __init set_ftrace_dump_on_oops(char *str)
{
177 178 179 180 181 182 183 184 185 186 187
	if (*str++ != '=' || !*str) {
		ftrace_dump_on_oops = DUMP_ALL;
		return 1;
	}

	if (!strcmp("orig_cpu", str)) {
		ftrace_dump_on_oops = DUMP_ORIG;
                return 1;
        }

        return 0;
188 189
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
S
Steven Rostedt 已提交
190

191 192
static int __init stop_trace_on_warning(char *str)
{
193 194
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
195 196
	return 1;
}
197
__setup("traceoff_on_warning", stop_trace_on_warning);
198

199
static int __init boot_alloc_snapshot(char *str)
200 201 202 203 204 205
{
	allocate_snapshot = true;
	/* We also need the main ring buffer expanded */
	ring_buffer_expanded = true;
	return 1;
}
206
__setup("alloc_snapshot", boot_alloc_snapshot);
207

208 209 210 211 212

static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
213
	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 215 216 217
	return 0;
}
__setup("trace_options=", set_trace_boot_options);

218 219 220 221 222 223 224 225 226 227 228
static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 0;
}
__setup("trace_clock=", set_trace_boot_clock);

229 230 231 232 233 234 235
static int __init set_tracepoint_printk(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);
236

237
unsigned long long ns2usecs(cycle_t nsec)
238 239 240 241 242 243
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}

244 245 246 247 248 249 250 251
/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS |					\
	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)

252 253 254 255
/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)

256 257 258
/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
	TRACE_ITER_EVENT_FORK
259

S
Steven Rostedt 已提交
260 261 262 263 264 265 266 267 268 269 270 271
/*
 * The global_trace is the descriptor that holds the tracing
 * buffers for the live tracing. For each CPU, it contains
 * a link list of pages that will store trace entries. The
 * page descriptor of the pages in the memory is used to hold
 * the link list by linking the lru item in the page descriptor
 * to each of the pages in the buffer per CPU.
 *
 * For each active CPU there is a data field that holds the
 * pages for the buffer for that CPU. Each CPU has the same number
 * of pages allocated for its buffer.
 */
272 273 274
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};
275

276
LIST_HEAD(ftrace_trace_arrays);
277

278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
int trace_array_get(struct trace_array *this_tr)
{
	struct trace_array *tr;
	int ret = -ENODEV;

	mutex_lock(&trace_types_lock);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			tr->ref++;
			ret = 0;
			break;
		}
	}
	mutex_unlock(&trace_types_lock);

	return ret;
}

static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}

void trace_array_put(struct trace_array *this_tr)
{
	mutex_lock(&trace_types_lock);
	__trace_array_put(this_tr);
	mutex_unlock(&trace_types_lock);
}

309
int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 311 312 313 314
			      struct ring_buffer *buffer,
			      struct ring_buffer_event *event)
{
	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
	    !filter_match_preds(call->filter, rec)) {
315
		__trace_event_discard_commit(buffer, event);
316 317 318 319
		return 1;
	}

	return 0;
320 321
}

322
static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
323 324 325 326
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
327
	if (!buf->buffer)
328 329
		return trace_clock_local();

330 331
	ts = ring_buffer_time_stamp(buf->buffer, cpu);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
332 333 334

	return ts;
}
335

336 337 338 339 340
cycle_t ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
}

341 342 343 344 345 346 347 348 349
/**
 * tracing_is_enabled - Show if global_trace has been disabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
350 351
int tracing_is_enabled(void)
{
352 353 354 355 356 357 358
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	smp_rmb();
	return !global_trace.buffer_disabled;
359 360
}

S
Steven Rostedt 已提交
361
/*
362 363 364
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
365 366 367 368 369
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway this can be
 * boot time and run time configurable.
S
Steven Rostedt 已提交
370
 */
371
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
372

373
static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
374

S
Steven Rostedt 已提交
375
/* trace_types holds a link list of available tracers. */
376
static struct tracer		*trace_types __read_mostly;
S
Steven Rostedt 已提交
377 378 379 380

/*
 * trace_types_lock is used to protect the trace_types list.
 */
381
DEFINE_MUTEX(trace_types_lock);
S
Steven Rostedt 已提交
382

383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410
/*
 * serialize the access of the ring buffer
 *
 * ring buffer serializes readers, but it is low level protection.
 * The validity of the events (which returns by ring_buffer_peek() ..etc)
 * are not protected by ring buffer.
 *
 * The content of events may become garbage if we allow other process consumes
 * these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not reader page) in ring buffer, and this page will be rewrited
 *      by events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to system.
 *
 * These primitives allow multi process access to different cpu ring buffer
 * concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multi read-only access are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
411
	if (cpu == RING_BUFFER_ALL_CPUS) {
412 413 414 415 416
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

417
		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
418 419 420 421 422 423 424 425 426
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}

static inline void trace_access_unlock(int cpu)
{
427
	if (cpu == RING_BUFFER_ALL_CPUS) {
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}

static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}

#else

static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
}

#endif

465 466 467 468
#ifdef CONFIG_STACKTRACE
static void __ftrace_trace_stack(struct ring_buffer *buffer,
				 unsigned long flags,
				 int skip, int pc, struct pt_regs *regs);
469 470
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct ring_buffer *buffer,
471 472
				      unsigned long flags,
				      int skip, int pc, struct pt_regs *regs);
473

474 475 476 477 478 479
#else
static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
					unsigned long flags,
					int skip, int pc, struct pt_regs *regs)
{
}
480 481
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct ring_buffer *buffer,
482 483
				      unsigned long flags,
				      int skip, int pc, struct pt_regs *regs)
484 485 486
{
}

487 488
#endif

489
static void tracer_tracing_on(struct trace_array *tr)
490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
{
	if (tr->trace_buffer.buffer)
		ring_buffer_record_on(tr->trace_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
	/* Make the flag seen by readers */
	smp_wmb();
}

506 507 508 509 510 511 512 513
/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
514
	tracer_tracing_on(&global_trace);
515 516 517
}
EXPORT_SYMBOL_GPL(tracing_on);

518 519 520 521 522 523 524 525 526 527 528 529 530
/**
 * __trace_puts - write a constant string into the trace buffer.
 * @ip:	   The address of the caller
 * @str:   The constant string to write
 * @size:  The size of the string.
 */
int __trace_puts(unsigned long ip, const char *str, int size)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct print_entry *entry;
	unsigned long irq_flags;
	int alloc;
531 532
	int pc;

533
	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
534 535
		return 0;

536
	pc = preempt_count();
537

538 539 540
	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

541 542 543 544 545
	alloc = sizeof(*entry) + size + 2; /* possible \n added */

	local_save_flags(irq_flags);
	buffer = global_trace.trace_buffer.buffer;
	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
546
					  irq_flags, pc);
547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
	if (!event)
		return 0;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, str, size);

	/* Add a newline if necessary */
	if (entry->buf[size - 1] != '\n') {
		entry->buf[size] = '\n';
		entry->buf[size + 1] = '\0';
	} else
		entry->buf[size] = '\0';

	__buffer_unlock_commit(buffer, event);
563
	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580

	return size;
}
EXPORT_SYMBOL_GPL(__trace_puts);

/**
 * __trace_bputs - write the pointer to a constant string into trace buffer
 * @ip:	   The address of the caller
 * @str:   The constant string to write to the buffer to
 */
int __trace_bputs(unsigned long ip, const char *str)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct bputs_entry *entry;
	unsigned long irq_flags;
	int size = sizeof(struct bputs_entry);
581 582
	int pc;

583
	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
584 585
		return 0;

586
	pc = preempt_count();
587

588 589 590
	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

591 592 593
	local_save_flags(irq_flags);
	buffer = global_trace.trace_buffer.buffer;
	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
594
					  irq_flags, pc);
595 596 597 598 599 600 601 602
	if (!event)
		return 0;

	entry = ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->str			= str;

	__buffer_unlock_commit(buffer, event);
603
	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
604 605 606 607 608

	return 1;
}
EXPORT_SYMBOL_GPL(__trace_bputs);

609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629
#ifdef CONFIG_TRACER_SNAPSHOT
/**
 * trace_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, it will stop tracing.
 * Basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	struct tracer *tracer = tr->current_trace;
	unsigned long flags;

630 631 632 633 634 635
	if (in_nmi()) {
		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		internal_trace_puts("*** snapshot is being ignored        ***\n");
		return;
	}

636
	if (!tr->allocated_snapshot) {
637 638
		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
		internal_trace_puts("*** stopping trace here!   ***\n");
639 640 641 642 643 644
		tracing_off();
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer->use_max_tr) {
645 646
		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
647 648 649 650 651 652 653
		return;
	}

	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id());
	local_irq_restore(flags);
}
654
EXPORT_SYMBOL_GPL(tracing_snapshot);
655 656 657

static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
					struct trace_buffer *size_buf, int cpu_id);
658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677
static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);

static int alloc_snapshot(struct trace_array *tr)
{
	int ret;

	if (!tr->allocated_snapshot) {

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

678
static void free_snapshot(struct trace_array *tr)
679 680 681 682 683 684 685 686 687 688 689
{
	/*
	 * We don't free the ring buffer. instead, resize it because
	 * The max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want preserve it.
	 */
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}
690

691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = alloc_snapshot(tr);
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);

713 714 715 716 717 718 719 720 721 722 723 724 725 726 727
/**
 * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to trace_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	int ret;

728 729
	ret = tracing_alloc_snapshot();
	if (ret < 0)
730
		return;
731 732 733

	tracing_snapshot();
}
734
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
735 736 737 738 739
#else
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
740
EXPORT_SYMBOL_GPL(tracing_snapshot);
741 742 743 744 745 746
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
747 748 749 750 751
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
752
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
753 754
#endif /* CONFIG_TRACER_SNAPSHOT */

755
static void tracer_tracing_off(struct trace_array *tr)
756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771
{
	if (tr->trace_buffer.buffer)
		ring_buffer_record_off(tr->trace_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
	/* Make the flag seen by readers */
	smp_wmb();
}

772 773 774 775 776 777 778 779 780 781
/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
782
	tracer_tracing_off(&global_trace);
783 784 785
}
EXPORT_SYMBOL_GPL(tracing_off);

786 787 788 789 790 791
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning)
		tracing_off();
}

792 793 794 795 796 797
/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
798
static int tracer_tracing_is_on(struct trace_array *tr)
799 800 801 802 803 804
{
	if (tr->trace_buffer.buffer)
		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
	return !tr->buffer_disabled;
}

805 806 807 808 809
/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
810
	return tracer_tracing_is_on(&global_trace);
811 812 813
}
EXPORT_SYMBOL_GPL(tracing_is_on);

814
static int __init set_buf_size(char *str)
815
{
816
	unsigned long buf_size;
817

818 819
	if (!str)
		return 0;
820
	buf_size = memparse(str, &str);
821
	/* nr_entries can not be zero */
822
	if (buf_size == 0)
823
		return 0;
824
	trace_buf_size = buf_size;
825 826
	return 1;
}
827
__setup("trace_buf_size=", set_buf_size);
828

829 830
static int __init set_tracing_thresh(char *str)
{
831
	unsigned long threshold;
832 833 834 835
	int ret;

	if (!str)
		return 0;
836
	ret = kstrtoul(str, 0, &threshold);
837 838
	if (ret < 0)
		return 0;
839
	tracing_thresh = threshold * 1000;
840 841 842 843
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);

S
Steven Rostedt 已提交
844 845 846 847 848
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}

849 850 851 852 853 854 855 856 857
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the enums were defined.
 */
#undef C
#define C(a, b) b

S
Steven Rostedt 已提交
858
/* These must match the bit postions in trace_iterator_flags */
859
static const char *trace_options[] = {
860
	TRACE_FLAGS
861 862 863
	NULL
};

864 865 866
static struct {
	u64 (*func)(void);
	const char *name;
867
	int in_ns;		/* is this clock in nanoseconds? */
868
} trace_clocks[] = {
869 870 871
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
872
	{ trace_clock_jiffies,		"uptime",	0 },
873 874
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
875
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
D
David Sharp 已提交
876
	ARCH_TRACE_CLOCKS
877 878
};

879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955
/*
 * trace_parser_get_init - gets the buffer for trace parser
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}

/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
}

/*
 * trace_get_user - reads the user input string separated by  space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto out;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto out;
			read++;
			cnt--;
		}

		/* only spaces were written */
		if (isspace(ch)) {
			*ppos += read;
			ret = read;
			goto out;
		}

		parser->idx = 0;
	}

	/* read the non-space input */
	while (cnt && !isspace(ch)) {
956
		if (parser->idx < parser->size - 1)
957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto out;
		}
		ret = get_user(ch, ubuf++);
		if (ret)
			goto out;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch)) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
973
	} else if (parser->idx < parser->size - 1) {
974 975
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
976 977 978
	} else {
		ret = -EINVAL;
		goto out;
979 980 981 982 983 984 985 986 987
	}

	*ppos += read;
	ret = read;

out:
	return ret;
}

988
/* TODO add a seq_buf_to_buffer() */
989
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
990 991 992
{
	int len;

993
	if (trace_seq_used(s) <= s->seq.readpos)
994 995
		return -EBUSY;

996
	len = trace_seq_used(s) - s->seq.readpos;
997 998
	if (cnt > len)
		cnt = len;
999
	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1000

1001
	s->seq.readpos += cnt;
1002 1003 1004
	return cnt;
}

1005 1006
unsigned long __read_mostly	tracing_thresh;

1007 1008 1009 1010 1011 1012 1013 1014 1015
#ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
1016 1017 1018 1019
	struct trace_buffer *trace_buf = &tr->trace_buffer;
	struct trace_buffer *max_buf = &tr->max_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1020

1021 1022
	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;
1023

1024
	max_data->saved_latency = tr->max_latency;
1025 1026
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;
1027

1028
	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1029
	max_data->pid = tsk->pid;
1030 1031 1032 1033 1034 1035 1036 1037 1038
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

1039 1040 1041
	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;
1042 1043 1044 1045 1046

	/* record this tasks comm */
	tracing_record_cmdline(tsk);
}

S
Steven Rostedt 已提交
1047 1048 1049 1050 1051 1052 1053 1054 1055
/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 */
I
Ingo Molnar 已提交
1056
void
1057 1058
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
1059
	struct ring_buffer *buf;
1060

1061
	if (tr->stop_count)
1062 1063
		return;

1064
	WARN_ON_ONCE(!irqs_disabled());
1065

1066
	if (!tr->allocated_snapshot) {
1067
		/* Only the nop tracer should hit this when disabling */
1068
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069
		return;
1070
	}
1071

1072
	arch_spin_lock(&tr->max_lock);
1073

1074 1075 1076
	buf = tr->trace_buffer.buffer;
	tr->trace_buffer.buffer = tr->max_buffer.buffer;
	tr->max_buffer.buffer = buf;
1077

1078
	__update_max_tr(tr, tsk, cpu);
1079
	arch_spin_unlock(&tr->max_lock);
1080 1081 1082 1083 1084 1085 1086
}

/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr - tracer
 * @tsk - task with the latency
 * @cpu - the cpu of the buffer to copy.
S
Steven Rostedt 已提交
1087 1088
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1089
 */
I
Ingo Molnar 已提交
1090
void
1091 1092
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
1093
	int ret;
1094

1095
	if (tr->stop_count)
1096 1097
		return;

1098
	WARN_ON_ONCE(!irqs_disabled());
S
Steven Rostedt 已提交
1099
	if (!tr->allocated_snapshot) {
1100
		/* Only the nop tracer should hit this when disabling */
1101
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1102
		return;
1103
	}
1104

1105
	arch_spin_lock(&tr->max_lock);
1106

1107
	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1108

1109 1110 1111 1112 1113 1114 1115
	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 */
1116
		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1117 1118 1119 1120
			"Failed to swap buffers due to commit in progress\n");
	}

	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1121 1122

	__update_max_tr(tr, tsk, cpu);
1123
	arch_spin_unlock(&tr->max_lock);
1124
}
1125
#endif /* CONFIG_TRACER_MAX_TRACE */
1126

1127
static int wait_on_pipe(struct trace_iterator *iter, bool full)
1128
{
1129 1130
	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
1131
		return 0;
1132

1133 1134
	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
				full);
1135 1136
}

1137 1138 1139 1140 1141 1142
#ifdef CONFIG_FTRACE_STARTUP_TEST
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;
1143

1144 1145
	if (!type->selftest || tracing_selftest_disabled)
		return 0;
1146 1147

	/*
1148 1149 1150 1151 1152
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
1153
	 */
1154
	tracing_reset_online_cpus(&tr->trace_buffer);
1155

1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184
	tr->current_trace = type;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		tr->allocated_snapshot = false;
1185

1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
		/* Shrink the max buffer again */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
#else
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
1200
}
1201
#endif /* CONFIG_FTRACE_STARTUP_TEST */
1202

1203 1204
static void add_tracer_options(struct trace_array *tr, struct tracer *t);

1205 1206
static void __init apply_trace_boot_options(void);

S
Steven Rostedt 已提交
1207 1208 1209 1210 1211 1212
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type - the plugin for the tracer
 *
 * Register a new plugin tracer.
 */
1213
int __init register_tracer(struct tracer *type)
1214 1215 1216 1217 1218 1219 1220 1221 1222
{
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

1223
	if (strlen(type->name) >= MAX_TRACER_SIZE) {
L
Li Zefan 已提交
1224 1225 1226 1227
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

1228
	mutex_lock(&trace_types_lock);
I
Ingo Molnar 已提交
1229

1230 1231
	tracing_selftest_running = true;

1232 1233 1234
	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
L
Li Zefan 已提交
1235
			pr_info("Tracer %s already registered\n",
1236 1237 1238 1239 1240 1241
				type->name);
			ret = -1;
			goto out;
		}
	}

1242 1243
	if (!type->set_flag)
		type->set_flag = &dummy_set_flag;
1244 1245 1246
	if (!type->flags) {
		/*allocate a dummy tracer_flags*/
		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1247 1248 1249 1250
		if (!type->flags) {
			ret = -ENOMEM;
			goto out;
		}
1251 1252 1253
		type->flags->val = 0;
		type->flags->opts = dummy_tracer_opt;
	} else
1254 1255
		if (!type->flags->opts)
			type->flags->opts = dummy_tracer_opt;
1256

1257 1258 1259
	/* store the tracer for __set_tracer_option */
	type->flags->trace = type;

1260 1261 1262
	ret = run_tracer_selftest(type);
	if (ret < 0)
		goto out;
S
Steven Rostedt 已提交
1263

1264 1265
	type->next = trace_types;
	trace_types = type;
1266
	add_tracer_options(&global_trace, type);
S
Steven Rostedt 已提交
1267

1268
 out:
1269
	tracing_selftest_running = false;
1270 1271
	mutex_unlock(&trace_types_lock);

S
Steven Rostedt 已提交
1272 1273 1274
	if (ret || !default_bootup_tracer)
		goto out_unlock;

L
Li Zefan 已提交
1275
	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
S
Steven Rostedt 已提交
1276 1277 1278 1279
		goto out_unlock;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
1280
	tracing_set_tracer(&global_trace, type->name);
S
Steven Rostedt 已提交
1281
	default_bootup_tracer = NULL;
1282 1283 1284

	apply_trace_boot_options();

S
Steven Rostedt 已提交
1285
	/* disable other selftests, since this will break it. */
1286
	tracing_selftest_disabled = true;
1287
#ifdef CONFIG_FTRACE_STARTUP_TEST
S
Steven Rostedt 已提交
1288 1289
	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
	       type->name);
1290 1291
#endif

S
Steven Rostedt 已提交
1292
 out_unlock:
1293 1294 1295
	return ret;
}

1296
void tracing_reset(struct trace_buffer *buf, int cpu)
1297
{
1298
	struct ring_buffer *buffer = buf->buffer;
1299

1300 1301 1302
	if (!buffer)
		return;

1303 1304 1305 1306
	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_sched();
1307
	ring_buffer_reset_cpu(buffer, cpu);
1308 1309 1310 1311

	ring_buffer_record_enable(buffer);
}

1312
void tracing_reset_online_cpus(struct trace_buffer *buf)
1313
{
1314
	struct ring_buffer *buffer = buf->buffer;
1315 1316
	int cpu;

1317 1318 1319
	if (!buffer)
		return;

1320 1321 1322 1323 1324
	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_sched();

1325
	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1326 1327

	for_each_online_cpu(cpu)
1328
		ring_buffer_reset_cpu(buffer, cpu);
1329 1330

	ring_buffer_record_enable(buffer);
1331 1332
}

1333
/* Must have trace_types_lock held */
1334
void tracing_reset_all_online_cpus(void)
1335
{
1336 1337 1338
	struct trace_array *tr;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1339 1340 1341 1342
		tracing_reset_online_cpus(&tr->trace_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
		tracing_reset_online_cpus(&tr->max_buffer);
#endif
1343
	}
1344 1345
}

1346
#define SAVED_CMDLINES_DEFAULT 128
1347
#define NO_CMDLINE_MAP UINT_MAX
1348
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1349 1350 1351 1352 1353 1354 1355 1356
struct saved_cmdlines_buffer {
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	unsigned *map_cmdline_to_pid;
	unsigned cmdline_num;
	int cmdline_idx;
	char *saved_cmdlines;
};
static struct saved_cmdlines_buffer *savedcmd;
1357 1358

/* temporary disable recording */
1359
static atomic_t trace_record_cmdline_disabled __read_mostly;
1360

1361 1362 1363 1364 1365 1366
static inline char *get_saved_cmdlines(int idx)
{
	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
}

static inline void set_cmdline(int idx, const char *cmdline)
1367
{
1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398
	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}

static int allocate_cmdlines_buffer(unsigned int val,
				    struct saved_cmdlines_buffer *s)
{
	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
					GFP_KERNEL);
	if (!s->map_cmdline_to_pid)
		return -ENOMEM;

	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
	if (!s->saved_cmdlines) {
		kfree(s->map_cmdline_to_pid);
		return -ENOMEM;
	}

	s->cmdline_idx = 0;
	s->cmdline_num = val;
	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
	       sizeof(s->map_pid_to_cmdline));
	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
	       val * sizeof(*s->map_cmdline_to_pid));

	return 0;
}

static int trace_create_savedcmd(void)
{
	int ret;

1399
	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410
	if (!savedcmd)
		return -ENOMEM;

	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
	if (ret < 0) {
		kfree(savedcmd);
		savedcmd = NULL;
		return -ENOMEM;
	}

	return 0;
1411 1412
}

1413 1414
int is_tracing_stopped(void)
{
1415
	return global_trace.stop_count;
1416 1417
}

1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431
/**
 * tracing_start - quick start of the tracer
 *
 * If tracing is enabled but was stopped by tracing_stop,
 * this will start the tracer back up.
 */
void tracing_start(void)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

1432 1433 1434
	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
	if (--global_trace.stop_count) {
		if (global_trace.stop_count < 0) {
1435 1436
			/* Someone screwed up their debugging */
			WARN_ON_ONCE(1);
1437
			global_trace.stop_count = 0;
1438
		}
1439 1440 1441
		goto out;
	}

1442
	/* Prevent the buffers from switching */
1443
	arch_spin_lock(&global_trace.max_lock);
1444

1445
	buffer = global_trace.trace_buffer.buffer;
1446 1447 1448
	if (buffer)
		ring_buffer_record_enable(buffer);

1449 1450
#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = global_trace.max_buffer.buffer;
1451 1452
	if (buffer)
		ring_buffer_record_enable(buffer);
1453
#endif
1454

1455
	arch_spin_unlock(&global_trace.max_lock);
1456

1457
 out:
1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}

static void tracing_start_tr(struct trace_array *tr)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

	/* If global, we need to also start the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_start();

	raw_spin_lock_irqsave(&tr->start_lock, flags);

	if (--tr->stop_count) {
		if (tr->stop_count < 0) {
			/* Someone screwed up their debugging */
			WARN_ON_ONCE(1);
			tr->stop_count = 0;
		}
		goto out;
	}

1484
	buffer = tr->trace_buffer.buffer;
1485 1486 1487 1488 1489
	if (buffer)
		ring_buffer_record_enable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502
}

/**
 * tracing_stop - quick stop of the tracer
 *
 * Light weight way to stop tracing. Use in conjunction with
 * tracing_start.
 */
void tracing_stop(void)
{
	struct ring_buffer *buffer;
	unsigned long flags;

1503 1504
	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
	if (global_trace.stop_count++)
1505 1506
		goto out;

1507
	/* Prevent the buffers from switching */
1508
	arch_spin_lock(&global_trace.max_lock);
1509

1510
	buffer = global_trace.trace_buffer.buffer;
1511 1512 1513
	if (buffer)
		ring_buffer_record_disable(buffer);

1514 1515
#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = global_trace.max_buffer.buffer;
1516 1517
	if (buffer)
		ring_buffer_record_disable(buffer);
1518
#endif
1519

1520
	arch_spin_unlock(&global_trace.max_lock);
1521

1522
 out:
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538
	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}

static void tracing_stop_tr(struct trace_array *tr)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	/* If global, we need to also stop the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_stop();

	raw_spin_lock_irqsave(&tr->start_lock, flags);
	if (tr->stop_count++)
		goto out;

1539
	buffer = tr->trace_buffer.buffer;
1540 1541 1542 1543 1544
	if (buffer)
		ring_buffer_record_disable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1545 1546
}

I
Ingo Molnar 已提交
1547
void trace_stop_cmdline_recording(void);
1548

1549
static int trace_save_cmdline(struct task_struct *tsk)
1550
{
1551
	unsigned pid, idx;
1552 1553

	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1554
		return 0;
1555 1556 1557 1558 1559 1560 1561

	/*
	 * It's not the end of the world if we don't get
	 * the lock, but we also don't want to spin
	 * nor do we want to disable interrupts,
	 * so if we miss here, then better luck next time.
	 */
1562
	if (!arch_spin_trylock(&trace_cmdline_lock))
1563
		return 0;
1564

1565
	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1566
	if (idx == NO_CMDLINE_MAP) {
1567
		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1568

1569 1570 1571 1572 1573 1574
		/*
		 * Check whether the cmdline buffer at idx has a pid
		 * mapped. We are going to overwrite that entry so we
		 * need to clear the map_pid_to_cmdline. Otherwise we
		 * would read the new comm for the old pid.
		 */
1575
		pid = savedcmd->map_cmdline_to_pid[idx];
1576
		if (pid != NO_CMDLINE_MAP)
1577
			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1578

1579 1580
		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1581

1582
		savedcmd->cmdline_idx = idx;
1583 1584
	}

1585
	set_cmdline(idx, tsk->comm);
1586

1587
	arch_spin_unlock(&trace_cmdline_lock);
1588 1589

	return 1;
1590 1591
}

1592
static void __trace_find_cmdline(int pid, char comm[])
1593 1594 1595
{
	unsigned map;

1596 1597 1598 1599
	if (!pid) {
		strcpy(comm, "<idle>");
		return;
	}
1600

1601 1602 1603 1604 1605
	if (WARN_ON_ONCE(pid < 0)) {
		strcpy(comm, "<XXX>");
		return;
	}

1606 1607 1608 1609
	if (pid > PID_MAX_DEFAULT) {
		strcpy(comm, "<...>");
		return;
	}
1610

1611
	map = savedcmd->map_pid_to_cmdline[pid];
1612
	if (map != NO_CMDLINE_MAP)
1613
		strcpy(comm, get_saved_cmdlines(map));
1614 1615
	else
		strcpy(comm, "<...>");
1616 1617 1618 1619 1620 1621 1622 1623
}

void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);
1624

1625
	arch_spin_unlock(&trace_cmdline_lock);
1626
	preempt_enable();
1627 1628
}

I
Ingo Molnar 已提交
1629
void tracing_record_cmdline(struct task_struct *tsk)
1630
{
1631
	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1632 1633
		return;

1634 1635 1636
	if (!__this_cpu_read(trace_cmdline_save))
		return;

1637 1638
	if (trace_save_cmdline(tsk))
		__this_cpu_write(trace_cmdline_save, false);
1639 1640
}

1641
void
1642 1643
tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
			     int pc)
1644 1645 1646
{
	struct task_struct *tsk = current;

1647 1648 1649
	entry->preempt_count		= pc & 0xff;
	entry->pid			= (tsk) ? tsk->pid : 0;
	entry->flags =
1650
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1651
		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1652 1653 1654
#else
		TRACE_FLAG_IRQS_NOSUPPORT |
#endif
1655
		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1656 1657
		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1658 1659
		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1660
}
1661
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1662

1663 1664 1665 1666 1667 1668 1669 1670 1671 1672
static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
		  int type, unsigned long flags, int pc)
{
	struct trace_entry *ent = ring_buffer_event_data(event);

	tracing_generic_entry_update(ent, flags, pc);
	ent->type = type;
}

1673 1674 1675 1676 1677
struct ring_buffer_event *
trace_buffer_lock_reserve(struct ring_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned long flags, int pc)
1678 1679 1680
{
	struct ring_buffer_event *event;

1681
	event = ring_buffer_lock_reserve(buffer, len);
1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710
	if (event != NULL)
		trace_event_setup(event, type, flags, pc);

	return event;
}

DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
static int trace_buffered_event_ref;

/**
 * trace_buffered_event_enable - enable buffering events
 *
 * When events are being filtered, it is quicker to use a temporary
 * buffer to write the event data into if there's a likely chance
 * that it will not be committed. The discard of the ring buffer
 * is not as fast as committing, and is much slower than copying
 * a commit.
 *
 * When an event is to be filtered, allocate per cpu buffers to
 * write the event data into, and if the event is filtered and discarded
 * it is simply dropped, otherwise, the entire data is to be committed
 * in one shot.
 */
void trace_buffered_event_enable(void)
{
	struct ring_buffer_event *event;
	struct page *page;
	int cpu;
1711

1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733
	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	if (trace_buffered_event_ref++)
		return;

	for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
		if (!page)
			goto failed;

		event = page_address(page);
		memset(event, 0, sizeof(*event));

		per_cpu(trace_buffered_event, cpu) = event;

		preempt_disable();
		if (cpu == smp_processor_id() &&
		    this_cpu_read(trace_buffered_event) !=
		    per_cpu(trace_buffered_event, cpu))
			WARN_ON_ONCE(1);
		preempt_enable();
1734 1735
	}

1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796
	return;
 failed:
	trace_buffered_event_disable();
}

static void enable_trace_buffered_event(void *data)
{
	/* Probably not needed, but do it anyway */
	smp_rmb();
	this_cpu_dec(trace_buffered_event_cnt);
}

static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}

/**
 * trace_buffered_event_disable - disable buffering events
 *
 * When a filter is removed, it is faster to not use the buffered
 * events, and to commit directly into the ring buffer. Free up
 * the temp buffers when there are no more users. This requires
 * special synchronization with current events.
 */
void trace_buffered_event_disable(void)
{
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	if (WARN_ON_ONCE(!trace_buffered_event_ref))
		return;

	if (--trace_buffered_event_ref)
		return;

	preempt_disable();
	/* For each CPU, set the buffer as used. */
	smp_call_function_many(tracing_buffer_mask,
			       disable_trace_buffered_event, NULL, 1);
	preempt_enable();

	/* Wait for all current users to finish */
	synchronize_sched();

	for_each_tracing_cpu(cpu) {
		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
		per_cpu(trace_buffered_event, cpu) = NULL;
	}
	/*
	 * Make sure trace_buffered_event is NULL before clearing
	 * trace_buffered_event_cnt.
	 */
	smp_wmb();

	preempt_disable();
	/* Do the work on each cpu */
	smp_call_function_many(tracing_buffer_mask,
			       enable_trace_buffered_event, NULL, 1);
	preempt_enable();
1797 1798
}

1799 1800 1801 1802
void
__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
{
	__this_cpu_write(trace_cmdline_save, true);
1803 1804 1805 1806 1807 1808 1809 1810 1811

	/* If this is the temp buffer, we need to commit fully */
	if (this_cpu_read(trace_buffered_event) == event) {
		/* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
		/* Release the temp buffer */
		this_cpu_dec(trace_buffered_event_cnt);
	} else
		ring_buffer_unlock_commit(buffer, event);
1812 1813
}

1814 1815
static struct ring_buffer *temp_buffer;

1816 1817
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1818
			  struct trace_event_file *trace_file,
1819 1820 1821
			  int type, unsigned long len,
			  unsigned long flags, int pc)
{
1822
	struct ring_buffer_event *entry;
1823
	int val;
1824

1825
	*current_rb = trace_file->tr->trace_buffer.buffer;
1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839

	if ((trace_file->flags &
	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
	    (entry = this_cpu_read(trace_buffered_event))) {
		/* Try to use the per cpu buffer first */
		val = this_cpu_inc_return(trace_buffered_event_cnt);
		if (val == 1) {
			trace_event_setup(entry, type, flags, pc);
			entry->array[0] = len;
			return entry;
		}
		this_cpu_dec(trace_buffered_event_cnt);
	}

1840
	entry = trace_buffer_lock_reserve(*current_rb,
1841
					 type, len, flags, pc);
1842 1843 1844 1845 1846 1847
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the tigger to use. It's recusive
	 * safe and will not be recorded anywhere.
	 */
1848
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1849 1850 1851 1852 1853
		*current_rb = temp_buffer;
		entry = trace_buffer_lock_reserve(*current_rb,
						  type, len, flags, pc);
	}
	return entry;
1854 1855 1856
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);

1857 1858
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct ring_buffer *buffer,
1859 1860 1861
				     struct ring_buffer_event *event,
				     unsigned long flags, int pc,
				     struct pt_regs *regs)
1862
{
1863
	__buffer_unlock_commit(buffer, event);
1864

1865
	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1866 1867 1868
	ftrace_trace_userstack(buffer, flags, pc);
}

I
Ingo Molnar 已提交
1869
void
1870
trace_function(struct trace_array *tr,
1871 1872
	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
	       int pc)
1873
{
1874
	struct trace_event_call *call = &event_function;
1875
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
1876
	struct ring_buffer_event *event;
1877
	struct ftrace_entry *entry;
1878

1879
	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1880
					  flags, pc);
1881 1882 1883
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
1884 1885
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;
1886

1887
	if (!call_filter_check_discard(call, entry, buffer, event))
1888
		__buffer_unlock_commit(buffer, event);
1889 1890
}

1891
#ifdef CONFIG_STACKTRACE
1892 1893 1894 1895 1896 1897 1898 1899 1900

#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
struct ftrace_stack {
	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
};

static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);

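/*
 * Record a kernel stack trace (TRACE_STACK) into @buffer. A per-CPU
 * ftrace_stack is used so deep traces can be captured without a large
 * on-stack array; if that storage is already in use (e.g. an interrupt
 * or NMI came in), the trace is saved directly into the event with the
 * default number of entries.
 */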
static void __ftrace_trace_stack(struct ring_buffer *buffer,
1902
				 unsigned long flags,
1903
				 int skip, int pc, struct pt_regs *regs)
{
1905
	struct trace_event_call *call = &event_kernel_stack;
1906
	struct ring_buffer_event *event;
1907
	struct stack_entry *entry;
	struct stack_trace trace;
1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922
	int use_stack;
	int size = FTRACE_STACK_ENTRIES;

	trace.nr_entries	= 0;
	trace.skip		= skip;

	/*
	 * Since events can happen in NMIs there's no safe way to
	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
	 * or NMI comes in, it will just have to use the default
	 * FTRACE_STACK_SIZE.
	 */
	preempt_disable_notrace();

1923
	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1924 1925 1926 1927 1928 1929 1930 1931 1932
	/*
	 * We don't need any atomic variables, just a barrier.
	 * If an interrupt comes in, we don't care, because it would
	 * have exited and put the counter back to what we want.
	 * We just need a barrier to keep gcc from moving things
	 * around.
	 */
	barrier();
	if (use_stack == 1) {
1933
		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947
		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;

		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);

		if (trace.nr_entries > size)
			size = trace.nr_entries;
	} else
		/* From now on, use_stack is a boolean */
		use_stack = 0;

	size *= sizeof(unsigned long);

1949
	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1950
					  sizeof(*entry) + size, flags, pc);
1951
	if (!event)
1952 1953
		goto out;
	entry = ring_buffer_event_data(event);

1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969
	memset(&entry->caller, 0, size);

	if (use_stack)
		memcpy(&entry->caller, trace.entries,
		       trace.nr_entries * sizeof(unsigned long));
	else {
		trace.max_entries	= FTRACE_STACK_ENTRIES;
		trace.entries		= entry->caller;
		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);
	}

	entry->size = trace.nr_entries;

1971
	if (!call_filter_check_discard(call, entry, buffer, event))
1972
		__buffer_unlock_commit(buffer, event);
1973 1974 1975 1976

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
1977
	__this_cpu_dec(ftrace_stack_reserve);
1978 1979
	preempt_enable_notrace();

}

1982 1983
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct ring_buffer *buffer,
1984 1985
				      unsigned long flags,
				      int skip, int pc, struct pt_regs *regs)
1986
{
1987
	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1988 1989
		return;

1990
	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
1991 1992
}

1993 1994
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
		   int pc)
1995
{
1996
	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1997 1998
}

/**
 * trace_dump_stack - record a stack back trace in the trace buffer
 * @skip: Number of functions to skip (helper handlers)
 */
void trace_dump_stack(int skip)
{
	unsigned long flags;

	if (tracing_disabled || tracing_selftest_running)
		return;

	local_save_flags(flags);

	/*
	 * Skip 3 more, seems to get us at the caller of
	 * this function.
	 */
	skip += 3;
	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
			     flags, skip, preempt_count(), NULL);
}

2021 2022
static DEFINE_PER_CPU(int, user_stack_count);

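/*
 * Record the current user-space stack trace (TRACE_USER_STACK) into
 * @buffer. Not usable from NMI context, since saving the user stack
 * may fault.
 */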
void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2025
{
2026
	struct trace_event_call *call = &event_user_stack;
2027
	struct ring_buffer_event *event;
2028 2029 2030
	struct userstack_entry *entry;
	struct stack_trace trace;

2031
	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2032 2033
		return;

2034 2035 2036 2037 2038 2039
	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;
2040

2041 2042 2043 2044 2045 2046 2047 2048 2049 2050
	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	preempt_disable();
	if (__this_cpu_read(user_stack_count))
		goto out;

	__this_cpu_inc(user_stack_count);

2051
	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2052
					  sizeof(*entry), flags, pc);
2053
	if (!event)
		goto out_drop_count;
2055 2056
	entry	= ring_buffer_event_data(event);

2057
	entry->tgid		= current->tgid;
2058 2059 2060 2061 2062 2063 2064 2065
	memset(&entry->caller, 0, sizeof(entry->caller));

	trace.nr_entries	= 0;
	trace.max_entries	= FTRACE_STACK_ENTRIES;
	trace.skip		= 0;
	trace.entries		= entry->caller;

	save_stack_trace_user(&trace);
2066
	if (!call_filter_check_discard(call, entry, buffer, event))
2067
		__buffer_unlock_commit(buffer, event);
2068

 out_drop_count:
2070 2071 2072
	__this_cpu_dec(user_stack_count);
 out:
	preempt_enable();
2073 2074
}

2075 2076
#ifdef UNUSED
static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2077
{
2078
	ftrace_trace_userstack(tr, flags, preempt_count());
2079
}
2080
#endif /* UNUSED */
2081

2082 2083
#endif /* CONFIG_STACKTRACE */

/* created for use with alloc_percpu */
struct trace_buffer_struct {
	char buffer[TRACE_BUF_SIZE];
};

static struct trace_buffer_struct *trace_percpu_buffer;
static struct trace_buffer_struct *trace_percpu_sirq_buffer;
static struct trace_buffer_struct *trace_percpu_irq_buffer;
static struct trace_buffer_struct *trace_percpu_nmi_buffer;

/*
 * The buffer used is dependent on the context. There is a per cpu
 * buffer for normal context, softirq context, hard irq context and
 * for NMI context. This allows for lockless recording.
 *
 * Note, if the buffers failed to be allocated, then this returns NULL
 */
static char *get_trace_buf(void)
{
	struct trace_buffer_struct *percpu_buffer;

	/*
	 * If we have allocated per cpu buffers, then we do not
	 * need to do any locking.
	 */
	if (in_nmi())
		percpu_buffer = trace_percpu_nmi_buffer;
	else if (in_irq())
		percpu_buffer = trace_percpu_irq_buffer;
	else if (in_softirq())
		percpu_buffer = trace_percpu_sirq_buffer;
	else
		percpu_buffer = trace_percpu_buffer;

	if (!percpu_buffer)
		return NULL;

	return this_cpu_ptr(&percpu_buffer->buffer[0]);
}

static int alloc_percpu_trace_buffer(void)
{
	struct trace_buffer_struct *buffers;
	struct trace_buffer_struct *sirq_buffers;
	struct trace_buffer_struct *irq_buffers;
	struct trace_buffer_struct *nmi_buffers;

	buffers = alloc_percpu(struct trace_buffer_struct);
	if (!buffers)
		goto err_warn;

	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
	if (!sirq_buffers)
		goto err_sirq;

	irq_buffers = alloc_percpu(struct trace_buffer_struct);
	if (!irq_buffers)
		goto err_irq;

	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
	if (!nmi_buffers)
		goto err_nmi;

	trace_percpu_buffer = buffers;
	trace_percpu_sirq_buffer = sirq_buffers;
	trace_percpu_irq_buffer = irq_buffers;
	trace_percpu_nmi_buffer = nmi_buffers;

	return 0;

 err_nmi:
	free_percpu(irq_buffers);
 err_irq:
	free_percpu(sirq_buffers);
 err_sirq:
	free_percpu(buffers);
 err_warn:
	WARN(1, "Could not allocate percpu trace_printk buffer");
	return -ENOMEM;
}

2165 2166
static int buffers_allocated;

2167 2168 2169 2170 2171 2172 2173 2174
void trace_printk_init_buffers(void)
{
	if (buffers_allocated)
		return;

	if (alloc_percpu_trace_buffer())
		return;

2175 2176
	/* trace_printk() is for debug use only. Don't use it in production. */

2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190
	pr_warn("\n");
	pr_warn("**********************************************************\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
	pr_warn("** unsafe for production use.                           **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** If you see this message and you are not debugging    **\n");
	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**********************************************************\n");
2191

2192 2193 2194
	/* Expand the buffers to set size */
	tracing_update_buffers();

2195
	buffers_allocated = 1;
2196 2197 2198 2199 2200 2201 2202

	/*
	 * trace_printk_init_buffers() can be called by modules.
	 * If that happens, then we need to start cmdline recording
	 * directly here. If the global_trace.buffer is already
	 * allocated here, then this was called by module code.
	 */
2203
	if (global_trace.trace_buffer.buffer)
2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223
		tracing_start_cmdline_record();
}

void trace_printk_start_comm(void)
{
	/* Start tracing comms if trace printk is set */
	if (!buffers_allocated)
		return;
	tracing_start_cmdline_record();
}

static void trace_printk_start_stop_comm(int enabled)
{
	if (!buffers_allocated)
		return;

	if (enabled)
		tracing_start_cmdline_record();
	else
		tracing_stop_cmdline_record();
}

/**
 * trace_vbprintk - write binary msg to tracing buffer
 * @ip: The address of the caller
 * @fmt: The format string to use
 * @args: Arguments for @fmt
 */
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2231
{
2232
	struct trace_event_call *call = &event_bprint;
2233
	struct ring_buffer_event *event;
2234
	struct ring_buffer *buffer;
2235
	struct trace_array *tr = &global_trace;
2236
	struct bprint_entry *entry;
2237
	unsigned long flags;
2238 2239
	char *tbuffer;
	int len = 0, size, pc;
2240 2241 2242 2243 2244 2245 2246 2247

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

	pc = preempt_count();
2248
	preempt_disable_notrace();
2249

2250 2251 2252
	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
2253
		goto out;
2254
	}
2255

2256
	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2257

2258 2259
	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
		goto out;
2260

2261
	local_save_flags(flags);
2262
	size = sizeof(*entry) + sizeof(u32) * len;
2263
	buffer = tr->trace_buffer.buffer;
2264 2265
	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
					  flags, pc);
2266
	if (!event)
2267
		goto out;
2268 2269 2270 2271
	entry = ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->fmt			= fmt;

2272
	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2273
	if (!call_filter_check_discard(call, entry, buffer, event)) {
2274
		__buffer_unlock_commit(buffer, event);
2275
		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2276
	}
2277 2278

out:
2279
	preempt_enable_notrace();
2280 2281 2282 2283
	unpause_graph_tracing();

	return len;
}
2284 2285
EXPORT_SYMBOL_GPL(trace_vbprintk);

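/*
 * Format the message into a per-context scratch buffer and record it
 * as a TRACE_PRINT entry in @buffer.
 */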
static int
__trace_array_vprintk(struct ring_buffer *buffer,
		      unsigned long ip, const char *fmt, va_list args)
2289
{
2290
	struct trace_event_call *call = &event_print;
2291
	struct ring_buffer_event *event;
2292
	int len = 0, size, pc;
2293
	struct print_entry *entry;
2294 2295
	unsigned long flags;
	char *tbuffer;
2296 2297 2298 2299

	if (tracing_disabled || tracing_selftest_running)
		return 0;

2300 2301 2302
	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

2303 2304 2305
	pc = preempt_count();
	preempt_disable_notrace();

2306 2307 2308 2309

	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
2310
		goto out;
2311
	}
2312

2313
	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2314

2315
	local_save_flags(flags);
2316
	size = sizeof(*entry) + len + 1;
2317
	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2318
					  flags, pc);
2319
	if (!event)
2320
		goto out;
2321
	entry = ring_buffer_event_data(event);
	entry->ip = ip;
2323

2324
	memcpy(&entry->buf, tbuffer, len + 1);
2325
	if (!call_filter_check_discard(call, entry, buffer, event)) {
2326
		__buffer_unlock_commit(buffer, event);
2327
		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2328
	}
2329 2330
 out:
	preempt_enable_notrace();
2331
	unpause_graph_tracing();
2332 2333 2334

	return len;
}
2335

2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347
int trace_array_vprintk(struct trace_array *tr,
			unsigned long ip, const char *fmt, va_list args)
{
	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
}

int trace_array_printk(struct trace_array *tr,
		       unsigned long ip, const char *fmt, ...)
{
	int ret;
	va_list ap;

2348
	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362
		return 0;

	va_start(ap, fmt);
	ret = trace_array_vprintk(tr, ip, fmt, ap);
	va_end(ap);
	return ret;
}

int trace_array_printk_buf(struct ring_buffer *buffer,
			   unsigned long ip, const char *fmt, ...)
{
	int ret;
	va_list ap;

2363
	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2364 2365 2366 2367 2368 2369 2370 2371
		return 0;

	va_start(ap, fmt);
	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
	va_end(ap);
	return ret;
}

2372 2373
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
	return trace_array_vprintk(&global_trace, ip, fmt, args);
2375
}
2376 2377
EXPORT_SYMBOL_GPL(trace_vprintk);

2378
static void trace_iterator_increment(struct trace_iterator *iter)
{
2380 2381
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);

	iter->idx++;
2383 2384
	if (buf_iter)
		ring_buffer_read(buf_iter, NULL);
}

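/*
 * Peek at the next event on @cpu without consuming it, returning its
 * data and recording its length in iter->ent_size.
 */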
static struct trace_entry *
2388 2389
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
		unsigned long *lost_events)
2390
{
2391
	struct ring_buffer_event *event;
2392
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2393

2394 2395 2396
	if (buf_iter)
		event = ring_buffer_iter_peek(buf_iter, ts);
	else
2397
		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2398
					 lost_events);
2399

2400 2401 2402 2403 2404 2405
	if (event) {
		iter->ent_size = ring_buffer_event_length(event);
		return ring_buffer_event_data(event);
	}
	iter->ent_size = 0;
	return NULL;
2406
}
2407

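/*
 * Find the entry with the earliest timestamp among the per-CPU
 * buffers (or only the selected CPU when iter->cpu_file is set),
 * returning its cpu, lost event count and timestamp through the
 * output parameters.
 */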
static struct trace_entry *
2409 2410
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
2411
{
2412
	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2413
	struct trace_entry *ent, *next = NULL;
2414
	unsigned long lost_events = 0, next_lost = 0;
2415
	int cpu_file = iter->cpu_file;
2416
	u64 next_ts = 0, ts;
2417
	int next_cpu = -1;
2418
	int next_size = 0;
2419 2420
	int cpu;

2421 2422 2423 2424
	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
2425
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2426 2427
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
2428
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2429 2430 2431 2432 2433 2434
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

2435
	for_each_tracing_cpu(cpu) {
2436

2437 2438
		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;
2439

2440
		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2441

		/*
		 * Pick the entry with the smallest timestamp:
		 */
2445
		if (ent && (!next || ts < next_ts)) {
2446 2447
			next = ent;
			next_cpu = cpu;
2448
			next_ts = ts;
2449
			next_lost = lost_events;
2450
			next_size = iter->ent_size;
2451 2452 2453
		}
	}

2454 2455
	iter->ent_size = next_size;

2456 2457 2458
	if (ent_cpu)
		*ent_cpu = next_cpu;

2459 2460 2461
	if (ent_ts)
		*ent_ts = next_ts;

2462 2463 2464
	if (missing_events)
		*missing_events = next_lost;

2465 2466 2467
	return next;
}

2468
/* Find the next real entry, without updating the iterator itself */
2469 2470
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
2471
{
2472
	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2473 2474 2475
}

/* Find the next real entry, and increment the iterator to the next entry */
2476
void *trace_find_next_entry_inc(struct trace_iterator *iter)
2477
{
2478 2479
	iter->ent = __find_next_entry(iter, &iter->cpu,
				      &iter->lost_events, &iter->ts);
2480

2481
	if (iter->ent)
2482
		trace_iterator_increment(iter);
2483

2484
	return iter->ent ? iter : NULL;
2485
}
2486

static void trace_consume(struct trace_iterator *iter)
2488
{
2489
	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2490
			    &iter->lost_events);
2491 2492
}

static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2494 2495 2496
{
	struct trace_iterator *iter = m->private;
	int i = (int)*pos;
	void *ent;
2498

2499 2500
	WARN_ON_ONCE(iter->leftover);

2501 2502 2503 2504 2505 2506 2507
	(*pos)++;

	/* can't go backwards */
	if (iter->idx > i)
		return NULL;

	if (iter->idx < 0)
2508
		ent = trace_find_next_entry_inc(iter);
2509 2510 2511 2512
	else
		ent = iter;

	while (ent && iter->idx < i)
2513
		ent = trace_find_next_entry_inc(iter);
2514 2515 2516 2517 2518 2519

	iter->pos = *pos;

	return ent;
}

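/*
 * Reset the ring buffer iterator for @cpu and skip over any entries
 * recorded before the buffer's time_start, counting them as
 * skipped_entries.
 */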
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2521 2522 2523 2524 2525 2526
{
	struct ring_buffer_event *event;
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

2527
	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2528

2529 2530
	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
2531 2532 2533 2534 2535 2536 2537 2538 2539 2540
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2541
		if (ts >= iter->trace_buffer->time_start)
2542 2543 2544 2545 2546
			break;
		entries++;
		ring_buffer_read(buf_iter, NULL);
	}

2547
	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2548 2549
}

2550 2551 2552 2553
/*
 * The current tracer is copied to avoid taking a global lock
 * all around.
 */
2554 2555 2556
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
2557
	struct trace_array *tr = iter->tr;
2558
	int cpu_file = iter->cpu_file;
2559 2560
	void *p = NULL;
	loff_t l = 0;
2561
	int cpu;
2562

2563 2564 2565 2566 2567 2568
	/*
	 * copy the tracer to avoid using a global lock all around.
	 * iter->trace is a copy of current_trace, the pointer to the
	 * name may be used instead of a strcmp(), as iter->trace->name
	 * will point to the same string as current_trace->name.
	 */
2569
	mutex_lock(&trace_types_lock);
2570 2571
	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
		*iter->trace = *tr->current_trace;
2572
	mutex_unlock(&trace_types_lock);
2573

2574
#ifdef CONFIG_TRACER_MAX_TRACE
2575 2576
	if (iter->snapshot && iter->trace->use_max_tr)
		return ERR_PTR(-EBUSY);
2577
#endif
2578 2579 2580

	if (!iter->snapshot)
		atomic_inc(&trace_record_cmdline_disabled);
2581 2582 2583 2584 2585 2586

	if (*pos != iter->pos) {
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

2587
		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2588
			for_each_tracing_cpu(cpu)
2589
				tracing_iter_reset(iter, cpu);
2590
		} else
2591
			tracing_iter_reset(iter, cpu_file);
2592

2593
		iter->leftover = 0;
2594 2595 2596 2597
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
2598 2599 2600 2601 2602 2603 2604 2605 2606 2607
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
2608 2609
	}

2610
	trace_event_read_lock();
2611
	trace_access_lock(cpu_file);
2612 2613 2614 2615 2616
	return p;
}

static void s_stop(struct seq_file *m, void *p)
{
2617 2618
	struct trace_iterator *iter = m->private;

2619
#ifdef CONFIG_TRACER_MAX_TRACE
2620 2621
	if (iter->snapshot && iter->trace->use_max_tr)
		return;
2622
#endif
2623 2624 2625

	if (!iter->snapshot)
		atomic_dec(&trace_record_cmdline_disabled);
2626

2627
	trace_access_unlock(iter->cpu_file);
2628
	trace_event_read_unlock();
2629 2630
}

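/*
 * Sum up the events across all tracing CPUs: *entries is the number
 * still present in the buffer, *total additionally includes the ones
 * that were overwritten (overruns).
 */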
static void
2632 2633
get_total_entries(struct trace_buffer *buf,
		  unsigned long *total, unsigned long *entries)
2634 2635 2636 2637 2638 2639 2640 2641
{
	unsigned long count;
	int cpu;

	*total = 0;
	*entries = 0;

	for_each_tracing_cpu(cpu) {
2642
		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2643 2644 2645 2646 2647
		/*
		 * If this buffer has skipped entries, then we hold all
		 * entries for the trace and we need to ignore the
		 * ones before the time stamp.
		 */
2648 2649
		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2650 2651 2652 2653
			/* total is the same as the entries */
			*total += count;
		} else
			*total += count +
2654
				ring_buffer_overrun_cpu(buf->buffer, cpu);
2655 2656 2657 2658
		*entries += count;
	}
}

static void print_lat_help_header(struct seq_file *m)
2660
{
2661 2662 2663 2664 2665 2666 2667 2668
	seq_puts(m, "#                  _------=> CPU#            \n"
		    "#                 / _-----=> irqs-off        \n"
		    "#                | / _----=> need-resched    \n"
		    "#                || / _---=> hardirq/softirq \n"
		    "#                ||| / _--=> preempt-depth   \n"
		    "#                |||| /     delay            \n"
		    "#  cmd     pid   ||||| time  |   caller      \n"
		    "#     \\   /      |||||  \\    |   /         \n");
2669 2670
}

2671
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2672
{
2673 2674 2675
	unsigned long total;
	unsigned long entries;

2676
	get_total_entries(buf, &total, &entries);
2677 2678 2679 2680 2681
	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}

2682
static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2683
{
2684
	print_event_info(buf, m);
2685 2686
	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
		    "#              | |       |          |         |\n");
2687 2688
}

2689
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2690
{
2691
	print_event_info(buf, m);
2692 2693 2694 2695 2696 2697 2698
	seq_puts(m, "#                              _-----=> irqs-off\n"
		    "#                             / _----=> need-resched\n"
		    "#                            | / _---=> hardirq/softirq\n"
		    "#                            || / _--=> preempt-depth\n"
		    "#                            ||| /     delay\n"
		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
		    "#              | |       |   ||||       |         |\n");
2699
}
2700

2701
void
2702 2703
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
2704
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2705 2706
	struct trace_buffer *buf = iter->trace_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2707
	struct tracer *type = iter->trace;
2708 2709
	unsigned long entries;
	unsigned long total;
2710 2711
	const char *name = "preemption";

2712
	name = type->name;
2713

2714
	get_total_entries(buf, &total, &entries);
2715

2716
	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2717
		   name, UTS_RELEASE);
2718
	seq_puts(m, "# -----------------------------------"
2719
		 "---------------------------------\n");
2720
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2721
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
2723
		   entries,
2724
		   total,
2725
		   buf->cpu,
2726 2727 2728 2729
#if defined(CONFIG_PREEMPT_NONE)
		   "server",
#elif defined(CONFIG_PREEMPT_VOLUNTARY)
		   "desktop",
2730
#elif defined(CONFIG_PREEMPT)
2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741
		   "preempt",
#else
		   "unknown",
#endif
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
2742 2743
	seq_puts(m, "#    -----------------\n");
	seq_printf(m, "#    | task: %.16s-%d "
2744
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2745 2746
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2747
		   data->policy, data->rt_priority);
2748
	seq_puts(m, "#    -----------------\n");
2749 2750

	if (data->critical_start) {
2751
		seq_puts(m, "#  => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
2754
		seq_puts(m, "\n#  => ended at:   ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
2757
		seq_puts(m, "\n#\n");
2758 2759
	}

2760
	seq_puts(m, "#\n");
2761 2762
}

2763 2764 2765
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
2766
	struct trace_array *tr = iter->tr;
2767

2768
	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2769 2770 2771 2772 2773
		return;

	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

2774
	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2775 2776
		return;

2777
	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2778 2779
		return;

2780 2781
	if (iter->started)
		cpumask_set_cpu(iter->cpu, iter->started);
2782 2783 2784 2785 2786

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				iter->cpu);
2787 2788
}

2789
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2790
{
2791
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
2793
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
2795
	struct trace_event *event;
2796

	entry = iter->ent;
2798

2799 2800
	test_cpu_buff_start(iter);

2801
	event = ftrace_find_event(entry->type);
2802

2803
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2804 2805 2806 2807
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
2808
	}
2809

2810 2811 2812
	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

2813
	if (event)
2814
		return event->funcs->trace(iter, sym_flags, event);
2815

2816
	trace_seq_printf(s, "Unknown type %d\n", entry->type);
2817

2818
	return trace_handle_return(s);
2819 2820
}

2821
static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
2823
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
2826
	struct trace_event *event;

	entry = iter->ent;
2829

2830
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2831 2832 2833 2834 2835
		trace_seq_printf(s, "%d %d %llu ",
				 entry->pid, iter->cpu, iter->ts);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

2837
	event = ftrace_find_event(entry->type);
2838
	if (event)
2839
		return event->funcs->raw(iter, 0, event);
2840

2841
	trace_seq_printf(s, "%d ?\n", entry->type);
2842

2843
	return trace_handle_return(s);
}

2846
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2847
{
2848
	struct trace_array *tr = iter->tr;
2849 2850 2851
	struct trace_seq *s = &iter->seq;
	unsigned char newline = '\n';
	struct trace_entry *entry;
2852
	struct trace_event *event;
2853 2854

	entry = iter->ent;
2855

2856
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2857 2858 2859 2860 2861
		SEQ_PUT_HEX_FIELD(s, entry->pid);
		SEQ_PUT_HEX_FIELD(s, iter->cpu);
		SEQ_PUT_HEX_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
2862
	}
2863

2864
	event = ftrace_find_event(entry->type);
2865
	if (event) {
2866
		enum print_line_t ret = event->funcs->hex(iter, 0, event);
2867 2868 2869
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}

2871
	SEQ_PUT_FIELD(s, newline);
2872

2873
	return trace_handle_return(s);
2874 2875
}

2876
static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
2878
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
2881
	struct trace_event *event;

	entry = iter->ent;
2884

2885
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2886 2887 2888 2889 2890
		SEQ_PUT_FIELD(s, entry->pid);
		SEQ_PUT_FIELD(s, iter->cpu);
		SEQ_PUT_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
2891
	}

2893
	event = ftrace_find_event(entry->type);
2894 2895
	return event ? event->funcs->binary(iter, 0, event) :
		TRACE_TYPE_HANDLED;
}

2898
int trace_empty(struct trace_iterator *iter)
2899
{
2900
	struct ring_buffer_iter *buf_iter;
2901 2902
	int cpu;

2903
	/* If we are looking at one CPU buffer, only check that one */
2904
	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2905
		cpu = iter->cpu_file;
2906 2907 2908
		buf_iter = trace_buffer_iter(iter, cpu);
		if (buf_iter) {
			if (!ring_buffer_iter_empty(buf_iter))
2909 2910
				return 0;
		} else {
2911
			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2912 2913 2914 2915 2916
				return 0;
		}
		return 1;
	}

2917
	for_each_tracing_cpu(cpu) {
2918 2919 2920
		buf_iter = trace_buffer_iter(iter, cpu);
		if (buf_iter) {
			if (!ring_buffer_iter_empty(buf_iter))
2921 2922
				return 0;
		} else {
2923
			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2924 2925
				return 0;
		}
2926
	}
2927

2928
	return 1;
2929 2930
}

2931
/*  Called with trace_event_read_lock() held. */
2932
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
2934 2935
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
2936 2937
	enum print_line_t ret;

2938 2939 2940 2941 2942 2943
	if (iter->lost_events) {
		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
				 iter->cpu, iter->lost_events);
		if (trace_seq_has_overflowed(&iter->seq))
			return TRACE_TYPE_PARTIAL_LINE;
	}
2944

2945 2946 2947 2948 2949
	if (iter->trace && iter->trace->print_line) {
		ret = iter->trace->print_line(iter);
		if (ret != TRACE_TYPE_UNHANDLED)
			return ret;
	}
2950

2951 2952 2953 2954 2955
	if (iter->ent->type == TRACE_BPUTS &&
			trace_flags & TRACE_ITER_PRINTK &&
			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
		return trace_print_bputs_msg_only(iter);

2956 2957 2958
	if (iter->ent->type == TRACE_BPRINT &&
			trace_flags & TRACE_ITER_PRINTK &&
			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2959
		return trace_print_bprintk_msg_only(iter);
2960

2961 2962 2963
	if (iter->ent->type == TRACE_PRINT &&
			trace_flags & TRACE_ITER_PRINTK &&
			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2964
		return trace_print_printk_msg_only(iter);
2965

	if (trace_flags & TRACE_ITER_BIN)
		return print_bin_fmt(iter);

2969 2970 2971
	if (trace_flags & TRACE_ITER_HEX)
		return print_hex_fmt(iter);

	if (trace_flags & TRACE_ITER_RAW)
		return print_raw_fmt(iter);

	return print_trace_fmt(iter);
}

2978 2979 2980
void trace_latency_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
2981
	struct trace_array *tr = iter->tr;
2982 2983 2984 2985 2986 2987 2988 2989

	/* print nothing if the buffers are empty */
	if (trace_empty(iter))
		return;

	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
		print_trace_header(m, iter);

2990
	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2991 2992 2993
		print_lat_help_header(m);
}

2994 2995 2996
void trace_default_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
2997 2998
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
2999

3000 3001 3002
	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
		return;

3003 3004 3005 3006 3007 3008 3009 3010
	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
		/* print nothing if the buffers are empty */
		if (trace_empty(iter))
			return;
		print_trace_header(m, iter);
		if (!(trace_flags & TRACE_ITER_VERBOSE))
			print_lat_help_header(m);
	} else {
3011 3012
		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
			if (trace_flags & TRACE_ITER_IRQ_INFO)
3013
				print_func_help_header_irq(iter->trace_buffer, m);
3014
			else
3015
				print_func_help_header(iter->trace_buffer, m);
3016
		}
3017 3018 3019
	}
}

3020 3021 3022 3023
static void test_ftrace_alive(struct seq_file *m)
{
	if (!ftrace_is_dead())
		return;
3024 3025
	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3026 3027
}

3028
#ifdef CONFIG_TRACER_MAX_TRACE
3029
static void show_snapshot_main_help(struct seq_file *m)
3030
{
3031 3032 3033 3034 3035 3036
	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer.\n"
		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
3037
}
3038 3039 3040

static void show_snapshot_percpu_help(struct seq_file *m)
{
3041
	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3042
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3043 3044
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3045
#else
3046 3047
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
		    "#                     Must use main snapshot file to allocate.\n");
3048
#endif
3049 3050 3051
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
3052 3053
}

3054 3055
static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
3056
	if (iter->tr->allocated_snapshot)
3057
		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3058
	else
3059
		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3060

3061
	seq_puts(m, "# Snapshot commands:\n");
3062 3063 3064 3065
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
		show_snapshot_main_help(m);
	else
		show_snapshot_percpu_help(m);
3066 3067 3068 3069 3070 3071
}
#else
/* Should never be called */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
#endif

3072 3073 3074
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
3075
	int ret;
3076 3077 3078 3079 3080

	if (iter->ent == NULL) {
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
3081
			test_ftrace_alive(m);
3082
		}
3083 3084 3085
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
3086
			iter->trace->print_header(m);
3087 3088 3089
		else
			trace_default_header(m);

3090 3091 3092 3093 3094 3095 3096 3097 3098 3099
	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

3100
	} else {
		print_trace_line(iter);
3102 3103 3104 3105 3106 3107 3108 3109 3110
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 *  ret is 0 if seq_file write succeeded.
		 *        -1 otherwise.
		 */
		iter->leftover = ret;
3111 3112 3113 3114 3115
	}

	return 0;
}

3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126
/*
 * Should be used after trace_array_get(), trace_types_lock
 * ensures that i_cdev was already initialized.
 */
static inline int tracing_get_cpu(struct inode *inode)
{
	if (inode->i_cdev) /* See trace_create_cpu_file() */
		return (long)inode->i_cdev - 1;
	return RING_BUFFER_ALL_CPUS;
}

static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
3132 3133
};

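/*
 * Set up a trace_iterator for reading the "trace" file: take a copy
 * of the current tracer, pick the buffer to read (max_buffer for
 * latency tracers or snapshot), and prepare per-CPU ring buffer
 * iterators. Tracing is stopped while the file is open unless this
 * is the snapshot file.
 */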
static struct trace_iterator *
3135
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
3136
{
3137
	struct trace_array *tr = inode->i_private;
3138
	struct trace_iterator *iter;
3139
	int cpu;
3140

3141 3142
	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

3144
	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3145 3146
	if (!iter)
		return ERR_PTR(-ENOMEM);
3147

3148
	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3149
				    GFP_KERNEL);
3150 3151 3152
	if (!iter->buffer_iter)
		goto release;

3153 3154 3155 3156
	/*
	 * We make a copy of the current tracer to avoid concurrent
	 * changes on it while we are reading.
	 */
3157
	mutex_lock(&trace_types_lock);
3158
	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3159
	if (!iter->trace)
3160
		goto fail;
3161

3162
	*iter->trace = *tr->current_trace;
3163

3164
	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3165 3166
		goto fail;

3167 3168 3169
	iter->tr = tr;

#ifdef CONFIG_TRACER_MAX_TRACE
3170 3171
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
3172
		iter->trace_buffer = &tr->max_buffer;
3173
	else
3174 3175
#endif
		iter->trace_buffer = &tr->trace_buffer;
3176
	iter->snapshot = snapshot;
3177
	iter->pos = -1;
3178
	iter->cpu_file = tracing_get_cpu(inode);
3179
	mutex_init(&iter->mutex);
3180

3181 3182
	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace && iter->trace->open)
3183
		iter->trace->open(iter);
3184

3185
	/* Annotate start of buffers if we had overruns */
3186
	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3187 3188
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

3189
	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3190
	if (trace_clocks[tr->clock_id].in_ns)
3191 3192
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

3193 3194
	/* stop the trace while dumping if we are not opening "snapshot" */
	if (!iter->snapshot)
3195
		tracing_stop_tr(tr);
3196

3197
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3198 3199
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
3200
				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3201 3202 3203 3204
		}
		ring_buffer_read_prepare_sync();
		for_each_tracing_cpu(cpu) {
			ring_buffer_read_start(iter->buffer_iter[cpu]);
3205
			tracing_iter_reset(iter, cpu);
3206 3207 3208
		}
	} else {
		cpu = iter->cpu_file;
3209
		iter->buffer_iter[cpu] =
3210
			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3211 3212
		ring_buffer_read_prepare_sync();
		ring_buffer_read_start(iter->buffer_iter[cpu]);
3213
		tracing_iter_reset(iter, cpu);
3214 3215
	}

3216 3217 3218
	mutex_unlock(&trace_types_lock);

	return iter;
3219

3220
 fail:
3221
	mutex_unlock(&trace_types_lock);
3222
	kfree(iter->trace);
3223
	kfree(iter->buffer_iter);
3224
release:
3225 3226
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
3227 3228 3229 3230
}

int tracing_open_generic(struct inode *inode, struct file *filp)
{
	if (tracing_disabled)
		return -ENODEV;

3234 3235 3236 3237
	filp->private_data = inode->i_private;
	return 0;
}

3238 3239 3240 3241 3242
bool tracing_is_disabled(void)
{
	return (tracing_disabled) ? true: false;
}

3243 3244 3245 3246
/*
 * Open and update trace_array ref count.
 * Must have the current trace_array passed to it.
 */
3247
static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261
{
	struct trace_array *tr = inode->i_private;

	if (tracing_disabled)
		return -ENODEV;

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	filp->private_data = inode->i_private;

	return 0;
}

3262
static int tracing_release(struct inode *inode, struct file *file)
3263
{
3264
	struct trace_array *tr = inode->i_private;
3265
	struct seq_file *m = file->private_data;
3266
	struct trace_iterator *iter;
3267
	int cpu;
3268

3269
	if (!(file->f_mode & FMODE_READ)) {
3270
		trace_array_put(tr);
3271
		return 0;
3272
	}
3273

3274
	/* Writes do not use seq_file */
3275
	iter = m->private;
3276
	mutex_lock(&trace_types_lock);
3277

3278 3279 3280 3281 3282
	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

3283 3284 3285
	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

3286 3287
	if (!iter->snapshot)
		/* reenable tracing if it was previously enabled */
3288
		tracing_start_tr(tr);
3289 3290 3291

	__trace_array_put(tr);

3292 3293
	mutex_unlock(&trace_types_lock);

3294
	mutex_destroy(&iter->mutex);
3295
	free_cpumask_var(iter->started);
3296
	kfree(iter->trace);
3297
	kfree(iter->buffer_iter);
3298
	seq_release_private(inode, file);
3299

3300 3301 3302
	return 0;
}

3303 3304 3305 3306 3307
static int tracing_release_generic_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
3308 3309 3310
	return 0;
}

3311 3312 3313 3314 3315 3316 3317 3318 3319
static int tracing_single_release_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	return single_release(inode, file);
}

3320 3321
static int tracing_open(struct inode *inode, struct file *file)
{
3322
	struct trace_array *tr = inode->i_private;
3323 3324
	struct trace_iterator *iter;
	int ret = 0;
3325

3326 3327 3328
	if (trace_array_get(tr) < 0)
		return -ENODEV;

3329
	/* If this file was open for write, then erase contents */
3330 3331 3332 3333
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);

		if (cpu == RING_BUFFER_ALL_CPUS)
3334
			tracing_reset_online_cpus(&tr->trace_buffer);
3335
		else
3336
			tracing_reset(&tr->trace_buffer, cpu);
3337
	}
3338

3339
	if (file->f_mode & FMODE_READ) {
3340
		iter = __tracing_open(inode, file, false);
3341 3342
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
3343
		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3344 3345
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}
3346 3347 3348 3349

	if (ret < 0)
		trace_array_put(tr);

3350 3351 3352
	return ret;
}

3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373
/*
 * Some tracers are not suitable for instance buffers.
 * A tracer is always available for the global array (toplevel)
 * or if it explicitly states that it is.
 */
static bool
trace_ok_for_array(struct tracer *t, struct trace_array *tr)
{
	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
}

/* Find the next tracer that this trace array may use */
static struct tracer *
get_tracer_for_array(struct trace_array *tr, struct tracer *t)
{
	while (t && !trace_ok_for_array(t, tr))
		t = t->next;

	return t;
}

static void *
3375 3376
t_next(struct seq_file *m, void *v, loff_t *pos)
{
3377
	struct trace_array *tr = m->private;
	struct tracer *t = v;
3379 3380 3381 3382

	(*pos)++;

	if (t)
3383
		t = get_tracer_for_array(tr, t->next);
3384 3385 3386 3387 3388 3389

	return t;
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
3390
	struct trace_array *tr = m->private;
	struct tracer *t;
3392 3393 3394
	loff_t l = 0;

	mutex_lock(&trace_types_lock);
3395 3396 3397 3398

	t = get_tracer_for_array(tr, trace_types);
	for (; t && l < *pos; t = t_next(m, t, &l))
			;
3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414

	return t;
}

static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}

static int t_show(struct seq_file *m, void *v)
{
	struct tracer *t = v;

	if (!t)
		return 0;

3415
	seq_puts(m, t->name);
3416 3417 3418 3419 3420 3421 3422 3423
	if (t->next)
		seq_putc(m, ' ');
	else
		seq_putc(m, '\n');

	return 0;
}

static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
3429 3430 3431 3432
};

static int show_traces_open(struct inode *inode, struct file *file)
{
3433 3434 3435 3436
	struct trace_array *tr = inode->i_private;
	struct seq_file *m;
	int ret;

	if (tracing_disabled)
		return -ENODEV;

3440 3441 3442 3443 3444 3445 3446 3447
	ret = seq_open(file, &show_traces_seq_ops);
	if (ret)
		return ret;

	m = file->private_data;
	m->private = tr;

	return 0;
3448 3449
}

3450 3451 3452 3453 3454 3455 3456
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}

3457
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3458
{
3459 3460
	int ret;

3461
	if (file->f_mode & FMODE_READ)
3462
		ret = seq_lseek(file, offset, whence);
3463
	else
3464 3465 3466
		file->f_pos = ret = 0;

	return ret;
3467 3468
}

3469
static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
3472
	.write		= tracing_write_stub,
3473
	.llseek		= tracing_lseek,
	.release	= tracing_release,
3475 3476
};

3477
static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.release	= seq_release,
3481
	.llseek		= seq_lseek,
};

3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495
/*
 * The tracer itself will not take this lock, but still we want
 * to provide a consistent cpumask to user-space:
 */
static DEFINE_MUTEX(tracing_cpumask_update_lock);

/*
 * Temporary storage for the character representation of the
 * CPU bitmask (and one more byte for the newline):
 */
static char mask_str[NR_CPUS + 1];

static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
3500
	struct trace_array *tr = file_inode(filp)->i_private;
3501
	int len;

	mutex_lock(&tracing_cpumask_update_lock);
3504

3505 3506 3507
	len = snprintf(mask_str, count, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask));
	if (len >= count) {
3508 3509 3510 3511 3512 3513
		count = -EINVAL;
		goto out_err;
	}
	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);

out_err:
	mutex_unlock(&tracing_cpumask_update_lock);

	return count;
}

static ssize_t
tracing_cpumask_write(struct file *filp, const char __user *ubuf,
		      size_t count, loff_t *ppos)
{
3523
	struct trace_array *tr = file_inode(filp)->i_private;
3524
	cpumask_var_t tracing_cpumask_new;
3525
	int err, cpu;
3526 3527 3528

	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
		return -ENOMEM;

3530
	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
	if (err)
3532 3533
		goto err_unlock;

3534 3535
	mutex_lock(&tracing_cpumask_update_lock);

3536
	local_irq_disable();
3537
	arch_spin_lock(&tr->max_lock);
3538
	for_each_tracing_cpu(cpu) {
3539 3540 3541 3542
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
3543
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3544
				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3545 3546
			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3547
		}
3548
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3549
				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3550 3551
			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3552 3553
		}
	}
3554
	arch_spin_unlock(&tr->max_lock);
3555
	local_irq_enable();
3556

3557
	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3558 3559

	mutex_unlock(&tracing_cpumask_update_lock);
3560
	free_cpumask_var(tracing_cpumask_new);

	return count;
3563 3564

err_unlock:
3565
	free_cpumask_var(tracing_cpumask_new);
3566 3567

	return err;
}

3570
static const struct file_operations tracing_cpumask_fops = {
3571
	.open		= tracing_open_generic_tr,
	.read		= tracing_cpumask_read,
	.write		= tracing_cpumask_write,
3574
	.release	= tracing_release_generic_tr,
3575
	.llseek		= generic_file_llseek,
3576 3577
};

static int tracing_trace_options_show(struct seq_file *m, void *v)
3579
{
3580
	struct tracer_opt *trace_opts;
3581
	struct trace_array *tr = m->private;
3582 3583
	u32 tracer_flags;
	int i;
3584

3585
	mutex_lock(&trace_types_lock);
3586 3587
	tracer_flags = tr->current_trace->flags->val;
	trace_opts = tr->current_trace->flags->opts;
3588

3589
	for (i = 0; trace_options[i]; i++) {
3590
		if (tr->trace_flags & (1 << i))
			seq_printf(m, "%s\n", trace_options[i]);
3592
		else
			seq_printf(m, "no%s\n", trace_options[i]);
3594 3595
	}

3596 3597
	for (i = 0; trace_opts[i].name; i++) {
		if (tracer_flags & trace_opts[i].bit)
			seq_printf(m, "%s\n", trace_opts[i].name);
3599
		else
			seq_printf(m, "no%s\n", trace_opts[i].name);
3601
	}
3602
	mutex_unlock(&trace_types_lock);
3603

	return 0;
3605 3606
}

3607
static int __set_tracer_option(struct trace_array *tr,
			       struct tracer_flags *tracer_flags,
			       struct tracer_opt *opts, int neg)
{
3611
	struct tracer *trace = tracer_flags->trace;
	int ret;
3613

3614
	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
	if (ret)
		return ret;

	if (neg)
		tracer_flags->val &= ~opts->bit;
	else
		tracer_flags->val |= opts->bit;
	return 0;
3623 3624
}

3625
/* Try to assign a tracer specific option */
3626
static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3627
{
3628
	struct tracer *trace = tr->current_trace;
3629
	struct tracer_flags *tracer_flags = trace->flags;
3630
	struct tracer_opt *opts = NULL;
	int i;
3632

3633 3634
	for (i = 0; tracer_flags->opts[i].name; i++) {
		opts = &tracer_flags->opts[i];
3635

		if (strcmp(cmp, opts->name) == 0)
3637
			return __set_tracer_option(tr, trace->flags, opts, neg);
3638 3639
	}

	return -EINVAL;
3641 3642
}

3643 3644 3645 3646 3647 3648 3649 3650 3651
/* Some tracers require overwrite to stay enabled */
int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
{
	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
		return -1;

	return 0;
}

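/*
 * Set or clear a trace option flag on @tr. The current tracer may
 * veto the change via its flag_changed() callback; side effects such
 * as cmdline recording, event forking, buffer overwrite mode and
 * trace_printk are propagated here.
 */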
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3653 3654
{
	/* do nothing if flag is already set */
3655
	if (!!(tr->trace_flags & mask) == !!enabled)
3656 3657 3658
		return 0;

	/* Give the tracer a chance to approve the change */
3659
	if (tr->current_trace->flag_changed)
3660
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3661
			return -EINVAL;
3662 3663

	if (enabled)
3664
		tr->trace_flags |= mask;
3665
	else
3666
		tr->trace_flags &= ~mask;
3667 3668 3669

	if (mask == TRACE_ITER_RECORD_CMD)
		trace_event_enable_cmd_record(enabled);
3670

3671 3672 3673
	if (mask == TRACE_ITER_EVENT_FORK)
		trace_event_follow_fork(tr, enabled);

3674
	if (mask == TRACE_ITER_OVERWRITE) {
3675
		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3676
#ifdef CONFIG_TRACER_MAX_TRACE
3677
		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3678 3679
#endif
	}
3680

3681
	if (mask == TRACE_ITER_PRINTK) {
3682
		trace_printk_start_stop_comm(enabled);
3683 3684
		trace_printk_control(enabled);
	}
3685 3686

	return 0;
3687 3688
}

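/*
 * Parse a single option token ("option" or "nooption") and apply it,
 * first against the core trace_options list and then against the
 * current tracer's own flags.
 */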
static int trace_set_options(struct trace_array *tr, char *option)
3690
{
	char *cmp;
3692
	int neg = 0;
3693
	int ret = -ENODEV;
3694
	int i;
3695
	size_t orig_len = strlen(option);
3696

3697
	cmp = strstrip(option);
3698

	if (strncmp(cmp, "no", 2) == 0) {
3700 3701 3702 3703
		neg = 1;
		cmp += 2;
	}

3704 3705
	mutex_lock(&trace_types_lock);

3706
	for (i = 0; trace_options[i]; i++) {
		if (strcmp(cmp, trace_options[i]) == 0) {
3708
			ret = set_tracer_flag(tr, 1 << i, !neg);
3709 3710 3711
			break;
		}
	}
3712 3713

	/* If no option could be set, test the specific tracer options */
3714
	if (!trace_options[i])
3715
		ret = set_tracer_option(tr, cmp, neg);
3716 3717

	mutex_unlock(&trace_types_lock);
3718

3719 3720 3721 3722 3723 3724 3725
	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

3726 3727 3728
	return ret;
}

3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739
static void __init apply_trace_boot_options(void)
{
	char *buf = trace_boot_options_buf;
	char *option;

	while (true) {
		option = strsep(&buf, ",");

		if (!option)
			break;

3740 3741
		if (*option)
			trace_set_options(&global_trace, option);
3742 3743 3744 3745 3746 3747 3748

		/* Put back the comma to allow this to be called again */
		if (buf)
			*(buf - 1) = ',';
	}
}

3749 3750 3751 3752
static ssize_t
tracing_trace_options_write(struct file *filp, const char __user *ubuf,
			size_t cnt, loff_t *ppos)
{
3753 3754
	struct seq_file *m = filp->private_data;
	struct trace_array *tr = m->private;
3755
	char buf[64];
3756
	int ret;
3757 3758 3759 3760

	if (cnt >= sizeof(buf))
		return -EINVAL;

3761
	if (copy_from_user(buf, ubuf, cnt))
3762 3763
		return -EFAULT;

3764 3765
	buf[cnt] = 0;

3766
	ret = trace_set_options(tr, buf);
3767 3768
	if (ret < 0)
		return ret;
3769

3770
	*ppos += cnt;
3771 3772 3773 3774

	return cnt;
}

static int tracing_trace_options_open(struct inode *inode, struct file *file)
{
3777
	struct trace_array *tr = inode->i_private;
3778
	int ret;
3779

L
Li Zefan 已提交
3780 3781
	if (tracing_disabled)
		return -ENODEV;
3782

3783 3784 3785
	if (trace_array_get(tr) < 0)
		return -ENODEV;

3786 3787 3788 3789 3790
	ret = single_open(file, tracing_trace_options_show, inode->i_private);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
L
Li Zefan 已提交
3791 3792
}

static const struct file_operations tracing_iter_fops = {
	.open		= tracing_trace_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_trace_options_write,
};

static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t-change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [if <filter>]\n\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n\n"
	"\t            .log2       display log2 value rather than raw number\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n"
#endif
;

static ssize_t
tracing_readme_read(struct file *filp, char __user *ubuf,
		       size_t cnt, loff_t *ppos)
{
	return simple_read_from_buffer(ubuf, cnt, ppos,
					readme_msg, strlen(readme_msg));
}

static const struct file_operations tracing_readme_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_readme_read,
	.llseek		= generic_file_llseek,
};

static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
	unsigned int *ptr = v;

	if (*pos || m->count)
		ptr++;

	(*pos)++;

	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
	     ptr++) {
		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
			continue;

		return ptr;
	}

	return NULL;
}

static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	void *v;
	loff_t l = 0;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	v = &savedcmd->map_cmdline_to_pid[0];
	while (l <= *pos) {
		v = saved_cmdlines_next(m, v, &l);
		if (!v)
			return NULL;
	}

	return v;
}

static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}

static int saved_cmdlines_show(struct seq_file *m, void *v)
{
	char buf[TASK_COMM_LEN];
	unsigned int *pid = v;

	__trace_find_cmdline(*pid, buf);
	seq_printf(m, "%d %s\n", *pid, buf);
	return 0;
}

static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
	.start		= saved_cmdlines_start,
	.next		= saved_cmdlines_next,
	.stop		= saved_cmdlines_stop,
	.show		= saved_cmdlines_show,
};

static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
{
	if (tracing_disabled)
		return -ENODEV;

	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
}

static const struct file_operations tracing_saved_cmdlines_fops = {
	.open		= tracing_saved_cmdlines_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
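
/*
 * Usage note (illustrative, not part of the original file): reading the
 * saved_cmdlines file walks the seq_file operations above and prints one
 * "<pid> <comm>" pair per line, e.g.
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines
 *	1 systemd
 *	...
 */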

static ssize_t
tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	arch_spin_lock(&trace_cmdline_lock);
	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
	arch_spin_unlock(&trace_cmdline_lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
	kfree(s->saved_cmdlines);
	kfree(s->map_cmdline_to_pid);
	kfree(s);
}

static int tracing_resize_saved_cmdlines(unsigned int val)
{
	struct saved_cmdlines_buffer *s, *savedcmd_temp;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	if (allocate_cmdlines_buffer(val, s) < 0) {
		kfree(s);
		return -ENOMEM;
	}

	arch_spin_lock(&trace_cmdline_lock);
	savedcmd_temp = savedcmd;
	savedcmd = s;
	arch_spin_unlock(&trace_cmdline_lock);
	free_saved_cmdlines_buffer(savedcmd_temp);

	return 0;
}

static ssize_t
tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
	if (!val || val > PID_MAX_DEFAULT)
		return -EINVAL;

	ret = tracing_resize_saved_cmdlines((unsigned int)val);
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

static const struct file_operations tracing_saved_cmdlines_size_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_saved_cmdlines_size_read,
	.write		= tracing_saved_cmdlines_size_write,
};
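
/*
 * Usage note (illustrative, not part of the original file): the size of
 * the saved comm list can be changed at run time, e.g.
 *
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * which reallocates the cmdline buffer via the resize helper above.
 */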

#ifdef CONFIG_TRACE_ENUM_MAP_FILE
static union trace_enum_map_item *
update_enum_map(union trace_enum_map_item *ptr)
{
	if (!ptr->map.enum_string) {
		if (ptr->tail.next) {
			ptr = ptr->tail.next;
			/* Set ptr to the next real item (skip head) */
			ptr++;
		} else
			return NULL;
	}
	return ptr;
}

static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_enum_map_item *ptr = v;

	/*
	 * Paranoid! If ptr points to end, we don't want to increment past it.
	 * This really should never happen.
	 */
	ptr = update_enum_map(ptr);
	if (WARN_ON_ONCE(!ptr))
		return NULL;

	ptr++;

	(*pos)++;

	ptr = update_enum_map(ptr);

	return ptr;
}

static void *enum_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_enum_map_item *v;
	loff_t l = 0;

	mutex_lock(&trace_enum_mutex);

	v = trace_enum_maps;
	if (v)
		v++;

	while (v && l < *pos) {
		v = enum_map_next(m, v, &l);
	}

	return v;
}

static void enum_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_enum_mutex);
}

static int enum_map_show(struct seq_file *m, void *v)
{
	union trace_enum_map_item *ptr = v;

	seq_printf(m, "%s %ld (%s)\n",
		   ptr->map.enum_string, ptr->map.enum_value,
		   ptr->map.system);

	return 0;
}

static const struct seq_operations tracing_enum_map_seq_ops = {
	.start		= enum_map_start,
	.next		= enum_map_next,
	.stop		= enum_map_stop,
	.show		= enum_map_show,
};

static int tracing_enum_map_open(struct inode *inode, struct file *filp)
{
	if (tracing_disabled)
		return -ENODEV;

	return seq_open(filp, &tracing_enum_map_seq_ops);
}

static const struct file_operations tracing_enum_map_fops = {
	.open		= tracing_enum_map_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static inline union trace_enum_map_item *
trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
{
	/* Return tail of array given the head */
	return ptr + ptr->head.length + 1;
}
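
/*
 * Illustrative layout (not part of the original file): for a module that
 * provides three enum maps, the array built by
 * trace_insert_enum_map_file() below looks like
 *
 *	[ head(mod, 3) ][ map 0 ][ map 1 ][ map 2 ][ tail(next) ]
 *
 * so jumping "length + 1" entries from the head lands on the tail item.
 */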

static void
trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
			   int len)
{
	struct trace_enum_map **stop;
	struct trace_enum_map **map;
	union trace_enum_map_item *map_array;
	union trace_enum_map_item *ptr;

	stop = start + len;

	/*
	 * The trace_enum_maps contains the map plus a head and tail item,
	 * where the head holds the module and length of array, and the
	 * tail holds a pointer to the next list.
	 */
	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
	if (!map_array) {
		pr_warn("Unable to allocate trace enum mapping\n");
		return;
	}

	mutex_lock(&trace_enum_mutex);

	if (!trace_enum_maps)
		trace_enum_maps = map_array;
	else {
		ptr = trace_enum_maps;
		for (;;) {
			ptr = trace_enum_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	memset(map_array, 0, sizeof(*map_array));

	mutex_unlock(&trace_enum_mutex);
}

static void trace_create_enum_file(struct dentry *d_tracer)
{
	trace_create_file("enum_map", 0444, d_tracer,
			  NULL, &tracing_enum_map_fops);
}

#else /* CONFIG_TRACE_ENUM_MAP_FILE */
static inline void trace_create_enum_file(struct dentry *d_tracer) { }
static inline void trace_insert_enum_map_file(struct module *mod,
			      struct trace_enum_map **start, int len) { }
#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */

static void trace_insert_enum_map(struct module *mod,
				  struct trace_enum_map **start, int len)
{
	struct trace_enum_map **map;

	if (len <= 0)
		return;

	map = start;

	trace_event_enum_update(map, len);

	trace_insert_enum_map_file(mod, start, len);
}

static ssize_t
tracing_set_trace_read(struct file *filp, char __user *ubuf,
		       size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[MAX_TRACER_SIZE+2];
	int r;

	mutex_lock(&trace_types_lock);
	r = sprintf(buf, "%s\n", tr->current_trace->name);
	mutex_unlock(&trace_types_lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

int tracer_init(struct tracer *t, struct trace_array *tr)
{
	tracing_reset_online_cpus(&tr->trace_buffer);
	return t->init(tr);
}

static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}

#ifdef CONFIG_TRACER_MAX_TRACE
/* resize @trace_buf's buffer to the size of @size_buf's entries */
4357 4358
static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
					struct trace_buffer *size_buf, int cpu_id)
4359 4360 4361 4362 4363
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
4364 4365
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4366 4367
			if (ret < 0)
				break;
4368 4369
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				per_cpu_ptr(size_buf->data, cpu)->entries;
4370 4371
		}
	} else {
4372 4373
		ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4374
		if (ret == 0)
4375 4376
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4377 4378 4379 4380
	}

	return ret;
}
4381
#endif /* CONFIG_TRACER_MAX_TRACE */
4382

4383 4384
static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
4385 4386 4387 4388 4389
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
4390 4391
	 * we use the size that was given, and we can forget about
	 * expanding it later.
4392
	 */
4393
	ring_buffer_expanded = true;
4394

4395
	/* May be called before buffers are initialized */
4396
	if (!tr->trace_buffer.buffer)
4397 4398
		return 0;

4399
	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4400 4401 4402
	if (ret < 0)
		return ret;

4403
#ifdef CONFIG_TRACER_MAX_TRACE
4404 4405
	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
	    !tr->current_trace->use_max_tr)
4406 4407
		goto out;

4408
	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4409
	if (ret < 0) {
4410 4411
		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
						     &tr->trace_buffer, cpu);
4412
		if (r < 0) {
			/*
			 * AARGH! We are left with a max buffer of a
			 * different size!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snapshot. We succeeded in
			 * updating the size of the main buffer, but failed
			 * to update the size of the max buffer. And when we
			 * tried to reset the main buffer to the original
			 * size, we failed there too. This is very unlikely
			 * to happen, but if it does, warn and kill all
			 * tracing.
			 */
4427 4428 4429 4430 4431 4432
			WARN_ON(1);
			tracing_disabled = 1;
		}
		return ret;
	}

4433
	if (cpu == RING_BUFFER_ALL_CPUS)
4434
		set_buffer_entries(&tr->max_buffer, size);
4435
	else
4436
		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4437

4438
 out:
4439 4440
#endif /* CONFIG_TRACER_MAX_TRACE */

4441
	if (cpu == RING_BUFFER_ALL_CPUS)
4442
		set_buffer_entries(&tr->trace_buffer, size);
4443
	else
4444
		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4445 4446 4447 4448

	return ret;
}

4449 4450
static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
					  unsigned long size, int cpu_id)
4451
{
4452
	int ret = size;
4453 4454 4455

	mutex_lock(&trace_types_lock);

4456 4457 4458 4459 4460 4461 4462
	if (cpu_id != RING_BUFFER_ALL_CPUS) {
		/* make sure this cpu is enabled in the mask */
		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
			ret = -EINVAL;
			goto out;
		}
	}
4463

4464
	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4465 4466 4467
	if (ret < 0)
		ret = -ENOMEM;

4468
out:
4469 4470 4471 4472 4473
	mutex_unlock(&trace_types_lock);

	return ret;
}


/**
 * tracing_update_buffers - used by tracing facility to expand ring buffers
 *
 * To save memory when tracing is never used on a system that has it
 * configured in, the ring buffers are initially set to a minimum size.
 * Once a user starts to use the tracing facility, they are expanded
 * to their default size.
 *
 * This function is to be called when a tracer is about to be used.
 */
int tracing_update_buffers(void)
{
	int ret = 0;

	mutex_lock(&trace_types_lock);
	if (!ring_buffer_expanded)
		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
	mutex_unlock(&trace_types_lock);

	return ret;
}

struct trace_option_dentry;

static void
create_trace_option_files(struct trace_array *tr, struct tracer *tracer);

/*
 * Used to clear out the tracer before deletion of an instance.
 * Must have trace_types_lock held.
 */
static void tracing_set_nop(struct trace_array *tr)
{
	if (tr->current_trace == &nop_trace)
		return;

	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	tr->current_trace = &nop_trace;
}

static void add_tracer_options(struct trace_array *tr, struct tracer *t)
{
	/* Only enable if the directory has been created already. */
	if (!tr->dir)
		return;

	create_trace_option_files(tr, t);
}

static int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
4531
	struct tracer *t;
4532
#ifdef CONFIG_TRACER_MAX_TRACE
4533
	bool had_max_tr;
4534
#endif
4535
	int ret = 0;
4536

4537 4538
	mutex_lock(&trace_types_lock);

4539
	if (!ring_buffer_expanded) {
4540
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4541
						RING_BUFFER_ALL_CPUS);
4542
		if (ret < 0)
4543
			goto out;
4544 4545 4546
		ret = 0;
	}

4547 4548 4549 4550
	for (t = trace_types; t; t = t->next) {
		if (strcmp(t->name, buf) == 0)
			break;
	}
4551 4552 4553 4554
	if (!t) {
		ret = -EINVAL;
		goto out;
	}
4555
	if (t == tr->current_trace)
4556 4557
		goto out;

4558 4559 4560 4561 4562 4563
	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(t, tr)) {
		ret = -EINVAL;
		goto out;
	}

4564 4565 4566 4567 4568 4569
	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->current_trace->ref) {
		ret = -EBUSY;
		goto out;
	}

4570
	trace_branch_disable();
4571

4572
	tr->current_trace->enabled--;
4573

4574 4575
	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);
4576

4577
	/* Current trace needs to be nop_trace before synchronize_sched */
4578
	tr->current_trace = &nop_trace;
4579

4580 4581
#ifdef CONFIG_TRACER_MAX_TRACE
	had_max_tr = tr->allocated_snapshot;
4582 4583 4584 4585 4586 4587 4588 4589 4590 4591

	if (had_max_tr && !t->use_max_tr) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called with interrupts disabled
		 * so a synchronize_sched() is sufficient.
		 */
		synchronize_sched();
4592
		free_snapshot(tr);
4593
	}
4594 4595 4596
#endif

#ifdef CONFIG_TRACER_MAX_TRACE
4597
	if (t->use_max_tr && !had_max_tr) {
4598
		ret = alloc_snapshot(tr);
4599 4600
		if (ret < 0)
			goto out;
4601
	}
4602
#endif
4603

4604
	if (t->init) {
4605
		ret = tracer_init(t, tr);
4606 4607 4608
		if (ret)
			goto out;
	}
4609

4610
	tr->current_trace = t;
4611
	tr->current_trace->enabled++;
4612
	trace_branch_enable(tr);
4613 4614 4615
 out:
	mutex_unlock(&trace_types_lock);

4616 4617 4618 4619 4620 4621 4622
	return ret;
}
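
/*
 * Usage note (illustrative, not part of the original file): this is the
 * backend for writes to current_tracer, e.g.
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	# echo function_graph > /sys/kernel/tracing/current_tracer
 *
 * Unknown tracer names fail with -EINVAL, and the tracer cannot be
 * switched while a trace_pipe reader holds a reference (-EBUSY above).
 */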

static ssize_t
tracing_set_trace_write(struct file *filp, const char __user *ubuf,
			size_t cnt, loff_t *ppos)
{
4623
	struct trace_array *tr = filp->private_data;
L
Li Zefan 已提交
4624
	char buf[MAX_TRACER_SIZE+1];
4625 4626
	int i;
	size_t ret;
4627 4628 4629
	int err;

	ret = cnt;
4630

L
Li Zefan 已提交
4631 4632
	if (cnt > MAX_TRACER_SIZE)
		cnt = MAX_TRACER_SIZE;
4633

4634
	if (copy_from_user(buf, ubuf, cnt))
4635 4636 4637 4638 4639 4640 4641 4642
		return -EFAULT;

	buf[cnt] = 0;

	/* strip ending whitespace. */
	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
		buf[i] = 0;

4643
	err = tracing_set_tracer(tr, buf);
4644 4645
	if (err)
		return err;
4646

4647
	*ppos += ret;
4648

4649
	return ret;
4650 4651 4652
}

static ssize_t
4653 4654
tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
		   size_t cnt, loff_t *ppos)
4655 4656 4657 4658
{
	char buf[64];
	int r;

S
Steven Rostedt 已提交
4659
	r = snprintf(buf, sizeof(buf), "%ld\n",
4660
		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
S
Steven Rostedt 已提交
4661 4662
	if (r > sizeof(buf))
		r = sizeof(buf);
I
Ingo Molnar 已提交
4663
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4664 4665 4666
}

static ssize_t
4667 4668
tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
4669
{
4670
	unsigned long val;
4671
	int ret;
4672

4673 4674
	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
4675
		return ret;
4676 4677 4678 4679 4680 4681

	*ptr = val * 1000;

	return cnt;
}

static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}

static ssize_t
tracing_thresh_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	int ret;

	mutex_lock(&trace_types_lock);
	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
	if (ret < 0)
		goto out;

	if (tr->current_trace->update_thresh) {
		ret = tr->current_trace->update_thresh(tr);
		if (ret < 0)
			goto out;
	}

	ret = cnt;
out:
	mutex_unlock(&trace_types_lock);

	return ret;
}

#ifdef CONFIG_TRACER_MAX_TRACE

static ssize_t
tracing_max_lat_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
}

static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
}

#endif

static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
4734
	struct trace_array *tr = inode->i_private;
4735
	struct trace_iterator *iter;
4736
	int ret = 0;
4737 4738 4739 4740

	if (tracing_disabled)
		return -ENODEV;

4741 4742 4743
	if (trace_array_get(tr) < 0)
		return -ENODEV;

4744 4745
	mutex_lock(&trace_types_lock);

4746 4747
	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4748 4749
	if (!iter) {
		ret = -ENOMEM;
4750
		__trace_array_put(tr);
4751 4752
		goto out;
	}
4753

4754
	trace_seq_init(&iter->seq);
4755
	iter->trace = tr->current_trace;
4756

4757
	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4758
		ret = -ENOMEM;
4759
		goto fail;
4760 4761
	}

4762
	/* trace pipe does not show start of buffer */
4763
	cpumask_setall(iter->started);
4764

4765
	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4766 4767
		iter->iter_flags |= TRACE_FILE_LAT_FMT;

4768
	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4769
	if (trace_clocks[tr->clock_id].in_ns)
4770 4771
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

4772 4773 4774
	iter->tr = tr;
	iter->trace_buffer = &tr->trace_buffer;
	iter->cpu_file = tracing_get_cpu(inode);
4775
	mutex_init(&iter->mutex);
4776 4777
	filp->private_data = iter;

4778 4779 4780
	if (iter->trace->pipe_open)
		iter->trace->pipe_open(iter);

4781
	nonseekable_open(inode, filp);
4782 4783

	tr->current_trace->ref++;
4784 4785 4786
out:
	mutex_unlock(&trace_types_lock);
	return ret;
4787 4788 4789 4790

fail:
	kfree(iter->trace);
	kfree(iter);
4791
	__trace_array_put(tr);
4792 4793
	mutex_unlock(&trace_types_lock);
	return ret;
4794 4795 4796 4797 4798
}

static int tracing_release_pipe(struct inode *inode, struct file *file)
{
	struct trace_iterator *iter = file->private_data;
4799
	struct trace_array *tr = inode->i_private;
4800

4801 4802
	mutex_lock(&trace_types_lock);

4803 4804
	tr->current_trace->ref--;

4805
	if (iter->trace->pipe_close)
		iter->trace->pipe_close(iter);

4808 4809
	mutex_unlock(&trace_types_lock);

4810
	free_cpumask_var(iter->started);
4811
	mutex_destroy(&iter->mutex);
4812 4813
	kfree(iter);

4814 4815
	trace_array_put(tr);

4816 4817 4818
	return 0;
}

4819
static unsigned int
4820
trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4821
{
4822 4823
	struct trace_array *tr = iter->tr;

4824 4825 4826
	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return POLLIN | POLLRDNORM;
4827

4828
	if (tr->trace_flags & TRACE_ITER_BLOCK)
4829 4830 4831 4832
		/*
		 * Always select as readable when in blocking mode
		 */
		return POLLIN | POLLRDNORM;
4833
	else
4834
		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4835
					     filp, poll_table);
4836 4837
}

4838 4839 4840 4841 4842 4843
static unsigned int
tracing_poll_pipe(struct file *filp, poll_table *poll_table)
{
	struct trace_iterator *iter = filp->private_data;

	return trace_poll(iter, filp, poll_table);
4844 4845
}

4846
/* Must be called with iter->mutex held. */
4847
static int tracing_wait_pipe(struct file *filp)
4848 4849
{
	struct trace_iterator *iter = filp->private_data;
4850
	int ret;
4851 4852

	while (trace_empty(iter)) {
4853

4854
		if ((filp->f_flags & O_NONBLOCK)) {
4855
			return -EAGAIN;
4856
		}
4857

4858
		/*
		 * We block until we read something and tracing is disabled.
4860 4861 4862 4863 4864 4865 4866
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
4867
		if (!tracing_is_on() && iter->pos)
4868
			break;
4869 4870 4871

		mutex_unlock(&iter->mutex);

4872
		ret = wait_on_pipe(iter, false);
4873 4874 4875

		mutex_lock(&iter->mutex);

4876 4877
		if (ret)
			return ret;
4878 4879
	}

4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897
	return 1;
}

/*
 * Consumer reader.
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		return sret;

4898
	trace_seq_init(&iter->seq);
4899

4900 4901 4902 4903 4904 4905
	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	mutex_lock(&iter->mutex);
4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			goto out;
	}

waitagain:
	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		goto out;

4917
	/* stop when tracing is finished */
4918 4919
	if (trace_empty(iter)) {
		sret = 0;
4920
		goto out;
4921
	}
4922 4923 4924 4925

	if (cnt >= PAGE_SIZE)
		cnt = PAGE_SIZE - 1;

4926 4927 4928 4929
	/* reset all but tr, trace, and overruns */
	memset(&iter->seq, 0,
	       sizeof(struct trace_iterator) -
	       offsetof(struct trace_iterator, seq));
4930
	cpumask_clear(iter->started);
4931
	iter->pos = -1;
4932

4933
	trace_event_read_lock();
4934
	trace_access_lock(iter->cpu_file);
4935
	while (trace_find_next_entry_inc(iter) != NULL) {
4936
		enum print_line_t ret;
4937
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
4940
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* don't print partial lines */
4942
			iter->seq.seq.len = save_len;
4943
			break;
		}
4945 4946
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
4947

4948
		if (trace_seq_used(&iter->seq) >= cnt)
4949
			break;
4950 4951 4952 4953 4954 4955 4956 4957

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
4958
	}
4959
	trace_access_unlock(iter->cpu_file);
4960
	trace_event_read_unlock();
4961 4962

	/* Now copy what we have to the user */
4963
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4964
	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4965
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
4971
	if (sret == -EBUSY)
		goto waitagain;
4973

4974
out:
4975
	mutex_unlock(&iter->mutex);
4976

4977
	return sret;
4978 4979
}

4980 4981 4982 4983 4984 4985
static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
				     unsigned int idx)
{
	__free_page(spd->pages[idx]);
}

4986
static const struct pipe_buf_operations tracing_pipe_buf_ops = {
	.can_merge		= 0,
	.confirm		= generic_pipe_buf_confirm,
4989
	.release		= generic_pipe_buf_release,
	.steal			= generic_pipe_buf_steal,
	.get			= generic_pipe_buf_get,
4992 4993
};

static size_t
4995
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
4998
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
5003
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);
5005 5006 5007

		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}
5010 5011 5012 5013 5014 5015

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5017 5018 5019 5020
			iter->seq.seq.len = save_len;
			break;
		}

5021
		count = trace_seq_used(&iter->seq) - save_len;
5022 5023 5024
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

5028 5029
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
5031
		if (!trace_find_next_entry_inc(iter))	{
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}

5041 5042 5043 5044 5045 5046
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
5047 5048
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5049 5050
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
5051 5052
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
5054
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.flags		= flags,
		.ops		= &tracing_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
5058 5059
	};
	ssize_t ret;
	size_t rem;
5061 5062
	unsigned int i;

5063 5064 5065
	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

5066
	mutex_lock(&iter->mutex);
5067 5068 5069 5070 5071

	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
5073 5074 5075 5076
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;
5078

5079
	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5080
		ret = -EFAULT;
		goto out_err;
5082 5083
	}

5084
	trace_event_read_lock();
5085
	trace_access_lock(iter->cpu_file);
5086

5087
	/* Fill as many pages as possible. */
5088
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5089 5090
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;
5092

5093
		rem = tracing_fill_pipe_page(rem, iter);
5094 5095 5096

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
5097
					  page_address(spd.pages[i]),
5098
					  trace_seq_used(&iter->seq));
5099
		if (ret < 0) {
5100
			__free_page(spd.pages[i]);
5101 5102
			break;
		}
5103
		spd.partial[i].offset = 0;
5104
		spd.partial[i].len = trace_seq_used(&iter->seq);
5105

5106
		trace_seq_init(&iter->seq);
5107 5108
	}

5109
	trace_access_unlock(iter->cpu_file);
5110
	trace_event_read_unlock();
5111
	mutex_unlock(&iter->mutex);
5112 5113 5114

	spd.nr_pages = i;

5115 5116 5117 5118
	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
5119
out:
5120
	splice_shrink_spd(&spd);
5121
	return ret;
5122

out_err:
5124
	mutex_unlock(&iter->mutex);
5125
	goto out;
5126 5127
}

5128 5129 5130 5131
static ssize_t
tracing_entries_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
5132 5133 5134
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	int cpu = tracing_get_cpu(inode);
5135 5136 5137
	char buf[64];
	int r = 0;
	ssize_t ret;
5138

5139
	mutex_lock(&trace_types_lock);
5140

5141
	if (cpu == RING_BUFFER_ALL_CPUS) {
5142 5143 5144 5145 5146 5147 5148 5149 5150
		int cpu, buf_size_same;
		unsigned long size;

		size = 0;
		buf_size_same = 1;
		/* check if all cpu sizes are same */
		for_each_tracing_cpu(cpu) {
			/* fill in the size from first enabled cpu */
			if (size == 0)
5151 5152
				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167
				buf_size_same = 0;
				break;
			}
		}

		if (buf_size_same) {
			if (!ring_buffer_expanded)
				r = sprintf(buf, "%lu (expanded: %lu)\n",
					    size >> 10,
					    trace_buf_size >> 10);
			else
				r = sprintf(buf, "%lu\n", size >> 10);
		} else
			r = sprintf(buf, "X\n");
	} else
5168
		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5169

5170 5171
	mutex_unlock(&trace_types_lock);

5172 5173
	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
	return ret;
5174 5175 5176 5177 5178 5179
}

static ssize_t
tracing_entries_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
5180 5181
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
5182
	unsigned long val;
5183
	int ret;
5184

5185 5186
	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
5187
		return ret;
5188 5189 5190 5191 5192

	/* must have at least 1 entry */
	if (!val)
		return -EINVAL;

5193 5194
	/* value is in KB */
	val <<= 10;
5195
	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5196 5197
	if (ret < 0)
		return ret;
5198

5199
	*ppos += cnt;
5200

5201 5202
	return cnt;
}
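
/*
 * Usage note (illustrative, not part of the original file): this services
 * writes to buffer_size_kb (per instance, and per cpu under per_cpu/),
 * with the value given in KB, e.g.
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes every per-cpu ring buffer to 4 MB.
 */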

5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214
static ssize_t
tracing_total_entries_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[64];
	int r, cpu;
	unsigned long size = 0, expanded_size = 0;

	mutex_lock(&trace_types_lock);
	for_each_tracing_cpu(cpu) {
5215
		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227
		if (!ring_buffer_expanded)
			expanded_size += trace_buf_size >> 10;
	}
	if (ring_buffer_expanded)
		r = sprintf(buf, "%lu\n", size);
	else
		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
	mutex_unlock(&trace_types_lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

5228 5229 5230 5231 5232 5233 5234 5235 5236 5237
static ssize_t
tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	/*
	 * There is no need to read what the user has written, this function
	 * is just to make sure that there is no error when "echo" is used
	 */

	*ppos += cnt;
5238 5239 5240 5241

	return cnt;
}

5242 5243 5244
static int
tracing_free_buffer_release(struct inode *inode, struct file *filp)
{
5245 5246
	struct trace_array *tr = inode->i_private;

5247
	/* disable tracing ? */
5248
	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5249
		tracer_tracing_off(tr);
5250
	/* resize the ring buffer to 0 */
5251
	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5252

5253 5254
	trace_array_put(tr);

5255 5256 5257
	return 0;
}

5258 5259 5260 5261
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
5262
	unsigned long addr = (unsigned long)ubuf;
5263
	struct trace_array *tr = filp->private_data;
5264 5265 5266 5267 5268
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct print_entry *entry;
	unsigned long irq_flags;
	struct page *pages[2];
5269
	void *map_page[2];
5270 5271 5272 5273 5274 5275
	int nr_pages = 1;
	ssize_t written;
	int offset;
	int size;
	int len;
	int ret;
5276
	int i;
5277

	if (tracing_disabled)
5279 5280
		return -EINVAL;

5281
	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5282 5283
		return -EINVAL;

5284 5285 5286
	if (cnt > TRACE_BUF_SIZE)
		cnt = TRACE_BUF_SIZE;

5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301
	/*
	 * Userspace is injecting traces into the kernel trace buffer.
	 * We want to be as non intrusive as possible.
	 * To do so, we do not want to allocate any special buffers
	 * or take any locks, but instead write the userspace data
	 * straight into the ring buffer.
	 *
	 * First we need to pin the userspace buffer into memory,
	 * which most likely it already is, because userspace just referenced it.
	 * But there's no guarantee that it is. By using get_user_pages_fast()
	 * and kmap_atomic/kunmap_atomic() we can get access to the
	 * pages directly. We then write the data directly into the
	 * ring buffer.
	 */
	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5302

5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315
	/* check if we cross pages */
	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
		nr_pages = 2;

	offset = addr & (PAGE_SIZE - 1);
	addr &= PAGE_MASK;

	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
	if (ret < nr_pages) {
		while (--ret >= 0)
			put_page(pages[ret]);
		written = -EFAULT;
		goto out;
5316
	}
5317

5318 5319
	for (i = 0; i < nr_pages; i++)
		map_page[i] = kmap_atomic(pages[i]);
5320 5321 5322

	local_save_flags(irq_flags);
	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5323
	buffer = tr->trace_buffer.buffer;
5324 5325 5326 5327 5328 5329
	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					  irq_flags, preempt_count());
	if (!event) {
		/* Ring buffer disabled, return as if not open for write */
		written = -EBADF;
		goto out_unlock;
5330
	}
5331 5332 5333 5334 5335 5336

	entry = ring_buffer_event_data(event);
	entry->ip = _THIS_IP_;

	if (nr_pages == 2) {
		len = PAGE_SIZE - offset;
5337 5338
		memcpy(&entry->buf, map_page[0] + offset, len);
		memcpy(&entry->buf[len], map_page[1], cnt - len);
	} else
5340
		memcpy(&entry->buf, map_page[0] + offset, cnt);
5341

5342 5343 5344 5345 5346 5347
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

5348
	__buffer_unlock_commit(buffer, event);
5349

5350
	written = cnt;
5351

5352
	*fpos += written;
5353

5354
 out_unlock:
5355
	for (i = nr_pages - 1; i >= 0; i--) {
5356 5357 5358
		kunmap_atomic(map_page[i]);
		put_page(pages[i]);
	}
5359
 out:
5360
	return written;
5361 5362
}
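
/*
 * Usage note (illustrative, not part of the original file): this services
 * writes to trace_marker, e.g.
 *
 *	# echo "hit the bug here" > /sys/kernel/tracing/trace_marker
 *
 * which injects the string into the ring buffer as a print entry.
 */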

static int tracing_clock_show(struct seq_file *m, void *v)
5364
{
5365
	struct trace_array *tr = m->private;
5366 5367 5368
	int i;

	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
		seq_printf(m,
5370
			"%s%s%s%s", i ? " " : "",
5371 5372
			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
			i == tr->clock_id ? "]" : "");
	seq_putc(m, '\n');
5374

	return 0;
5376 5377
}

5378
static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390
{
	int i;

	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
		if (strcmp(trace_clocks[i].name, clockstr) == 0)
			break;
	}
	if (i == ARRAY_SIZE(trace_clocks))
		return -EINVAL;

	mutex_lock(&trace_types_lock);

5391 5392
	tr->clock_id = i;

5393
	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5394

5395 5396 5397 5398
	/*
	 * New clock may not be consistent with the previous clock.
	 * Reset the buffer so that it doesn't have incomparable timestamps.
	 */
5399
	tracing_reset_online_cpus(&tr->trace_buffer);
5400 5401 5402 5403

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5404
	tracing_reset_online_cpus(&tr->max_buffer);
5405
#endif
5406

5407 5408
	mutex_unlock(&trace_types_lock);

5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423
	return 0;
}
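
/*
 * Usage note (illustrative, not part of the original file): the clock is
 * selected by name through the trace_clock file, e.g.
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf ...
 *	# echo global > /sys/kernel/tracing/trace_clock
 *
 * The buffers are reset because old and new timestamps are not comparable.
 */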

static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *fpos)
{
	struct seq_file *m = filp->private_data;
	struct trace_array *tr = m->private;
	char buf[64];
	const char *clockstr;
	int ret;

	if (cnt >= sizeof(buf))
		return -EINVAL;

5424
	if (copy_from_user(buf, ubuf, cnt))
5425 5426 5427 5428 5429 5430 5431 5432 5433 5434
		return -EFAULT;

	buf[cnt] = 0;

	clockstr = strstrip(buf);

	ret = tracing_set_clock(tr, clockstr);
	if (ret)
		return ret;

5435 5436 5437 5438 5439
	*fpos += cnt;

	return cnt;
}

static int tracing_clock_open(struct inode *inode, struct file *file)
{
5442 5443 5444
	struct trace_array *tr = inode->i_private;
	int ret;

	if (tracing_disabled)
		return -ENODEV;
5447

5448 5449 5450 5451 5452 5453 5454 5455
	if (trace_array_get(tr))
		return -ENODEV;

	ret = single_open(file, tracing_clock_show, inode->i_private);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

5458 5459 5460 5461 5462 5463
struct ftrace_buffer_info {
	struct trace_iterator	iter;
	void			*spare;
	unsigned int		read;
};

5464 5465 5466
#ifdef CONFIG_TRACER_SNAPSHOT
static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
5467
	struct trace_array *tr = inode->i_private;
5468
	struct trace_iterator *iter;
5469
	struct seq_file *m;
5470 5471
	int ret = 0;

5472 5473 5474
	if (trace_array_get(tr) < 0)
		return -ENODEV;

5475
	if (file->f_mode & FMODE_READ) {
5476
		iter = __tracing_open(inode, file, true);
5477 5478
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
5479 5480
	} else {
		/* Writes still need the seq_file to hold the private data */
5481
		ret = -ENOMEM;
5482 5483
		m = kzalloc(sizeof(*m), GFP_KERNEL);
		if (!m)
5484
			goto out;
5485 5486 5487
		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
		if (!iter) {
			kfree(m);
5488
			goto out;
5489
		}
5490 5491
		ret = 0;

5492
		iter->tr = tr;
5493 5494
		iter->trace_buffer = &tr->max_buffer;
		iter->cpu_file = tracing_get_cpu(inode);
5495 5496
		m->private = iter;
		file->private_data = m;
5497
	}
5498
out:
5499 5500 5501
	if (ret < 0)
		trace_array_put(tr);

5502 5503 5504 5505 5506 5507 5508
	return ret;
}

static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
5509 5510 5511
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524
	unsigned long val;
	int ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);

5525
	if (tr->current_trace->use_max_tr) {
5526 5527 5528 5529 5530 5531
		ret = -EBUSY;
		goto out;
	}

	switch (val) {
	case 0:
5532 5533 5534
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
5535
		}
5536 5537
		if (tr->allocated_snapshot)
			free_snapshot(tr);
5538 5539
		break;
	case 1:
5540 5541 5542 5543 5544 5545 5546
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
#endif
5547
		if (!tr->allocated_snapshot) {
5548
			ret = alloc_snapshot(tr);
5549 5550 5551 5552 5553
			if (ret < 0)
				break;
		}
		local_irq_disable();
		/* Now, we're going to swap */
5554
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5555
			update_max_tr(tr, current, smp_processor_id());
5556
		else
5557
			update_max_tr_single(tr, current, iter->cpu_file);
5558 5559 5560
		local_irq_enable();
		break;
	default:
5561
		if (tr->allocated_snapshot) {
5562 5563 5564 5565 5566
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->max_buffer);
			else
				tracing_reset(&tr->max_buffer, iter->cpu_file);
		}
5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577
		break;
	}

	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}
out:
	mutex_unlock(&trace_types_lock);
	return ret;
}
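
/*
 * Usage note (illustrative, not part of the original file): with
 * CONFIG_TRACER_SNAPSHOT, writes to the snapshot file drive this handler:
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot   (allocate and take a snapshot)
 *	# echo 0 > /sys/kernel/tracing/snapshot   (free the snapshot buffer)
 *	# echo 2 > /sys/kernel/tracing/snapshot   (clear it, keep it allocated)
 *
 * as handled by the switch statement above.
 */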
5578 5579 5580 5581

static int tracing_snapshot_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
5582 5583 5584
	int ret;

	ret = tracing_release(inode, file);
5585 5586

	if (file->f_mode & FMODE_READ)
5587
		return ret;
5588 5589 5590 5591 5592 5593 5594 5595 5596

	/* If write only, the seq_file is just a stub */
	if (m)
		kfree(m->private);
	kfree(m);

	return 0;
}

5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625
static int tracing_buffers_open(struct inode *inode, struct file *filp);
static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
				    size_t count, loff_t *ppos);
static int tracing_buffers_release(struct inode *inode, struct file *file);
static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);

static int snapshot_raw_open(struct inode *inode, struct file *filp)
{
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_buffers_open(inode, filp);
	if (ret < 0)
		return ret;

	info = filp->private_data;

	if (info->iter.trace->use_max_tr) {
		tracing_buffers_release(inode, filp);
		return -EBUSY;
	}

	info->iter.snapshot = true;
	info->iter.trace_buffer = &info->iter.tr->max_buffer;

	return ret;
}

5626 5627 5628
#endif /* CONFIG_TRACER_SNAPSHOT */


5629 5630 5631 5632 5633 5634 5635
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};

#ifdef CONFIG_TRACER_MAX_TRACE
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
};
#endif

static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
};

static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
	.llseek		= no_llseek,
};

static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};

static const struct file_operations tracing_mark_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_mark_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};

#ifdef CONFIG_TRACER_SNAPSHOT
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_lseek,
	.release	= tracing_snapshot_release,
};

static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};

#endif /* CONFIG_TRACER_SNAPSHOT */

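/* Open a per-cpu trace_pipe_raw file and set up its buffer iterator. */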
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	if (tracing_disabled)
		return -ENODEV;

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	mutex_lock(&trace_types_lock);

	info->iter.tr		= tr;
	info->iter.cpu_file	= tracing_get_cpu(inode);
	info->iter.trace	= tr->current_trace;
	info->iter.trace_buffer = &tr->trace_buffer;
	info->spare		= NULL;
	/* Force reading ring buffer for first read */
	info->read		= (unsigned int)-1;

	filp->private_data = info;

	tr->current_trace->ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

static unsigned int
tracing_buffers_poll(struct file *filp, poll_table *poll_table)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;

	return trace_poll(iter, filp, poll_table);
}

static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	if (!info->spare)
		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
							  iter->cpu_file);
	if (!info->spare)
		return -ENOMEM;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, false);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}

static int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	mutex_lock(&trace_types_lock);

	iter->tr->current_trace->ref--;

	__trace_array_put(iter->tr);

	if (info->spare)
		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
	kfree(info);

	mutex_unlock(&trace_types_lock);

	return 0;
}

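/*
 * A buffer_ref pins one ring buffer page handed out to a pipe; the
 * page is freed once the last reference is dropped.
 */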
struct buffer_ref {
	struct ring_buffer	*buffer;
	void			*page;
	int			ref;
};

static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	if (--ref->ref)
		return;

	ring_buffer_free_read_page(ref->buffer, ref->page);
	kfree(ref);
	buf->private = 0;
}

static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
				struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	ref->ref++;
}

/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.can_merge		= 0,
	.confirm		= generic_pipe_buf_confirm,
	.release		= buffer_pipe_buf_release,
	.steal			= generic_pipe_buf_steal,
	.get			= buffer_pipe_buf_get,
};

/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 */
static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	struct buffer_ref *ref =
		(struct buffer_ref *)spd->partial[i].private;

	if (--ref->ref)
		return;

	ring_buffer_free_read_page(ref->buffer, ref->page);
	kfree(ref);
	spd->partial[i].private = 0;
}

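/*
 * Splice ring buffer pages into a pipe without copying, one page per
 * pipe buffer, waiting for data unless the caller asked for non-blocking.
 */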
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.flags		= flags,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, size, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);

	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		ref->ref = 1;
		ref->buffer = iter->trace_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (!ref->page) {
			ret = -ENOMEM;
			kfree(ref);
			break;
		}

		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			ring_buffer_free_read_page(ref->buffer, ref->page);
			kfree(ref);
			break;
		}

		/*
		 * zero out any left over data, this is going to
		 * user land.
		 */
		size = ring_buffer_page_len(ref->page);
		if (size < PAGE_SIZE)
			memset(ref->page + size, 0, PAGE_SIZE - size);

		page = virt_to_page(ref->page);

		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		if (ret)
			return ret;

		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			return -EAGAIN;

		ret = wait_on_pipe(iter, true);
		if (ret)
			return ret;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
	splice_shrink_spd(&spd);

	return ret;
}

static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};

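/* Read per-cpu ring buffer statistics (entries, overruns, timestamps). */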
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct trace_buffer *trace_buf = &tr->trace_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
								t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				ring_buffer_time_stamp(trace_buf->buffer, cpu));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: %ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}

static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

#ifdef CONFIG_DYNAMIC_FTRACE

int __weak ftrace_arch_read_dyn_info(char *buf, int size)
{
	return 0;
}

static ssize_t
tracing_read_dyn_info(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	static char ftrace_dyn_info_buffer[1024];
	static DEFINE_MUTEX(dyn_info_mutex);
	unsigned long *p = filp->private_data;
	char *buf = ftrace_dyn_info_buffer;
	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
	int r;

	mutex_lock(&dyn_info_mutex);
	r = sprintf(buf, "%ld ", *p);

	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
	buf[r++] = '\n';

	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);

	mutex_unlock(&dyn_info_mutex);

	return r;
}

static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
#endif /* CONFIG_DYNAMIC_FTRACE */

#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	tracing_snapshot();
}

static void
ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	unsigned long *count = (long *)data;

	if (!*count)
		return;

	if (*count != -1)
		(*count)--;

	tracing_snapshot();
}

static int
ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
		      struct ftrace_probe_ops *ops, void *data)
{
	long count = (long)data;

	seq_printf(m, "%ps:", (void *)ip);

	seq_puts(m, "snapshot");

	if (count == -1)
		seq_puts(m, ":unlimited\n");
	else
		seq_printf(m, ":count=%ld\n", count);

	return 0;
}

static struct ftrace_probe_ops snapshot_probe_ops = {
	.func			= ftrace_snapshot,
	.print			= ftrace_snapshot_print,
};

static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func			= ftrace_count_snapshot,
	.print			= ftrace_snapshot_print,
};

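/*
 * Parse the "snapshot[:count]" command from set_ftrace_filter and
 * register (or, with a leading '!', unregister) the matching function probe.
 */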
static int
ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
			       char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;
	void *count = (void *)-1;
	char *number;
	int ret;

	/* hash funcs only work with set_ftrace_filter */
	if (!enable)
		return -EINVAL;

	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;

	if (glob[0] == '!') {
		unregister_ftrace_function_probe_func(glob+1, ops);
		return 0;
	}

	if (!param)
		goto out_reg;

	number = strsep(&param, ":");

	if (!strlen(number))
		goto out_reg;

	/*
	 * We use the callback data field (which is a pointer)
	 * as our counter.
	 */
	ret = kstrtoul(number, 0, (unsigned long *)&count);
	if (ret)
		return ret;

 out_reg:
	ret = register_ftrace_function_probe(glob, ops, count);

	if (ret >= 0)
		alloc_snapshot(&global_trace);

	return ret < 0 ? ret : 0;
}

static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};

static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
#else
static inline __init int register_snapshot_cmd(void) { return 0; }
#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */

static struct dentry *tracing_get_dentry(struct trace_array *tr)
{
	if (WARN_ON(!tr->dir))
		return ERR_PTR(-ENODEV);

	/* Top directory uses NULL as the parent */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return NULL;

	/* All sub buffers have a descriptor */
	return tr->dir;
}

static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
{
	struct dentry *d_tracer;

	if (tr->percpu_dir)
		return tr->percpu_dir;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);

	WARN_ONCE(!tr->percpu_dir,
		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);

	return tr->percpu_dir;
}

static struct dentry *
trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
		      void *data, long cpu, const struct file_operations *fops)
{
	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);

	if (ret) /* See tracing_get_cpu() */
		d_inode(ret)->i_cdev = (void *)(cpu + 1);
	return ret;
}

static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
				tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", 0644, d_cpu,
				tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
				tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", 0444, d_cpu,
				tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
				tr, cpu, &tracing_entries_fops);

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_cpu_file("snapshot", 0644, d_cpu,
				tr, cpu, &snapshot_fops);

	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
				tr, cpu, &snapshot_raw_fops);
#endif
}

#ifdef CONFIG_FTRACE_SELFTEST
/* Let selftest have access to static functions in this file */
#include "trace_selftest.c"
#endif

static ssize_t
trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
			loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	char *buf;

	if (topt->flags->val & topt->opt->bit)
		buf = "1\n";
	else
		buf = "0\n";

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
}

static ssize_t
trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	if (!!(topt->flags->val & topt->opt->bit) != val) {
		mutex_lock(&trace_types_lock);
		ret = __set_tracer_option(topt->tr, topt->flags,
					  topt->opt, !val);
		mutex_unlock(&trace_types_lock);
		if (ret)
			return ret;
	}

	*ppos += cnt;

	return cnt;
}


static const struct file_operations trace_options_fops = {
	.open = tracing_open_generic,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
};

/*
 * In order to pass in both the trace_array descriptor as well as the index
 * to the flag that the trace option file represents, the trace_array
 * has a character array of trace_flags_index[], which holds the index
 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
 * The address of this character array is passed to the flag option file
 * read/write callbacks.
 *
 * In order to extract both the index and the trace_array descriptor,
 * get_tr_index() uses the following algorithm.
 *
 *   idx = *ptr;
 *
 * As the pointer itself contains the address of the index (remember
 * index[1] == 1).
 *
 * Then to get the trace_array descriptor, by subtracting that index
 * from the ptr, we get to the start of the index itself.
 *
 *   ptr - idx == &index[0]
 *
 * Then a simple container_of() from that pointer gets us to the
 * trace_array descriptor.
 */
static void get_tr_index(void *data, struct trace_array **ptr,
			 unsigned int *pindex)
{
	*pindex = *(unsigned char *)data;

	*ptr = container_of(data - *pindex, struct trace_array,
			    trace_flags_index);
}

static ssize_t
trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
			loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	char *buf;

	get_tr_index(tr_index, &tr, &index);

	if (tr->trace_flags & (1 << index))
		buf = "1\n";
	else
		buf = "0\n";

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
}

static ssize_t
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	unsigned long val;
	int ret;

	get_tr_index(tr_index, &tr, &index);

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, 1 << index, val);
	mutex_unlock(&trace_types_lock);

	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};

struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *ret;

	ret = tracefs_create_file(name, mode, parent, data, fops);
	if (!ret)
		pr_warn("Could not create tracefs '%s' entry\n", name);

	return ret;
}


static struct dentry *trace_options_init_dentry(struct trace_array *tr)
{
	struct dentry *d_tracer;

	if (tr->options)
		return tr->options;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->options = tracefs_create_dir("options", d_tracer);
	if (!tr->options) {
		pr_warn("Could not create tracefs directory 'options'\n");
		return NULL;
	}

	return tr->options;
}

static void
create_trace_option_file(struct trace_array *tr,
			 struct trace_option_dentry *topt,
			 struct tracer_flags *flags,
			 struct tracer_opt *opt)
{
	struct dentry *t_options;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return;

	topt->flags = flags;
	topt->opt = opt;
	topt->tr = tr;

	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
				    &trace_options_fops);

}

static void
create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
{
	struct trace_option_dentry *topts;
	struct trace_options *tr_topts;
	struct tracer_flags *flags;
	struct tracer_opt *opts;
	int cnt;
	int i;

	if (!tracer)
		return;

	flags = tracer->flags;

	if (!flags || !flags->opts)
		return;

	/*
	 * If this is an instance, only create flags for tracers
	 * the instance may have.
	 */
	if (!trace_ok_for_array(tracer, tr))
		return;

	for (i = 0; i < tr->nr_topts; i++) {
		/* Make sure there's no duplicate flags. */
		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
			return;
	}

	opts = flags->opts;

	for (cnt = 0; opts[cnt].name; cnt++)
		;

	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
	if (!topts)
		return;

	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
			    GFP_KERNEL);
	if (!tr_topts) {
		kfree(topts);
		return;
	}

	tr->topts = tr_topts;
	tr->topts[tr->nr_topts].tracer = tracer;
	tr->topts[tr->nr_topts].topts = topts;
	tr->nr_topts++;

	for (cnt = 0; opts[cnt].name; cnt++) {
		create_trace_option_file(tr, &topts[cnt], flags,
					 &opts[cnt]);
		WARN_ONCE(topts[cnt].entry == NULL,
			  "Failed to create trace option: %s",
			  opts[cnt].name);
	}
}

static struct dentry *
create_trace_option_core_file(struct trace_array *tr,
			      const char *option, long index)
{
	struct dentry *t_options;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return NULL;

	return trace_create_file(option, 0644, t_options,
				 (void *)&tr->trace_flags_index[index],
				 &trace_options_core_fops);
}

static void create_trace_options_dir(struct trace_array *tr)
{
	struct dentry *t_options;
	bool top_level = tr == &global_trace;
	int i;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return;

	for (i = 0; trace_options[i]; i++) {
		if (top_level ||
		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
			create_trace_option_core_file(tr, trace_options[i], i);
	}
}

static ssize_t
rb_simple_read(struct file *filp, char __user *ubuf,
	       size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[64];
	int r;

	r = tracer_tracing_is_on(tr);
	r = sprintf(buf, "%d\n", r);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		mutex_lock(&trace_types_lock);
		if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
		}
		mutex_unlock(&trace_types_lock);
	}

	(*ppos)++;

	return cnt;
}

static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};

struct dentry *trace_instance_dir;

static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);

static int
allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
{
	enum ring_buffer_flags rb_flags;

	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;

	buf->tr = tr;

	buf->buffer = ring_buffer_alloc(size, rb_flags);
	if (!buf->buffer)
		return -ENOMEM;

	buf->data = alloc_percpu(struct trace_array_cpu);
	if (!buf->data) {
		ring_buffer_free(buf->buffer);
		return -ENOMEM;
	}

	/* Allocate the first page for all buffers */
	set_buffer_entries(&tr->trace_buffer,
			   ring_buffer_size(tr->trace_buffer.buffer, 0));

	return 0;
}

static int allocate_trace_buffers(struct trace_array *tr, int size)
{
	int ret;

	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
	if (ret)
		return ret;

#ifdef CONFIG_TRACER_MAX_TRACE
	ret = allocate_trace_buffer(tr, &tr->max_buffer,
				    allocate_snapshot ? size : 1);
	if (WARN_ON(ret)) {
		ring_buffer_free(tr->trace_buffer.buffer);
		free_percpu(tr->trace_buffer.data);
		return -ENOMEM;
	}
	tr->allocated_snapshot = allocate_snapshot;

	/*
	 * Only the top level trace array gets its snapshot allocated
	 * from the kernel command line.
	 */
	allocate_snapshot = false;
#endif
	return 0;
}

static void free_trace_buffer(struct trace_buffer *buf)
{
	if (buf->buffer) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		free_percpu(buf->data);
		buf->data = NULL;
	}
}

static void free_trace_buffers(struct trace_array *tr)
{
	if (!tr)
		return;

	free_trace_buffer(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	free_trace_buffer(&tr->max_buffer);
#endif
}

static void init_trace_flags_index(struct trace_array *tr)
{
	int i;

	/* Used by the trace options files */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
		tr->trace_flags_index[i] = i;
}

static void __update_tracer_options(struct trace_array *tr)
{
	struct tracer *t;

	for (t = trace_types; t; t = t->next)
		add_tracer_options(tr, t);
}

static void update_tracer_options(struct trace_array *tr)
{
	mutex_lock(&trace_types_lock);
	__update_tracer_options(tr);
	mutex_unlock(&trace_types_lock);
}

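/* Create a new trace instance: its own buffers, flags and tracefs directory. */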
static int instance_mkdir(const char *name)
{
	struct trace_array *tr;
	int ret;

	mutex_lock(&trace_types_lock);

	ret = -EEXIST;
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, name) == 0)
			goto out_unlock;
	}

	ret = -ENOMEM;
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out_unlock;

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	tr->current_trace = &nop_trace;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	tr->dir = tracefs_create_dir(name, trace_instance_dir);
	if (!tr->dir)
		goto out_free_tr;

	ret = event_trace_add_tracer(tr->dir, tr);
	if (ret) {
		tracefs_remove_recursive(tr->dir);
		goto out_free_tr;
	}

	init_tracer_tracefs(tr, tr->dir);
	init_trace_flags_index(tr);
	__update_tracer_options(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	mutex_unlock(&trace_types_lock);

	return 0;

 out_free_tr:
	free_trace_buffers(tr);
	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);

 out_unlock:
	mutex_unlock(&trace_types_lock);

	return ret;

}

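/* Tear down a trace instance, unless something still holds a reference to it. */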
static int instance_rmdir(const char *name)
{
	struct trace_array *tr;
	int found = 0;
	int ret;
	int i;

	mutex_lock(&trace_types_lock);

	ret = -ENODEV;
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, name) == 0) {
			found = 1;
			break;
		}
	}
	if (!found)
		goto out_unlock;

	ret = -EBUSY;
	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
		goto out_unlock;

	list_del(&tr->list);

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1 << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1 << i, 0);
	}

	tracing_set_nop(tr);
	event_trace_del_tracer(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove_recursive(tr->dir);
	free_trace_buffers(tr);

	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	kfree(tr->name);
	kfree(tr);

	ret = 0;

 out_unlock:
	mutex_unlock(&trace_types_lock);

	return ret;
}

static __init void create_trace_instances(struct dentry *d_tracer)
{
	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
							 instance_mkdir,
							 instance_rmdir);
	if (WARN_ON(!trace_instance_dir))
		return;
}

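/* Populate a trace array's tracefs directory with the standard control files. */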
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	int cpu;

	trace_create_file("available_tracers", 0444, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", 0644, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", 0644, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", 0644, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", 0644, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", 0444, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", 0644, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	trace_create_file("trace_clock", 0644, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", 0644, d_tracer,
			  tr, &rb_simple_fops);

	create_trace_options_dir(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	trace_create_file("tracing_max_latency", 0644, d_tracer,
			&tr->max_latency, &tracing_max_lat_fops);
#endif

	if (ftrace_create_function_files(tr, d_tracer))
		WARN(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_file("snapshot", 0644, d_tracer,
			  tr, &snapshot_fops);
#endif

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

}

static struct vfsmount *trace_automount(void *ignore)
{
	struct vfsmount *mnt;
	struct file_system_type *type;

	/*
	 * To maintain backward compatibility for tools that mount
	 * debugfs to get to the tracing facility, tracefs is automatically
	 * mounted to the debugfs/tracing directory.
	 */
	type = get_fs_type("tracefs");
	if (!type)
		return NULL;
	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		return NULL;
	mntget(mnt);

	return mnt;
}

/**
 * tracing_init_dentry - initialize top level trace array
 *
 * This is called when creating files or directories in the tracing
 * directory. It is called via fs_initcall() by any of the boot up code
 * and expects to return the dentry of the top level tracing directory.
 */
struct dentry *tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	/* The top level trace array uses NULL as parent */
	if (tr->dir)
		return NULL;

	if (WARN_ON(!tracefs_initialized()) ||
		(IS_ENABLED(CONFIG_DEBUG_FS) &&
		 WARN_ON(!debugfs_initialized())))
		return ERR_PTR(-ENODEV);

	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);
	if (!tr->dir) {
		pr_warn_once("Could not create debugfs directory 'tracing'\n");
		return ERR_PTR(-ENOMEM);
	}

	return NULL;
}

extern struct trace_enum_map *__start_ftrace_enum_maps[];
extern struct trace_enum_map *__stop_ftrace_enum_maps[];

static void __init trace_enum_init(void)
{
	int len;

	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
}

#ifdef CONFIG_MODULES
static void trace_module_add_enums(struct module *mod)
{
	if (!mod->num_trace_enums)
		return;

	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
}

#ifdef CONFIG_TRACE_ENUM_MAP_FILE
static void trace_module_remove_enums(struct module *mod)
{
	union trace_enum_map_item *map;
	union trace_enum_map_item **last = &trace_enum_maps;

	if (!mod->num_trace_enums)
		return;

	mutex_lock(&trace_enum_mutex);

	map = trace_enum_maps;

	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_enum_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		goto out;

	*last = trace_enum_jmp_to_tail(map)->tail.next;
	kfree(map);
 out:
	mutex_unlock(&trace_enum_mutex);
}
#else
static inline void trace_module_remove_enums(struct module *mod) { }
#endif /* CONFIG_TRACE_ENUM_MAP_FILE */

static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct module *mod = data;

	switch (val) {
	case MODULE_STATE_COMING:
		trace_module_add_enums(mod);
		break;
	case MODULE_STATE_GOING:
		trace_module_remove_enums(mod);
		break;
	}

	return 0;
}

static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
#endif /* CONFIG_MODULES */

static __init int tracer_init_tracefs(void)
{
	struct dentry *d_tracer;

	trace_access_lock_init();

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	init_tracer_tracefs(&global_trace, d_tracer);

	trace_create_file("tracing_thresh", 0644, d_tracer,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", 0444, d_tracer,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", 0444, d_tracer,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_enum_init();

	trace_create_enum_file(d_tracer);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif

	create_trace_instances(d_tracer);

	update_tracer_options(&global_trace);

	return 0;
}

static int trace_panic_handler(struct notifier_block *this,
			       unsigned long event, void *unused)
{
	if (ftrace_dump_on_oops)
		ftrace_dump(ftrace_dump_on_oops);
	return NOTIFY_OK;
}

static struct notifier_block trace_panic_notifier = {
	.notifier_call  = trace_panic_handler,
	.next           = NULL,
	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
};

static int trace_die_handler(struct notifier_block *self,
			     unsigned long val,
			     void *data)
{
	switch (val) {
	case DIE_OOPS:
		if (ftrace_dump_on_oops)
			ftrace_dump(ftrace_dump_on_oops);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_handler,
	.priority = 200
};

/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG

void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->trace_buffer = &global_trace.trace_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->trace_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7253 7254
}

void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}
	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");
	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		/* reset all but tr, trace, and overruns */
		memset(&iter.seq, 0,
		       sizeof(struct trace_iterator) -
		       offsetof(struct trace_iterator, seq));
		iter.iter_flags |= TRACE_FILE_LAT_FMT;
		iter.pos = -1;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);

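/* Boot-time allocation of the global trace array, ring buffers and notifiers. */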
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/* Used for event triggers */
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_free_cpumask;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
		WARN_ON(1);
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}

void __init trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (WARN_ON(!tracepoint_print_iter))
			tracepoint_printk = 0;
	}
	tracer_alloc_buffers();
	trace_event_init();
}

__init static int clear_boot_tracer(void)
{
	/*
	 * The default tracer at boot buffer is an init section.
	 * This function is called in lateinit. If we did not
	 * find the boot tracer, then clear it out, to prevent
	 * later registration from accessing the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return 0;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;

	return 0;
}

fs_initcall(tracer_init_tracefs);
late_initcall(clear_boot_tracer);