提交 b3806b43 编写于 作者: S Steven Rostedt 提交者: Thomas Gleixner

ftrace: user run time file reading

This patch creates a file called trace_pipe in the tracing
debug directory. This file is a consumer of the trace buffers.
This means that reads of this file consumes the entries from
the trace buffers so that they will not be read a second time,
as contrast to the static buffers latency_trace and trace.

Reading from the trace_pipe will remove the entries from trace
and latency_trace too.

The advantage that trace_pipe has is that it can record live
traces. It will block when there is nothing in the buffer,
and read the entries as they are entered.  An EOF happens when
tracing is disabled (tracing_enabled = 0).
Signed-off-by: NSteven Rostedt <srostedt@redhat.com>
Signed-off-by: NIngo Molnar <mingo@elte.hu>
Signed-off-by: NThomas Gleixner <tglx@linutronix.de>
上级 214023c3
......@@ -174,15 +174,20 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
{
int len = (PAGE_SIZE - 1) - s->len;
va_list ap;
int ret;
if (!len)
return 0;
va_start(ap, fmt);
len = vsnprintf(s->buffer + s->len, len, fmt, ap);
ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
va_end(ap);
s->len += len;
/* If we can't write it all, don't bother writing anything */
if (ret > len)
return 0;
s->len += ret;
return len;
}
......@@ -193,7 +198,7 @@ trace_seq_puts(struct trace_seq *s, const char *str)
int len = strlen(str);
if (len > ((PAGE_SIZE - 1) - s->len))
len = (PAGE_SIZE - 1) - s->len;
return 0;
memcpy(s->buffer + s->len, str, len);
s->len += len;
......@@ -615,11 +620,13 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
{
struct trace_entry *entry;
spin_lock(&data->lock);
entry = tracing_get_trace_entry(tr, data);
tracing_generic_entry_update(entry, flags);
entry->type = TRACE_FN;
entry->fn.ip = ip;
entry->fn.parent_ip = parent_ip;
spin_unlock(&data->lock);
}
notrace void
......@@ -630,6 +637,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
{
struct trace_entry *entry;
spin_lock(&data->lock);
entry = tracing_get_trace_entry(tr, data);
tracing_generic_entry_update(entry, flags);
entry->type = TRACE_CTX;
......@@ -638,6 +646,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
entry->ctx.prev_state = prev->state;
entry->ctx.next_pid = next->pid;
entry->ctx.next_prio = next->prio;
spin_unlock(&data->lock);
}
enum trace_file_type {
......@@ -652,7 +661,9 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
struct trace_entry *array;
if (iter->next_idx[cpu] >= tr->entries ||
iter->next_idx[cpu] >= data->trace_idx)
iter->next_idx[cpu] >= data->trace_idx ||
(data->trace_head == data->trace_tail &&
data->trace_head_idx == data->trace_tail_idx))
return NULL;
if (!iter->next_page[cpu]) {
......@@ -702,33 +713,57 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
return next;
}
static void *find_next_entry_inc(struct trace_iterator *iter)
static notrace void
trace_iterator_increment(struct trace_iterator *iter)
{
struct trace_entry *next;
int next_cpu = -1;
iter->idx++;
iter->next_idx[iter->cpu]++;
iter->next_page_idx[iter->cpu]++;
if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
struct trace_array_cpu *data = iter->tr->data[iter->cpu];
iter->next_page_idx[iter->cpu] = 0;
iter->next_page[iter->cpu] =
trace_next_list(data, iter->next_page[iter->cpu]);
}
}
next = find_next_entry(iter, &next_cpu);
static notrace void
trace_consume(struct trace_iterator *iter)
{
struct trace_array_cpu *data = iter->tr->data[iter->cpu];
if (next) {
iter->idx++;
iter->next_idx[next_cpu]++;
iter->next_page_idx[next_cpu]++;
data->trace_tail_idx++;
if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
data->trace_tail = trace_next_page(data, data->trace_tail);
data->trace_tail_idx = 0;
}
if (iter->next_page_idx[next_cpu] >= ENTRIES_PER_PAGE) {
struct trace_array_cpu *data = iter->tr->data[next_cpu];
/* Check if we empty it, then reset the index */
if (data->trace_head == data->trace_tail &&
data->trace_head_idx == data->trace_tail_idx)
data->trace_idx = 0;
iter->next_page_idx[next_cpu] = 0;
iter->next_page[next_cpu] =
trace_next_list(data, iter->next_page[next_cpu]);
trace_iterator_increment(iter);
}
static notrace void *
find_next_entry_inc(struct trace_iterator *iter)
{
struct trace_entry *next;
int next_cpu = -1;
next = find_next_entry(iter, &next_cpu);
}
}
iter->prev_ent = iter->ent;
iter->prev_cpu = iter->cpu;
iter->ent = next;
iter->cpu = next_cpu;
if (next)
trace_iterator_increment(iter);
return next ? iter : NULL;
}
......@@ -815,7 +850,7 @@ static void s_stop(struct seq_file *m, void *p)
mutex_unlock(&trace_types_lock);
}
static void
static int
seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
......@@ -823,11 +858,12 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
kallsyms_lookup(address, NULL, NULL, NULL, str);
trace_seq_printf(s, fmt, str);
return trace_seq_printf(s, fmt, str);
#endif
return 1;
}
static void
static int
seq_print_sym_offset(struct trace_seq *s, const char *fmt,
unsigned long address)
{
......@@ -835,8 +871,9 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
char str[KSYM_SYMBOL_LEN];
sprint_symbol(str, address);
trace_seq_printf(s, fmt, str);
return trace_seq_printf(s, fmt, str);
#endif
return 1;
}
#ifndef CONFIG_64BIT
......@@ -845,21 +882,25 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
# define IP_FMT "%016lx"
#endif
static notrace void
static notrace int
seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
{
if (!ip) {
trace_seq_printf(s, "0");
return;
}
int ret;
if (!ip)
return trace_seq_printf(s, "0");
if (sym_flags & TRACE_ITER_SYM_OFFSET)
seq_print_sym_offset(s, "%s", ip);
ret = seq_print_sym_offset(s, "%s", ip);
else
seq_print_sym_short(s, "%s", ip);
ret = seq_print_sym_short(s, "%s", ip);
if (!ret)
return 0;
if (sym_flags & TRACE_ITER_SYM_ADDR)
trace_seq_printf(s, " <" IP_FMT ">", ip);
ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
return ret;
}
static notrace void print_lat_help_header(struct seq_file *m)
......@@ -1089,7 +1130,7 @@ static notrace void sync_time_offset(struct trace_iterator *iter)
array->time_offset += prev_t - t;
}
static notrace void
static notrace int
print_trace_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
......@@ -1100,6 +1141,7 @@ print_trace_fmt(struct trace_iterator *iter)
unsigned long secs;
char *comm;
int S;
int ret;
sync_time_offset(iter);
entry = iter->ent;
......@@ -1110,31 +1152,49 @@ print_trace_fmt(struct trace_iterator *iter)
usec_rem = do_div(t, 1000000ULL);
secs = (unsigned long)t;
trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
trace_seq_printf(s, "[%02d] ", iter->cpu);
trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
if (!ret)
return 0;
ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
if (!ret)
return 0;
ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
if (!ret)
return 0;
switch (entry->type) {
case TRACE_FN:
seq_print_ip_sym(s, entry->fn.ip, sym_flags);
ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
if (!ret)
return 0;
if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
entry->fn.parent_ip) {
trace_seq_printf(s, " <-");
seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
ret = trace_seq_printf(s, " <-");
if (!ret)
return 0;
ret = seq_print_ip_sym(s, entry->fn.parent_ip,
sym_flags);
if (!ret)
return 0;
}
trace_seq_printf(s, "\n");
ret = trace_seq_printf(s, "\n");
if (!ret)
return 0;
break;
case TRACE_CTX:
S = entry->ctx.prev_state < sizeof(state_to_char) ?
state_to_char[entry->ctx.prev_state] : 'X';
trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n",
ret = trace_seq_printf(s, " %d:%d:%c ==> %d:%d\n",
entry->ctx.prev_pid,
entry->ctx.prev_prio,
S,
entry->ctx.next_pid,
entry->ctx.next_prio);
if (!ret)
return 0;
break;
}
return 1;
}
static int trace_empty(struct trace_iterator *iter)
......@@ -1145,7 +1205,9 @@ static int trace_empty(struct trace_iterator *iter)
for_each_possible_cpu(cpu) {
data = iter->tr->data[cpu];
if (head_page(data) && data->trace_idx)
if (head_page(data) && data->trace_idx &&
(data->trace_tail != data->trace_head ||
data->trace_tail_idx != data->trace_head_idx))
return 0;
}
return 1;
......@@ -1645,6 +1707,192 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
return cnt;
}
static atomic_t tracing_reader;
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
struct trace_iterator *iter;
if (tracing_disabled)
return -ENODEV;
/* We only allow for reader of the pipe */
if (atomic_inc_return(&tracing_reader) != 1) {
atomic_dec(&tracing_reader);
return -EBUSY;
}
/* create a buffer to store the information to pass to userspace */
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return -ENOMEM;
iter->tr = &global_trace;
filp->private_data = iter;
return 0;
}
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
struct trace_iterator *iter = file->private_data;
kfree(iter);
atomic_dec(&tracing_reader);
return 0;
}
/*
* Consumer reader.
*/
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_iterator *iter = filp->private_data;
struct trace_array_cpu *data;
static cpumask_t mask;
struct trace_entry *entry;
static int start;
unsigned long flags;
int read = 0;
int cpu;
int len;
int ret;
/* return any leftover data */
if (iter->seq.len > start) {
len = iter->seq.len - start;
if (cnt > len)
cnt = len;
ret = copy_to_user(ubuf, iter->seq.buffer + start, cnt);
if (ret)
cnt = -EFAULT;
start += len;
return cnt;
}
trace_seq_reset(&iter->seq);
start = 0;
while (trace_empty(iter)) {
/*
* This is a make-shift waitqueue. The reason we don't use
* an actual wait queue is because:
* 1) we only ever have one waiter
* 2) the tracing, traces all functions, we don't want
* the overhead of calling wake_up and friends
* (and tracing them too)
* Anyway, this is really very primitive wakeup.
*/
set_current_state(TASK_INTERRUPTIBLE);
iter->tr->waiter = current;
/* sleep for one second, and try again. */
schedule_timeout(HZ);
iter->tr->waiter = NULL;
if (signal_pending(current))
return -EINTR;
/*
* We block until we read something and tracing is disabled.
* We still block if tracing is disabled, but we have never
* read anything. This allows a user to cat this file, and
* then enable tracing. But after we have read something,
* we give an EOF when tracing is again disabled.
*
* iter->pos will be 0 if we haven't read anything.
*/
if (!tracer_enabled && iter->pos)
break;
continue;
}
/* stop when tracing is finished */
if (trace_empty(iter))
return 0;
if (cnt >= PAGE_SIZE)
cnt = PAGE_SIZE - 1;
memset(iter, 0, sizeof(*iter));
iter->tr = &global_trace;
iter->pos = -1;
/*
* We need to stop all tracing on all CPUS to read the
* the next buffer. This is a bit expensive, but is
* not done often. We fill all what we can read,
* and then release the locks again.
*/
cpus_clear(mask);
local_irq_save(flags);
for_each_possible_cpu(cpu) {
data = iter->tr->data[cpu];
if (!head_page(data) || !data->trace_idx)
continue;
atomic_inc(&data->disabled);
spin_lock(&data->lock);
cpu_set(cpu, mask);
}
while ((entry = find_next_entry(iter, &cpu))) {
if (!entry)
break;
iter->ent = entry;
iter->cpu = cpu;
ret = print_trace_fmt(iter);
if (!ret)
break;
trace_consume(iter);
if (iter->seq.len >= cnt)
break;
}
for_each_possible_cpu(cpu) {
data = iter->tr->data[cpu];
if (!cpu_isset(cpu, mask))
continue;
spin_unlock(&data->lock);
atomic_dec(&data->disabled);
}
local_irq_restore(flags);
/* Now copy what we have to the user */
read = iter->seq.len;
if (read > cnt)
read = cnt;
ret = copy_to_user(ubuf, iter->seq.buffer, read);
if (read < iter->seq.len)
start = read;
else
trace_seq_reset(&iter->seq);
if (ret)
read = -EFAULT;
return read;
}
static struct file_operations tracing_max_lat_fops = {
.open = tracing_open_generic,
.read = tracing_max_lat_read,
......@@ -1663,6 +1911,12 @@ static struct file_operations set_tracer_fops = {
.write = tracing_set_trace_write,
};
static struct file_operations tracing_pipe_fops = {
.open = tracing_open_pipe,
.read = tracing_read_pipe,
.release = tracing_release_pipe,
};
#ifdef CONFIG_DYNAMIC_FTRACE
static ssize_t
......@@ -1763,6 +2017,11 @@ static __init void tracer_init_debugfs(void)
if (!entry)
pr_warning("Could not create debugfs 'README' entry\n");
entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
NULL, &tracing_pipe_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'tracing_threash' entry\n");
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
......@@ -1816,6 +2075,7 @@ static int trace_alloc_page(void)
/* Now that we successfully allocate a page per CPU, add them */
for_each_possible_cpu(i) {
data = global_trace.data[i];
spin_lock_init(&data->lock);
page = list_entry(pages.next, struct page, lru);
list_del_init(&page->lru);
list_add_tail(&page->lru, &data->trace_pages);
......@@ -1823,6 +2083,7 @@ static int trace_alloc_page(void)
#ifdef CONFIG_TRACER_MAX_TRACE
data = max_tr.data[i];
spin_lock_init(&data->lock);
page = list_entry(pages.next, struct page, lru);
list_del_init(&page->lru);
list_add_tail(&page->lru, &data->trace_pages);
......
......@@ -55,6 +55,7 @@ struct trace_entry {
struct trace_array_cpu {
struct list_head trace_pages;
atomic_t disabled;
spinlock_t lock;
cycle_t time_offset;
/* these fields get copied into max-trace: */
......@@ -88,6 +89,7 @@ struct trace_array {
long ctrl;
int cpu;
cycle_t time_start;
struct task_struct *waiter;
struct trace_array_cpu *data[NR_CPUS];
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册