提交 41cbc01f 编写于 作者: L Linus Torvalds

Merge tag 'trace-v3.20' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull tracing updates from Steven Rostedt:
 "The updates included in this pull request for ftrace are:

   o Several clean ups to the code

     One such clean up was to convert to 64 bit time keeping, in the
     ring buffer benchmark code.

   o Adding of __print_array() helper macro for TRACE_EVENT()

   o Updating the sample/trace_events/ to add samples of different ways
     to make trace events.  Lots of features have been added since the
     sample code was made, and these features are mostly unknown.
     Developers have been making their own hacks to do things that are
     already available.

   o Performance improvements.  Most notably, I found a performance bug
     where a waiter that is waiting for a full page from the ring buffer
     will see that a full page is not available, and go to sleep.  The
     sched event caused by it going to sleep would cause it to wake up
     again.  It would see that there was still not a full page, and go
     back to sleep again, and that would wake it up again, until finally
     it would see a full page.  This change has been marked for stable.

  Other improvements include removing global locks from fast paths"

* tag 'trace-v3.20' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  ring-buffer: Do not wake up a splice waiter when page is not full
  tracing: Fix unmapping loop in tracing_mark_write
  tracing: Add samples of DECLARE_EVENT_CLASS() and DEFINE_EVENT()
  tracing: Add TRACE_EVENT_FN example
  tracing: Add TRACE_EVENT_CONDITION sample
  tracing: Update the TRACE_EVENT fields available in the sample code
  tracing: Separate out initializing top level dir from instances
  tracing: Make tracing_init_dentry_tr() static
  trace: Use 64-bit timekeeping
  tracing: Add array printing helper
  tracing: Remove newline from trace_printk warning banner
  tracing: Use IS_ERR() check for return value of tracing_init_dentry()
  tracing: Remove unneeded includes of debugfs.h and fs.h
  tracing: Remove taking of trace_types_lock in pipe files
  tracing: Add ref count to tracer for when they are being read by pipe
......@@ -44,6 +44,10 @@ const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
const char *ftrace_print_hex_seq(struct trace_seq *p,
const unsigned char *buf, int len);
const char *ftrace_print_array_seq(struct trace_seq *p,
const void *buf, int buf_len,
size_t el_size);
struct trace_iterator;
struct trace_event;
......
......@@ -263,6 +263,14 @@
#undef __print_hex
#define __print_hex(buf, buf_len) ftrace_print_hex_seq(p, buf, buf_len)
#undef __print_array
#define __print_array(array, count, el_size) \
({ \
BUILD_BUG_ON(el_size != 1 && el_size != 2 && \
el_size != 4 && el_size != 8); \
ftrace_print_array_seq(p, array, count, el_size); \
})
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
static notrace enum print_line_t \
......@@ -674,6 +682,7 @@ static inline void ftrace_test_probe_##call(void) \
#undef __get_dynamic_array_len
#undef __get_str
#undef __get_bitmask
#undef __print_array
#undef TP_printk
#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args)
......
......@@ -5456,7 +5456,7 @@ static __init int ftrace_init_debugfs(void)
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
if (!d_tracer)
if (IS_ERR(d_tracer))
return 0;
ftrace_init_dyn_debugfs(d_tracer);
......
......@@ -9,7 +9,6 @@
#include <linux/trace_seq.h>
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h> /* for self test */
......@@ -23,7 +22,6 @@
#include <linux/hash.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/fs.h>
#include <asm/local.h>
......@@ -447,7 +445,10 @@ int ring_buffer_print_page_header(struct trace_seq *s)
struct rb_irq_work {
struct irq_work work;
wait_queue_head_t waiters;
wait_queue_head_t full_waiters;
bool waiters_pending;
bool full_waiters_pending;
bool wakeup_full;
};
/*
......@@ -529,6 +530,10 @@ static void rb_wake_up_waiters(struct irq_work *work)
struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
wake_up_all(&rbwork->waiters);
if (rbwork->wakeup_full) {
rbwork->wakeup_full = false;
wake_up_all(&rbwork->full_waiters);
}
}
/**
......@@ -553,9 +558,11 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
* data in any cpu buffer, or a specific buffer, put the
* caller on the appropriate wait queue.
*/
if (cpu == RING_BUFFER_ALL_CPUS)
if (cpu == RING_BUFFER_ALL_CPUS) {
work = &buffer->irq_work;
else {
/* Full only makes sense on per cpu reads */
full = false;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -ENODEV;
cpu_buffer = buffer->buffers[cpu];
......@@ -564,7 +571,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
while (true) {
prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
if (full)
prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE);
else
prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
/*
* The events can happen in critical sections where
......@@ -586,7 +596,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
* that is necessary is that the wake up happens after
* a task has been queued. It's OK for spurious wake ups.
*/
work->waiters_pending = true;
if (full)
work->full_waiters_pending = true;
else
work->waiters_pending = true;
if (signal_pending(current)) {
ret = -EINTR;
......@@ -615,7 +628,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
schedule();
}
finish_wait(&work->waiters, &wait);
if (full)
finish_wait(&work->full_waiters, &wait);
else
finish_wait(&work->waiters, &wait);
return ret;
}
......@@ -1230,6 +1246,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
init_completion(&cpu_buffer->update_done);
init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
init_waitqueue_head(&cpu_buffer->irq_work.waiters);
init_waitqueue_head(&cpu_buffer->irq_work.full_waiters);
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
......@@ -2801,6 +2818,8 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
static __always_inline void
rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
bool pagebusy;
if (buffer->irq_work.waiters_pending) {
buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
......@@ -2812,6 +2831,15 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&cpu_buffer->irq_work.work);
}
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
cpu_buffer->irq_work.wakeup_full = true;
cpu_buffer->irq_work.full_waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
irq_work_queue(&cpu_buffer->irq_work.work);
}
}
/**
......
......@@ -7,7 +7,7 @@
#include <linux/completion.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/time.h>
#include <linux/ktime.h>
#include <asm/local.h>
struct rb_page {
......@@ -17,7 +17,7 @@ struct rb_page {
};
/* run time and sleep time in seconds */
#define RUN_TIME 10
#define RUN_TIME 10ULL
#define SLEEP_TIME 10
/* number of events for writer to wake up the reader */
......@@ -212,8 +212,7 @@ static void ring_buffer_consumer(void)
static void ring_buffer_producer(void)
{
struct timeval start_tv;
struct timeval end_tv;
ktime_t start_time, end_time, timeout;
unsigned long long time;
unsigned long long entries;
unsigned long long overruns;
......@@ -227,7 +226,8 @@ static void ring_buffer_producer(void)
* make the system stall)
*/
trace_printk("Starting ring buffer hammer\n");
do_gettimeofday(&start_tv);
start_time = ktime_get();
timeout = ktime_add_ns(start_time, RUN_TIME * NSEC_PER_SEC);
do {
struct ring_buffer_event *event;
int *entry;
......@@ -244,7 +244,7 @@ static void ring_buffer_producer(void)
ring_buffer_unlock_commit(buffer, event);
}
}
do_gettimeofday(&end_tv);
end_time = ktime_get();
cnt++;
if (consumer && !(cnt % wakeup_interval))
......@@ -264,7 +264,7 @@ static void ring_buffer_producer(void)
cond_resched();
#endif
} while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
} while (ktime_before(end_time, timeout) && !kill_test);
trace_printk("End ring buffer hammer\n");
if (consumer) {
......@@ -280,9 +280,7 @@ static void ring_buffer_producer(void)
wait_for_completion(&read_done);
}
time = end_tv.tv_sec - start_tv.tv_sec;
time *= USEC_PER_SEC;
time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
time = ktime_us_delta(end_time, start_time);
entries = ring_buffer_entries(buffer);
overruns = ring_buffer_overruns(buffer);
......
......@@ -2036,7 +2036,8 @@ void trace_printk_init_buffers(void)
/* trace_printk() is for debug use only. Don't use it in production. */
pr_warning("\n**********************************************************\n");
pr_warning("\n");
pr_warning("**********************************************************\n");
pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
pr_warning("** **\n");
pr_warning("** trace_printk() being used. Allocating extra memory. **\n");
......@@ -4140,6 +4141,12 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
goto out;
}
/* If trace pipe files are being read, we can't change the tracer */
if (tr->current_trace->ref) {
ret = -EBUSY;
goto out;
}
trace_branch_disable();
tr->current_trace->enabled--;
......@@ -4326,17 +4333,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
}
trace_seq_init(&iter->seq);
/*
* We make a copy of the current tracer to avoid concurrent
* changes on it while we are reading.
*/
iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
if (!iter->trace) {
ret = -ENOMEM;
goto fail;
}
*iter->trace = *tr->current_trace;
iter->trace = tr->current_trace;
if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
ret = -ENOMEM;
......@@ -4363,6 +4360,8 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
iter->trace->pipe_open(iter);
nonseekable_open(inode, filp);
tr->current_trace->ref++;
out:
mutex_unlock(&trace_types_lock);
return ret;
......@@ -4382,6 +4381,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
mutex_lock(&trace_types_lock);
tr->current_trace->ref--;
if (iter->trace->pipe_close)
iter->trace->pipe_close(iter);
......@@ -4389,7 +4390,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
free_cpumask_var(iter->started);
mutex_destroy(&iter->mutex);
kfree(iter->trace);
kfree(iter);
trace_array_put(tr);
......@@ -4422,7 +4422,7 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
return trace_poll(iter, filp, poll_table);
}
/* Must be called with trace_types_lock mutex held. */
/* Must be called with iter->mutex held. */
static int tracing_wait_pipe(struct file *filp)
{
struct trace_iterator *iter = filp->private_data;
......@@ -4467,7 +4467,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
struct trace_iterator *iter = filp->private_data;
struct trace_array *tr = iter->tr;
ssize_t sret;
/* return any leftover data */
......@@ -4477,12 +4476,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
trace_seq_init(&iter->seq);
/* copy the tracer to avoid using a global lock all around */
mutex_lock(&trace_types_lock);
if (unlikely(iter->trace->name != tr->current_trace->name))
*iter->trace = *tr->current_trace;
mutex_unlock(&trace_types_lock);
/*
* Avoid more than one consumer on a single file descriptor
* This is just a matter of traces coherency, the ring buffer itself
......@@ -4642,7 +4635,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
.ops = &tracing_pipe_buf_ops,
.spd_release = tracing_spd_release_pipe,
};
struct trace_array *tr = iter->tr;
ssize_t ret;
size_t rem;
unsigned int i;
......@@ -4650,12 +4642,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
if (splice_grow_spd(pipe, &spd))
return -ENOMEM;
/* copy the tracer to avoid using a global lock all around */
mutex_lock(&trace_types_lock);
if (unlikely(iter->trace->name != tr->current_trace->name))
*iter->trace = *tr->current_trace;
mutex_unlock(&trace_types_lock);
mutex_lock(&iter->mutex);
if (iter->trace->splice_read) {
......@@ -4942,7 +4928,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
*fpos += written;
out_unlock:
for (i = 0; i < nr_pages; i++){
for (i = nr_pages - 1; i >= 0; i--) {
kunmap_atomic(map_page[i]);
put_page(pages[i]);
}
......@@ -5331,6 +5317,8 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
filp->private_data = info;
tr->current_trace->ref++;
mutex_unlock(&trace_types_lock);
ret = nonseekable_open(inode, filp);
......@@ -5361,21 +5349,16 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if (!count)
return 0;
mutex_lock(&trace_types_lock);
#ifdef CONFIG_TRACER_MAX_TRACE
if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
size = -EBUSY;
goto out_unlock;
}
if (iter->snapshot && iter->tr->current_trace->use_max_tr)
return -EBUSY;
#endif
if (!info->spare)
info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
iter->cpu_file);
size = -ENOMEM;
if (!info->spare)
goto out_unlock;
return -ENOMEM;
/* Do we have previous read data to read? */
if (info->read < PAGE_SIZE)
......@@ -5391,21 +5374,16 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if (ret < 0) {
if (trace_empty(iter)) {
if ((filp->f_flags & O_NONBLOCK)) {
size = -EAGAIN;
goto out_unlock;
}
mutex_unlock(&trace_types_lock);
if ((filp->f_flags & O_NONBLOCK))
return -EAGAIN;
ret = wait_on_pipe(iter, false);
mutex_lock(&trace_types_lock);
if (ret) {
size = ret;
goto out_unlock;
}
if (ret)
return ret;
goto again;
}
size = 0;
goto out_unlock;
return 0;
}
info->read = 0;
......@@ -5415,18 +5393,14 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
size = count;
ret = copy_to_user(ubuf, info->spare + info->read, size);
if (ret == size) {
size = -EFAULT;
goto out_unlock;
}
if (ret == size)
return -EFAULT;
size -= ret;
*ppos += size;
info->read += size;
out_unlock:
mutex_unlock(&trace_types_lock);
return size;
}
......@@ -5437,6 +5411,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
mutex_lock(&trace_types_lock);
iter->tr->current_trace->ref--;
__trace_array_put(iter->tr);
if (info->spare)
......@@ -5522,30 +5498,20 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
int entries, size, i;
ssize_t ret = 0;
mutex_lock(&trace_types_lock);
#ifdef CONFIG_TRACER_MAX_TRACE
if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
ret = -EBUSY;
goto out;
}
if (iter->snapshot && iter->tr->current_trace->use_max_tr)
return -EBUSY;
#endif
if (splice_grow_spd(pipe, &spd)) {
ret = -ENOMEM;
goto out;
}
if (splice_grow_spd(pipe, &spd))
return -ENOMEM;
if (*ppos & (PAGE_SIZE - 1)) {
ret = -EINVAL;
goto out;
}
if (*ppos & (PAGE_SIZE - 1))
return -EINVAL;
if (len & (PAGE_SIZE - 1)) {
if (len < PAGE_SIZE) {
ret = -EINVAL;
goto out;
}
if (len < PAGE_SIZE)
return -EINVAL;
len &= PAGE_MASK;
}
......@@ -5606,25 +5572,20 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
/* did we read anything? */
if (!spd.nr_pages) {
if (ret)
goto out;
return ret;
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
return -EAGAIN;
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
ret = -EAGAIN;
goto out;
}
mutex_unlock(&trace_types_lock);
ret = wait_on_pipe(iter, true);
mutex_lock(&trace_types_lock);
if (ret)
goto out;
return ret;
goto again;
}
ret = splice_to_pipe(pipe, &spd);
splice_shrink_spd(&spd);
out:
mutex_unlock(&trace_types_lock);
return ret;
}
......@@ -5854,28 +5815,11 @@ static __init int register_snapshot_cmd(void)
static inline __init int register_snapshot_cmd(void) { return 0; }
#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
static struct dentry *tracing_get_dentry(struct trace_array *tr)
{
if (tr->dir)
return tr->dir;
if (!debugfs_initialized())
return NULL;
if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
tr->dir = debugfs_create_dir("tracing", NULL);
if (!tr->dir)
pr_warn_once("Could not create debugfs directory 'tracing'\n");
return tr->dir;
}
struct dentry *tracing_init_dentry(void)
{
return tracing_init_dentry_tr(&global_trace);
}
static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
{
struct dentry *d_tracer;
......@@ -5883,8 +5827,8 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
if (tr->percpu_dir)
return tr->percpu_dir;
d_tracer = tracing_init_dentry_tr(tr);
if (!d_tracer)
d_tracer = tracing_get_dentry(tr);
if (IS_ERR(d_tracer))
return NULL;
tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
......@@ -6086,8 +6030,8 @@ static struct dentry *trace_options_init_dentry(struct trace_array *tr)
if (tr->options)
return tr->options;
d_tracer = tracing_init_dentry_tr(tr);
if (!d_tracer)
d_tracer = tracing_get_dentry(tr);
if (IS_ERR(d_tracer))
return NULL;
tr->options = debugfs_create_dir("options", d_tracer);
......@@ -6416,7 +6360,7 @@ static int instance_delete(const char *name)
goto out_unlock;
ret = -EBUSY;
if (tr->ref)
if (tr->ref || (tr->current_trace && tr->current_trace->ref))
goto out_unlock;
list_del(&tr->list);
......@@ -6571,6 +6515,33 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
}
/**
* tracing_init_dentry - initialize top level trace array
*
* This is called when creating files or directories in the tracing
* directory. It is called via fs_initcall() by any of the boot up code
* and expects to return the dentry of the top level tracing directory.
*/
struct dentry *tracing_init_dentry(void)
{
struct trace_array *tr = &global_trace;
if (tr->dir)
return tr->dir;
if (WARN_ON(!debugfs_initialized()))
return ERR_PTR(-ENODEV);
tr->dir = debugfs_create_dir("tracing", NULL);
if (!tr->dir) {
pr_warn_once("Could not create debugfs directory 'tracing'\n");
return ERR_PTR(-ENOMEM);
}
return tr->dir;
}
static __init int tracer_init_debugfs(void)
{
struct dentry *d_tracer;
......@@ -6578,7 +6549,7 @@ static __init int tracer_init_debugfs(void)
trace_access_lock_init();
d_tracer = tracing_init_dentry();
if (!d_tracer)
if (IS_ERR(d_tracer))
return 0;
init_tracer_debugfs(&global_trace, d_tracer);
......@@ -6811,7 +6782,6 @@ __init static int tracer_alloc_buffers(void)
int ring_buf_size;
int ret = -ENOMEM;
if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
goto out;
......
......@@ -388,6 +388,7 @@ struct tracer {
struct tracer *next;
struct tracer_flags *flags;
int enabled;
int ref;
bool print_max;
bool allow_instances;
#ifdef CONFIG_TRACER_MAX_TRACE
......@@ -541,7 +542,6 @@ struct dentry *trace_create_file(const char *name,
void *data,
const struct file_operations *fops);
struct dentry *tracing_init_dentry_tr(struct trace_array *tr);
struct dentry *tracing_init_dentry(void);
struct ring_buffer_event;
......
......@@ -7,7 +7,6 @@
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
......
......@@ -2531,7 +2531,7 @@ static __init int event_trace_init(void)
return -ENODEV;
d_tracer = tracing_init_dentry();
if (!d_tracer)
if (IS_ERR(d_tracer))
return 0;
entry = debugfs_create_file("available_events", 0444, d_tracer,
......
......@@ -6,12 +6,10 @@
#include <linux/stringify.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include "trace_output.h"
......
......@@ -1437,7 +1437,7 @@ static __init int init_graph_debugfs(void)
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
if (!d_tracer)
if (IS_ERR(d_tracer))
return 0;
trace_create_file("max_graph_depth", 0644, d_tracer,
......
......@@ -10,11 +10,9 @@
* Copyright (C) 2004 Nadia Yvette Chambers
*/
#include <linux/kallsyms.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/fs.h>
#include "trace.h"
......
......@@ -1320,7 +1320,7 @@ static __init int init_kprobe_trace(void)
return -EINVAL;
d_tracer = tracing_init_dentry();
if (!d_tracer)
if (IS_ERR(d_tracer))
return 0;
entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
......
......@@ -6,8 +6,6 @@
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include "trace.h"
......
......@@ -177,6 +177,50 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
}
EXPORT_SYMBOL(ftrace_print_hex_seq);
const char *
ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len,
size_t el_size)
{
const char *ret = trace_seq_buffer_ptr(p);
const char *prefix = "";
void *ptr = (void *)buf;
trace_seq_putc(p, '{');
while (ptr < buf + buf_len) {
switch (el_size) {
case 1:
trace_seq_printf(p, "%s0x%x", prefix,
*(u8 *)ptr);
break;
case 2:
trace_seq_printf(p, "%s0x%x", prefix,
*(u16 *)ptr);
break;
case 4:
trace_seq_printf(p, "%s0x%x", prefix,
*(u32 *)ptr);
break;
case 8:
trace_seq_printf(p, "%s0x%llx", prefix,
*(u64 *)ptr);
break;
default:
trace_seq_printf(p, "BAD SIZE:%zu 0x%x", el_size,
*(u8 *)ptr);
el_size = 1;
}
prefix = ",";
ptr += el_size;
}
trace_seq_putc(p, '}');
trace_seq_putc(p, 0);
return ret;
}
EXPORT_SYMBOL(ftrace_print_array_seq);
int ftrace_raw_output_prep(struct trace_iterator *iter,
struct trace_event *trace_event)
{
......
......@@ -5,7 +5,6 @@
*
*/
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
......@@ -15,7 +14,6 @@
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include "trace.h"
......@@ -349,7 +347,7 @@ static __init int init_trace_printk_function_export(void)
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
if (!d_tracer)
if (IS_ERR(d_tracer))
return 0;
trace_create_file("printk_formats", 0444, d_tracer,
......
......@@ -5,8 +5,6 @@
*
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
......
......@@ -10,8 +10,6 @@
* Copyright (C) 2004 Nadia Yvette Chambers
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
......
......@@ -7,12 +7,10 @@
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <asm/setup.h>
......@@ -462,7 +460,7 @@ static __init int stack_trace_init(void)
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
if (!d_tracer)
if (IS_ERR(d_tracer))
return 0;
trace_create_file("stack_max_size", 0644, d_tracer,
......
......@@ -276,7 +276,7 @@ static int tracing_stat_init(void)
struct dentry *d_tracing;
d_tracing = tracing_init_dentry();
if (!d_tracing)
if (IS_ERR(d_tracing))
return 0;
stat_dir = debugfs_create_dir("trace_stat", d_tracing);
......
......@@ -1321,7 +1321,7 @@ static __init int init_uprobe_trace(void)
struct dentry *d_tracer;
d_tracer = tracing_init_dentry();
if (!d_tracer)
if (IS_ERR(d_tracer))
return 0;
trace_create_file("uprobe_events", 0644, d_tracer,
......
......@@ -10,12 +10,38 @@
#define CREATE_TRACE_POINTS
#include "trace-events-sample.h"
static const char *random_strings[] = {
"Mother Goose",
"Snoopy",
"Gandalf",
"Frodo",
"One ring to rule them all"
};
static void simple_thread_func(int cnt)
{
int array[6];
int len = cnt % 5;
int i;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ);
trace_foo_bar("hello", cnt);
for (i = 0; i < len; i++)
array[i] = i + 1;
array[i] = 0;
/* Silly tracepoints */
trace_foo_bar("hello", cnt, array, random_strings[len],
tsk_cpus_allowed(current));
trace_foo_with_template_simple("HELLO", cnt);
trace_foo_bar_with_cond("Some times print", cnt);
trace_foo_with_template_cond("prints other times", cnt);
trace_foo_with_template_print("I have to be different", cnt);
}
static int simple_thread(void *arg)
......@@ -29,6 +55,53 @@ static int simple_thread(void *arg)
}
static struct task_struct *simple_tsk;
static struct task_struct *simple_tsk_fn;
static void simple_thread_func_fn(int cnt)
{
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ);
/* More silly tracepoints */
trace_foo_bar_with_fn("Look at me", cnt);
trace_foo_with_template_fn("Look at me too", cnt);
}
static int simple_thread_fn(void *arg)
{
int cnt = 0;
while (!kthread_should_stop())
simple_thread_func_fn(cnt++);
return 0;
}
static DEFINE_MUTEX(thread_mutex);
void foo_bar_reg(void)
{
pr_info("Starting thread for foo_bar_fn\n");
/*
* We shouldn't be able to start a trace when the module is
* unloading (there's other locks to prevent that). But
* for consistency sake, we still take the thread_mutex.
*/
mutex_lock(&thread_mutex);
simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn");
mutex_unlock(&thread_mutex);
}
void foo_bar_unreg(void)
{
pr_info("Killing thread for foo_bar_fn\n");
/* protect against module unloading */
mutex_lock(&thread_mutex);
if (simple_tsk_fn)
kthread_stop(simple_tsk_fn);
simple_tsk_fn = NULL;
mutex_unlock(&thread_mutex);
}
static int __init trace_event_init(void)
{
......@@ -42,6 +115,11 @@ static int __init trace_event_init(void)
static void __exit trace_event_exit(void)
{
kthread_stop(simple_tsk);
mutex_lock(&thread_mutex);
if (simple_tsk_fn)
kthread_stop(simple_tsk_fn);
simple_tsk_fn = NULL;
mutex_unlock(&thread_mutex);
}
module_init(trace_event_init);
......
/*
* If TRACE_SYSTEM is defined, that will be the directory created
* in the ftrace directory under /sys/kernel/debug/tracing/events/<system>
* in the ftrace directory under /sys/kernel/tracing/events/<system>
*
* The define_trace.h below will also look for a file name of
* TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
......@@ -54,45 +54,347 @@
* Here it is simply "foo, bar".
*
* struct: This defines the way the data will be stored in the ring buffer.
* There are currently two types of elements. __field and __array.
* a __field is broken up into (type, name). Where type can be any
* primitive type (integer, long or pointer). __field_struct() can
* be any static complex data value (struct, union, but not an array).
* For an array. there are three fields. (type, name, size). The
* type of elements in the array, the name of the field and the size
* of the array.
* The items declared here become part of a special structure
* called "__entry", which can be used in the fast_assign part of the
* TRACE_EVENT macro.
*
* Here are the currently defined types you can use:
*
* __field : Is broken up into type and name. Where type can be any
* primitive type (integer, long or pointer).
*
* __field(int, foo)
*
* __entry->foo = 5;
*
* __field_struct : This can be any static complex data type (struct, union
* but not an array). Be careful using complex types, as each
* event is limited in size, and copying large amounts of data
* into the ring buffer can slow things down.
*
* __field_struct(struct bar, foo)
*
* __entry->bar.x = y;
* __array: There are three fields (type, name, size). The type is the
* type of elements in teh array, the name is the name of the array.
* size is the number of items in the array (not the total size).
*
* __array( char, foo, 10) is the same as saying: char foo[10];
*
* Assigning arrays can be done like any array:
*
* __entry->foo[0] = 'a';
*
* memcpy(__entry->foo, bar, 10);
*
* __dynamic_array: This is similar to array, but can vary is size from
* instance to instance of the tracepoint being called.
* Like __array, this too has three elements (type, name, size);
* type is the type of the element, name is the name of the array.
* The size is different than __array. It is not a static number,
* but the algorithm to figure out the length of the array for the
* specific instance of tracepoint. Again, size is the numebr of
* items in the array, not the total length in bytes.
*
* __dynamic_array( int, foo, bar) is similar to: int foo[bar];
*
* Note, unlike arrays, you must use the __get_dynamic_array() macro
* to access the array.
*
* memcpy(__get_dynamic_array(foo), bar, 10);
*
* Notice, that "__entry" is not needed here.
*
* __string: This is a special kind of __dynamic_array. It expects to
* have a nul terminated character array passed to it (it allows
* for NULL too, which would be converted into "(null)"). __string
* takes two paramenter (name, src), where name is the name of
* the string saved, and src is the string to copy into the
* ring buffer.
*
* __string(foo, bar) is similar to: strcpy(foo, bar)
*
* To assign a string, use the helper macro __assign_str().
*
* __assign_str(foo, bar);
*
* In most cases, the __assign_str() macro will take the same
* parameters as the __string() macro had to declare the string.
*
* __bitmask: This is another kind of __dynamic_array, but it expects
* an array of longs, and the number of bits to parse. It takes
* two parameters (name, nr_bits), where name is the name of the
* bitmask to save, and the nr_bits is the number of bits to record.
*
* __bitmask(target_cpu, nr_cpumask_bits)
*
* To assign a bitmask, use the __assign_bitmask() helper macro.
*
* __assign_bitmask(target_cpus, cpumask_bits(bar), nr_cpumask_bits);
*
* __array( char, foo, 10) is the same as saying char foo[10].
*
* fast_assign: This is a C like function that is used to store the items
* into the ring buffer.
* into the ring buffer. A special variable called "__entry" will be the
* structure that points into the ring buffer and has the same fields as
* described by the struct part of TRACE_EVENT above.
*
* printk: This is a way to print out the data in pretty print. This is
* useful if the system crashes and you are logging via a serial line,
* the data can be printed to the console using this "printk" method.
* This is also used to print out the data from the trace files.
* Again, the __entry macro is used to access the data from the ring buffer.
*
* Note, __dynamic_array, __string, and __bitmask require special helpers
* to access the data.
*
* For __dynamic_array(int, foo, bar) use __get_dynamic_array(foo)
* Use __get_dynamic_array_len(foo) to get the length of the array
* saved.
*
* For __string(foo, bar) use __get_str(foo)
*
* For __bitmask(target_cpus, nr_cpumask_bits) use __get_bitmask(target_cpus)
*
*
* Note, that for both the assign and the printk, __entry is the handler
* to the data structure in the ring buffer, and is defined by the
* TP_STRUCT__entry.
*/
/*
* It is OK to have helper functions in the file, but they need to be protected
* from being defined more than once. Remember, this file gets included more
* than once.
*/
#ifndef __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS
#define __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS
static inline int __length_of(const int *list)
{
int i;
if (!list)
return 0;
for (i = 0; list[i]; i++)
;
return i;
}
#endif
TRACE_EVENT(foo_bar,
TP_PROTO(char *foo, int bar),
TP_PROTO(const char *foo, int bar, const int *lst,
const char *string, const struct cpumask *mask),
TP_ARGS(foo, bar),
TP_ARGS(foo, bar, lst, string, mask),
TP_STRUCT__entry(
__array( char, foo, 10 )
__field( int, bar )
__dynamic_array(int, list, __length_of(lst))
__string( str, string )
__bitmask( cpus, num_possible_cpus() )
),
TP_fast_assign(
strlcpy(__entry->foo, foo, 10);
__entry->bar = bar;
memcpy(__get_dynamic_array(list), lst,
__length_of(lst) * sizeof(int));
__assign_str(str, string);
__assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus());
),
TP_printk("foo %s %d %s %s (%s)", __entry->foo, __entry->bar,
__print_array(__get_dynamic_array(list),
__get_dynamic_array_len(list),
sizeof(int)),
__get_str(str), __get_bitmask(cpus))
);
/*
* There may be a case where a tracepoint should only be called if
* some condition is set. Otherwise the tracepoint should not be called.
* But to do something like:
*
* if (cond)
* trace_foo();
*
* Would cause a little overhead when tracing is not enabled, and that
* overhead, even if small, is not something we want. As tracepoints
* use static branch (aka jump_labels), where no branch is taken to
* skip the tracepoint when not enabled, and a jmp is placed to jump
* to the tracepoint code when it is enabled, having a if statement
* nullifies that optimization. It would be nice to place that
* condition within the static branch. This is where TRACE_EVENT_CONDITION
* comes in.
*
* TRACE_EVENT_CONDITION() is just like TRACE_EVENT, except it adds another
* parameter just after args. Where TRACE_EVENT has:
*
* TRACE_EVENT(name, proto, args, struct, assign, printk)
*
* the CONDITION version has:
*
* TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, printk)
*
* Everything is the same as TRACE_EVENT except for the new cond. Think
* of the cond variable as:
*
* if (cond)
* trace_foo_bar_with_cond();
*
* Except that the logic for the if branch is placed after the static branch.
* That is, the if statement that processes the condition will not be
* executed unless that traecpoint is enabled. Otherwise it still remains
* a nop.
*/
TRACE_EVENT_CONDITION(foo_bar_with_cond,
TP_PROTO(const char *foo, int bar),
TP_ARGS(foo, bar),
TP_CONDITION(!(bar % 10)),
TP_STRUCT__entry(
__string( foo, foo )
__field( int, bar )
),
TP_fast_assign(
__assign_str(foo, foo);
__entry->bar = bar;
),
TP_printk("foo %s %d", __get_str(foo), __entry->bar)
);
void foo_bar_reg(void);
void foo_bar_unreg(void);
/*
* Now in the case that some function needs to be called when the
* tracepoint is enabled and/or when it is disabled, the
* TRACE_EVENT_FN() serves this purpose. This is just like TRACE_EVENT()
* but adds two more parameters at the end:
*
* TRACE_EVENT_FN( name, proto, args, struct, assign, printk, reg, unreg)
*
* reg and unreg are functions with the prototype of:
*
* void reg(void)
*
* The reg function gets called before the tracepoint is enabled, and
* the unreg function gets called after the tracepoint is disabled.
*
* Note, reg and unreg are allowed to be NULL. If you only need to
* call a function before enabling, or after disabling, just set one
* function and pass in NULL for the other parameter.
*/
TRACE_EVENT_FN(foo_bar_with_fn,
TP_PROTO(const char *foo, int bar),
TP_ARGS(foo, bar),
TP_STRUCT__entry(
__string( foo, foo )
__field( int, bar )
),
TP_fast_assign(
__assign_str(foo, foo);
__entry->bar = bar;
),
TP_printk("foo %s %d", __get_str(foo), __entry->bar),
foo_bar_reg, foo_bar_unreg
);
/*
* Each TRACE_EVENT macro creates several helper functions to produce
* the code to add the tracepoint, create the files in the trace
* directory, hook it to perf, assign the values and to print out
* the raw data from the ring buffer. To prevent too much bloat,
* if there are more than one tracepoint that uses the same format
* for the proto, args, struct, assign and printk, and only the name
* is different, it is highly recommended to use the DECLARE_EVENT_CLASS
*
* DECLARE_EVENT_CLASS() macro creates most of the functions for the
* tracepoint. Then DEFINE_EVENT() is use to hook a tracepoint to those
* functions. This DEFINE_EVENT() is an instance of the class and can
* be enabled and disabled separately from other events (either TRACE_EVENT
* or other DEFINE_EVENT()s).
*
* Note, TRACE_EVENT() itself is simply defined as:
*
* #define TRACE_EVENT(name, proto, args, tstruct, assign, printk) \
* DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
* DEFINE_EVENT(name, name, proto, args)
*
* The DEFINE_EVENT() also can be declared with conditions and reg functions:
*
* DEFINE_EVENT_CONDITION(template, name, proto, args, cond);
* DEFINE_EVENT_FN(template, name, proto, args, reg, unreg);
*/
DECLARE_EVENT_CLASS(foo_template,
TP_PROTO(const char *foo, int bar),
TP_ARGS(foo, bar),
TP_STRUCT__entry(
__string( foo, foo )
__field( int, bar )
),
TP_fast_assign(
__assign_str(foo, foo);
__entry->bar = bar;
),
TP_printk("foo %s %d", __entry->foo, __entry->bar)
TP_printk("foo %s %d", __get_str(foo), __entry->bar)
);
/*
* Here's a better way for the previous samples (except, the first
* exmaple had more fields and could not be used here).
*/
DEFINE_EVENT(foo_template, foo_with_template_simple,
TP_PROTO(const char *foo, int bar),
TP_ARGS(foo, bar));
DEFINE_EVENT_CONDITION(foo_template, foo_with_template_cond,
TP_PROTO(const char *foo, int bar),
TP_ARGS(foo, bar),
TP_CONDITION(!(bar % 8)));
DEFINE_EVENT_FN(foo_template, foo_with_template_fn,
TP_PROTO(const char *foo, int bar),
TP_ARGS(foo, bar),
foo_bar_reg, foo_bar_unreg);
/*
* Anytime two events share basically the same values and have
* the same output, use the DECLARE_EVENT_CLASS() and DEFINE_EVENT()
* when ever possible.
*/
/*
* If the event is similar to the DECLARE_EVENT_CLASS, but you need
* to have a different output, then use DEFINE_EVENT_PRINT() which
* lets you override the TP_printk() of the class.
*/
DEFINE_EVENT_PRINT(foo_template, foo_with_template_print,
TP_PROTO(const char *foo, int bar),
TP_ARGS(foo, bar),
TP_printk("bar %s %d", __get_str(foo), __entry->bar));
#endif
/***** NOTICE! The #if protection ends here. *****/
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册