diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index f5e793d69bd30a62e06be6ce904de39f71c1d60c..79fcbc4b09d6098296e3488ff9aed09fdfcc3a22 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -121,6 +121,9 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(int cpu); void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); +size_t ring_buffer_page_len(void *page); + + /* * The below functions are fine to use outside the tracing facility. */ @@ -138,8 +141,8 @@ static inline int tracing_is_on(void) { return 0; } void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); -int ring_buffer_read_page(struct ring_buffer *buffer, - void **data_page, int cpu, int full); +int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, + size_t len, int cpu, int full); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a8c275c01e83c7514b0c890248e1f80f8436570a..f2a163db52f9e55f3b3078639439e49324f8915a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -61,6 +61,8 @@ enum { static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; +#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) + /** * tracing_on - enable all tracing buffers * @@ -132,7 +134,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) } EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); -#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) +#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA 28 @@ -234,6 +236,18 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +/** + * ring_buffer_page_len - the size of data on the page. + * @page: The page to read + * + * Returns the amount of data on the page, including buffer page header. + */ +size_t ring_buffer_page_len(void *page) +{ + return local_read(&((struct buffer_data_page *)page)->commit) + + BUF_PAGE_HDR_SIZE; +} + /* * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing * this issue out. @@ -254,7 +268,7 @@ static inline int test_time_stamp(u64 delta) return 0; } -#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) +#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) /* * head_page == tail_page && head == tail then buffer is empty. @@ -2378,8 +2392,8 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, */ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) { - unsigned long addr; struct buffer_data_page *bpage; + unsigned long addr; addr = __get_free_page(GFP_KERNEL); if (!addr) @@ -2387,6 +2401,8 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) bpage = (void *)addr; + rb_init_page(bpage); + return bpage; } @@ -2406,6 +2422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) * ring_buffer_read_page - extract a page from the ring buffer * @buffer: buffer to extract from * @data_page: the page to use allocated from ring_buffer_alloc_read_page + * @len: amount to extract * @cpu: the cpu of the buffer to extract * @full: should the extraction only happen when the page is full. * @@ -2418,7 +2435,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) * rpage = ring_buffer_alloc_read_page(buffer); * if (!rpage) * return error; - * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); + * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); * if (ret >= 0) * process_page(rpage, ret); * @@ -2435,70 +2452,103 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) * <0 if no data has been transferred. */ int ring_buffer_read_page(struct ring_buffer *buffer, - void **data_page, int cpu, int full) + void **data_page, size_t len, int cpu, int full) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; struct buffer_data_page *bpage; + struct buffer_page *reader; unsigned long flags; + unsigned int commit; unsigned int read; int ret = -1; + /* + * If len is not big enough to hold the page header, then + * we can not copy anything. + */ + if (len <= BUF_PAGE_HDR_SIZE) + return -1; + + len -= BUF_PAGE_HDR_SIZE; + if (!data_page) - return 0; + return -1; bpage = *data_page; if (!bpage) - return 0; + return -1; spin_lock_irqsave(&cpu_buffer->reader_lock, flags); - /* - * rb_buffer_peek will get the next ring buffer if - * the current reader page is empty. - */ - event = rb_buffer_peek(buffer, cpu, NULL); - if (!event) + reader = rb_get_reader_page(cpu_buffer); + if (!reader) goto out; - /* check for data */ - if (!local_read(&cpu_buffer->reader_page->page->commit)) - goto out; + event = rb_reader_event(cpu_buffer); + + read = reader->read; + commit = rb_page_commit(reader); - read = cpu_buffer->reader_page->read; /* - * If the writer is already off of the read page, then simply - * switch the read page with the given page. Otherwise - * we need to copy the data from the reader to the writer. + * If this page has been partially read or + * if len is not big enough to read the rest of the page or + * a writer is still on the page, then + * we must copy the data from the page to the buffer. + * Otherwise, we can simply swap the page with the one passed in. */ - if (cpu_buffer->reader_page == cpu_buffer->commit_page) { - unsigned int commit = rb_page_commit(cpu_buffer->reader_page); + if (read || (len < (commit - read)) || + cpu_buffer->reader_page == cpu_buffer->commit_page) { struct buffer_data_page *rpage = cpu_buffer->reader_page->page; + unsigned int rpos = read; + unsigned int pos = 0; + unsigned int size; if (full) goto out; - /* The writer is still on the reader page, we must copy */ - memcpy(bpage->data + read, rpage->data + read, commit - read); - /* consume what was read */ - cpu_buffer->reader_page->read = commit; + if (len > (commit - read)) + len = (commit - read); + + size = rb_event_length(event); + + if (len < size) + goto out; + + /* Need to copy one event at a time */ + do { + memcpy(bpage->data + pos, rpage->data + rpos, size); + + len -= size; + + rb_advance_reader(cpu_buffer); + rpos = reader->read; + pos += size; + + event = rb_reader_event(cpu_buffer); + size = rb_event_length(event); + } while (len > size); /* update bpage */ - local_set(&bpage->commit, commit); - if (!read) - bpage->time_stamp = rpage->time_stamp; + local_set(&bpage->commit, pos); + bpage->time_stamp = rpage->time_stamp; + + /* we copied everything to the beginning */ + read = 0; } else { /* swap the pages */ rb_init_page(bpage); - bpage = cpu_buffer->reader_page->page; - cpu_buffer->reader_page->page = *data_page; - cpu_buffer->reader_page->read = 0; + bpage = reader->page; + reader->page = *data_page; + local_set(&reader->write, 0); + reader->read = 0; *data_page = bpage; + + /* update the entry counter */ + rb_remove_entries(cpu_buffer, bpage, read); } ret = read; - /* update the entry counter */ - rb_remove_entries(cpu_buffer, bpage, read); out: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ea055aa21cd977e78cc6bbbce03479a18ad8cfaf..12539f72f4a56d17f134dfbe177f2d065105ee4b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -11,31 +11,30 @@ * Copyright (C) 2004-2006 Ingo Molnar * Copyright (C) 2004 William Lee Irwin III */ +#include #include +#include +#include #include #include #include +#include #include #include #include #include #include +#include #include #include #include +#include #include #include #include #include #include #include -#include -#include -#include - -#include -#include -#include #include "trace.h" #include "trace_output.h" @@ -3005,6 +3004,246 @@ static struct file_operations tracing_mark_fops = { .write = tracing_mark_write, }; +struct ftrace_buffer_info { + struct trace_array *tr; + void *spare; + int cpu; + unsigned int read; +}; + +static int tracing_buffers_open(struct inode *inode, struct file *filp) +{ + int cpu = (int)(long)inode->i_private; + struct ftrace_buffer_info *info; + + if (tracing_disabled) + return -ENODEV; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->tr = &global_trace; + info->cpu = cpu; + info->spare = ring_buffer_alloc_read_page(info->tr->buffer); + /* Force reading ring buffer for first read */ + info->read = (unsigned int)-1; + if (!info->spare) + goto out; + + filp->private_data = info; + + return 0; + + out: + kfree(info); + return -ENOMEM; +} + +static ssize_t +tracing_buffers_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct ftrace_buffer_info *info = filp->private_data; + unsigned int pos; + ssize_t ret; + size_t size; + + /* Do we have previous read data to read? */ + if (info->read < PAGE_SIZE) + goto read; + + info->read = 0; + + ret = ring_buffer_read_page(info->tr->buffer, + &info->spare, + count, + info->cpu, 0); + if (ret < 0) + return 0; + + pos = ring_buffer_page_len(info->spare); + + if (pos < PAGE_SIZE) + memset(info->spare + pos, 0, PAGE_SIZE - pos); + +read: + size = PAGE_SIZE - info->read; + if (size > count) + size = count; + + ret = copy_to_user(ubuf, info->spare + info->read, size); + if (ret) + return -EFAULT; + *ppos += size; + info->read += size; + + return size; +} + +static int tracing_buffers_release(struct inode *inode, struct file *file) +{ + struct ftrace_buffer_info *info = file->private_data; + + ring_buffer_free_read_page(info->tr->buffer, info->spare); + kfree(info); + + return 0; +} + +struct buffer_ref { + struct ring_buffer *buffer; + void *page; + int ref; +}; + +static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + + if (--ref->ref) + return; + + ring_buffer_free_read_page(ref->buffer, ref->page); + kfree(ref); + buf->private = 0; +} + +static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return 1; +} + +static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct buffer_ref *ref = (struct buffer_ref *)buf->private; + + ref->ref++; +} + +/* Pipe buffer operations for a buffer. */ +static struct pipe_buf_operations buffer_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = buffer_pipe_buf_release, + .steal = buffer_pipe_buf_steal, + .get = buffer_pipe_buf_get, +}; + +/* + * Callback from splice_to_pipe(), if we need to release some pages + * at the end of the spd in case we error'ed out in filling the pipe. + */ +static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) +{ + struct buffer_ref *ref = + (struct buffer_ref *)spd->partial[i].private; + + if (--ref->ref) + return; + + ring_buffer_free_read_page(ref->buffer, ref->page); + kfree(ref); + spd->partial[i].private = 0; +} + +static ssize_t +tracing_buffers_splice_read(struct file *file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + struct ftrace_buffer_info *info = file->private_data; + struct partial_page partial[PIPE_BUFFERS]; + struct page *pages[PIPE_BUFFERS]; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &buffer_pipe_buf_ops, + .spd_release = buffer_spd_release, + }; + struct buffer_ref *ref; + int size, i; + size_t ret; + + /* + * We can't seek on a buffer input + */ + if (unlikely(*ppos)) + return -ESPIPE; + + + for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { + struct page *page; + int r; + + ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!ref) + break; + + ref->buffer = info->tr->buffer; + ref->page = ring_buffer_alloc_read_page(ref->buffer); + if (!ref->page) { + kfree(ref); + break; + } + + r = ring_buffer_read_page(ref->buffer, &ref->page, + len, info->cpu, 0); + if (r < 0) { + ring_buffer_free_read_page(ref->buffer, + ref->page); + kfree(ref); + break; + } + + /* + * zero out any left over data, this is going to + * user land. + */ + size = ring_buffer_page_len(ref->page); + if (size < PAGE_SIZE) + memset(ref->page + size, 0, PAGE_SIZE - size); + + page = virt_to_page(ref->page); + + spd.pages[i] = page; + spd.partial[i].len = PAGE_SIZE; + spd.partial[i].offset = 0; + spd.partial[i].private = (unsigned long)ref; + spd.nr_pages++; + } + + spd.nr_pages = i; + + /* did we read anything? */ + if (!spd.nr_pages) { + if (flags & SPLICE_F_NONBLOCK) + ret = -EAGAIN; + else + ret = 0; + /* TODO: block */ + return ret; + } + + ret = splice_to_pipe(pipe, &spd); + + return ret; +} + +static const struct file_operations tracing_buffers_fops = { + .open = tracing_buffers_open, + .read = tracing_buffers_read, + .release = tracing_buffers_release, + .splice_read = tracing_buffers_splice_read, + .llseek = no_llseek, +}; + #ifdef CONFIG_DYNAMIC_FTRACE int __weak ftrace_arch_read_dyn_info(char *buf, int size) @@ -3399,6 +3638,7 @@ static __init void create_trace_options_dir(void) static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; + struct dentry *buffers; struct dentry *entry; int cpu; @@ -3471,6 +3711,26 @@ static __init int tracer_init_debugfs(void) pr_warning("Could not create debugfs " "'trace_marker' entry\n"); + buffers = debugfs_create_dir("binary_buffers", d_tracer); + + if (!buffers) + pr_warning("Could not create buffers directory\n"); + else { + int cpu; + char buf[64]; + + for_each_tracing_cpu(cpu) { + sprintf(buf, "%d", cpu); + + entry = debugfs_create_file(buf, 0444, buffers, + (void *)(long)cpu, + &tracing_buffers_fops); + if (!entry) + pr_warning("Could not create debugfs buffers " + "'%s' entry\n", buf); + } + } + #ifdef CONFIG_DYNAMIC_FTRACE entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e606633fb498876847eccde2fcb1299751ff5310..561bb5c5d9881850fd39c5a0c839c0c9a0e87801 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -217,6 +217,7 @@ enum trace_flag_type { */ struct trace_array_cpu { atomic_t disabled; + void *buffer_page; /* ring buffer spare */ /* these fields get copied into max-trace: */ unsigned long trace_idx;