提交 cf8102f6 编写于 作者: I Ingo Molnar

Merge tag 'perf-fdarray-for-mingo' of...

Merge tag 'perf-fdarray-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf tooling updates from Arnaldo Carvalho de Melo.

Infrastructure changes:

  * We were not handling POLLHUP notifications for event file descriptors.

    Fix it by filtering entries in the events file descriptor array after
    poll() returns, refcounting mmaps so that when the last fd pointing to
    a perf mmap goes away we do the unmap. (Arnaldo Carvalho de Melo)

User visible changes:

  * Now 'record' and 'trace' properly exit when a target thread exits.
    (Arnaldo Carvalho de Melo)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
...@@ -10,9 +10,14 @@ LIB_OBJS= ...@@ -10,9 +10,14 @@ LIB_OBJS=
LIB_H += fs/debugfs.h LIB_H += fs/debugfs.h
LIB_H += fs/fs.h LIB_H += fs/fs.h
# See comment below about piggybacking...
LIB_H += fd/array.h
LIB_OBJS += $(OUTPUT)fs/debugfs.o LIB_OBJS += $(OUTPUT)fs/debugfs.o
LIB_OBJS += $(OUTPUT)fs/fs.o LIB_OBJS += $(OUTPUT)fs/fs.o
# XXX piggybacking here, need to introduce libapikfd, or rename this
# to plain libapik.a and make it have it all api goodies
LIB_OBJS += $(OUTPUT)fd/array.o
LIBFILE = libapikfs.a LIBFILE = libapikfs.a
...@@ -29,7 +34,7 @@ $(LIBFILE): $(LIB_OBJS) ...@@ -29,7 +34,7 @@ $(LIBFILE): $(LIB_OBJS)
$(LIB_OBJS): $(LIB_H) $(LIB_OBJS): $(LIB_H)
libapi_dirs: libapi_dirs:
$(QUIET_MKDIR)mkdir -p $(OUTPUT)fs/ $(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs
$(OUTPUT)%.o: %.c libapi_dirs $(OUTPUT)%.o: %.c libapi_dirs
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
......
/*
* Copyright (C) 2014, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
*
* Released under the GPL v2. (and only v2, not any later version)
*/
#include "array.h"
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * fdarray__init - reset an fdarray to the empty, unallocated state
 * @fda: array to initialise; any buffers it currently points to are NOT freed
 * @nr_autogrow: number of slots fdarray__add() asks for when the array is full
 */
void fdarray__init(struct fdarray *fda, int nr_autogrow)
{
	fda->nr_autogrow = nr_autogrow;
	fda->nr_alloc = 0;
	fda->nr = 0;
	fda->priv = NULL;
	fda->entries = NULL;
}
/*
 * fdarray__grow - make room for @nr more entries
 * @fda: array to enlarge
 * @nr: number of additional slots; 0 is a no-op, negative is rejected
 *
 * Resizes both the pollfd array and the parallel priv array.  The contents of
 * the newly added slots are not initialised.
 *
 * Returns 0 on success, -EINVAL for a negative @nr and -ENOMEM when either
 * reallocation fails.  On failure @fda stays fully usable: fda->nr_alloc is
 * unchanged and both pointers reference live buffers holding the old entries.
 */
int fdarray__grow(struct fdarray *fda, int nr)
{
	void *priv;
	int nr_alloc;
	size_t psize, size;
	struct pollfd *entries;

	if (nr < 0)
		return -EINVAL;
	/* Nothing to do, and avoids a zero-sized realloc() on an empty array. */
	if (nr == 0)
		return 0;

	nr_alloc = fda->nr_alloc + nr;
	psize = sizeof(fda->priv[0]) * nr_alloc;
	size = sizeof(struct pollfd) * nr_alloc;

	entries = realloc(fda->entries, size);
	if (entries == NULL)
		return -ENOMEM;

	/*
	 * realloc() succeeded, so the old fda->entries pointer may no longer
	 * be valid.  Publish the new one right away instead of freeing it on
	 * a later failure, which would leave fda->entries dangling.
	 */
	fda->entries = entries;

	priv = realloc(fda->priv, psize);
	if (priv == NULL)
		return -ENOMEM;

	fda->priv = priv;
	fda->nr_alloc = nr_alloc;
	return 0;
}
/*
 * fdarray__new - allocate an fdarray with room for @nr_alloc entries
 * @nr_alloc: number of slots to allocate up front
 * @nr_autogrow: growth step recorded for later fdarray__add() calls
 *
 * Returns the new array, or NULL if any allocation failed.
 */
struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow)
{
	struct fdarray *fda = calloc(1, sizeof(*fda));

	if (fda == NULL)
		return NULL;

	if (fdarray__grow(fda, nr_alloc) != 0) {
		free(fda);
		return NULL;
	}

	fda->nr_autogrow = nr_autogrow;
	return fda;
}
/*
 * fdarray__exit - release the buffers owned by @fda
 *
 * Frees the pollfd and priv arrays and resets @fda to the empty state with an
 * autogrow step of 0.  The struct itself is not freed.
 */
void fdarray__exit(struct fdarray *fda)
{
	free(fda->priv);
	free(fda->entries);
	fdarray__init(fda, 0);
}
/*
 * fdarray__delete - destroy an fdarray obtained from fdarray__new()
 * @fda: array to free; NULL is accepted and ignored, like free()
 */
void fdarray__delete(struct fdarray *fda)
{
	if (fda == NULL)
		return;

	fdarray__exit(fda);
	free(fda);
}
/*
 * fdarray__add - append a file descriptor to the array
 * @fda: array to append to
 * @fd: file descriptor to poll
 * @revents: event mask to request; despite its name it is stored in the
 *	     pollfd 'events' field
 *
 * Grows the array by fda->nr_autogrow slots when it is full.
 *
 * Returns the index of the new entry, or -ENOMEM if no slot could be made
 * available.
 */
int fdarray__add(struct fdarray *fda, int fd, short revents)
{
	int pos = fda->nr;

	if (pos == fda->nr_alloc) {
		if (fdarray__grow(fda, fda->nr_autogrow) < 0)
			return -ENOMEM;
		/*
		 * A growth step <= 0 (e.g. after fdarray__exit()) can succeed
		 * without adding a slot; writing entries[pos] would then
		 * overflow the array.
		 */
		if (pos >= fda->nr_alloc)
			return -ENOMEM;
	}

	fda->entries[pos].fd = fd;
	fda->entries[pos].events = revents;
	/* Keep fdarray__filter() from seeing garbage before the first poll(). */
	fda->entries[pos].revents = 0;
	fda->nr++;
	return pos;
}
/*
 * fdarray__filter - drop every entry whose revents intersects @revents
 * @fda: array to filter in place
 * @revents: mask tested against each entry's returned events
 * @entry_destructor: optional callback invoked with the index of each entry
 *		      about to be dropped
 *
 * Surviving entries are compacted towards the start of both the pollfd and
 * the priv arrays, preserving their relative order.
 *
 * Returns the number of entries left, which is also stored in fda->nr.
 */
int fdarray__filter(struct fdarray *fda, short revents,
		    void (*entry_destructor)(struct fdarray *fda, int fd))
{
	int src, kept = 0;

	for (src = 0; src < fda->nr; ++src) {
		if ((fda->entries[src].revents & revents) == 0) {
			/* Survivor: slide it down over the dropped slots. */
			if (src != kept) {
				fda->entries[kept] = fda->entries[src];
				fda->priv[kept] = fda->priv[src];
			}
			++kept;
		} else if (entry_destructor != NULL) {
			entry_destructor(fda, src);
		}
	}

	fda->nr = kept;
	return kept;
}
/*
 * fdarray__poll - poll() the fda->nr entries currently in use
 * @timeout: passed straight through to poll()
 *
 * Returns whatever poll() returns.
 */
int fdarray__poll(struct fdarray *fda, int timeout)
{
	struct pollfd *fds = fda->entries;
	nfds_t nfds = fda->nr;

	return poll(fds, nfds, timeout);
}
/*
 * fdarray__fprintf - print the array as "<nr> [ fd0, fd1, ... ]"
 * @fda: array to dump
 * @fp: stream to write to
 *
 * Returns the sum of the fprintf() return values.
 */
int fdarray__fprintf(struct fdarray *fda, FILE *fp)
{
	int i;
	int printed = fprintf(fp, "%d [ ", fda->nr);

	for (i = 0; i < fda->nr; ++i) {
		const char *sep = i ? ", " : "";

		printed += fprintf(fp, "%s%d", sep, fda->entries[i].fd);
	}

	printed += fprintf(fp, " ]");
	return printed;
}
#ifndef __API_FD_ARRAY__
#define __API_FD_ARRAY__
#include <stdio.h>
struct pollfd;
/**
 * struct fdarray: Array of file descriptors
 *
 * @nr: Number of entries currently in use; this is the count handed to
 *	poll() by fdarray__poll().
 * @nr_alloc: Number of slots allocated in both @entries and @priv.
 * @nr_autogrow: Number of slots fdarray__add() asks fdarray__grow() for when
 *		 the array is full.
 * @entries: The pollfd array itself.
 * @priv: Per array entry priv area, users should access just its contents,
 *	  not set it to anything, as it is kept in synch with @entries, being
 *	  realloc'ed in fdarray__grow() and compacted in fdarray__filter().
 *
 *	  I.e. using 'fda->priv[N].idx = value' where N < fda->nr is ok,
 *	  but doing 'fda->priv = malloc(M)' is not allowed.
 */
struct fdarray {
int nr;
int nr_alloc;
int nr_autogrow;
struct pollfd *entries;
union {
int idx;
} *priv;
};
void fdarray__init(struct fdarray *fda, int nr_autogrow);
void fdarray__exit(struct fdarray *fda);
struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
void fdarray__delete(struct fdarray *fda);
int fdarray__add(struct fdarray *fda, int fd, short revents);
int fdarray__poll(struct fdarray *fda, int timeout);
int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd));
int fdarray__grow(struct fdarray *fda, int extra);
int fdarray__fprintf(struct fdarray *fda, FILE *fp);
/* Number of allocated slots that are not yet in use. */
static inline int fdarray__available_entries(struct fdarray *fda)
{
	int unused = fda->nr_alloc - fda->nr;

	return unused;
}
#endif /* __API_FD_ARRAY__ */
...@@ -402,6 +402,7 @@ LIB_OBJS += $(OUTPUT)tests/perf-record.o ...@@ -402,6 +402,7 @@ LIB_OBJS += $(OUTPUT)tests/perf-record.o
LIB_OBJS += $(OUTPUT)tests/rdpmc.o LIB_OBJS += $(OUTPUT)tests/rdpmc.o
LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
LIB_OBJS += $(OUTPUT)tests/fdarray.o
LIB_OBJS += $(OUTPUT)tests/pmu.o LIB_OBJS += $(OUTPUT)tests/pmu.o
LIB_OBJS += $(OUTPUT)tests/hists_common.o LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o LIB_OBJS += $(OUTPUT)tests/hists_link.o
...@@ -769,7 +770,7 @@ $(LIBTRACEEVENT)-clean: ...@@ -769,7 +770,7 @@ $(LIBTRACEEVENT)-clean:
install-traceevent-plugins: $(LIBTRACEEVENT) install-traceevent-plugins: $(LIBTRACEEVENT)
$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins
LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch]) LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch])
# if subdir is set, we've been called from above so target has been built # if subdir is set, we've been called from above so target has been built
# already # already
......
...@@ -919,15 +919,8 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) ...@@ -919,15 +919,8 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
signal(SIGINT, sig_handler); signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler); signal(SIGTERM, sig_handler);
/* copy pollfds -- need to add timerfd and stdin */ /* use pollfds -- need to add timerfd and stdin */
nr_fds = kvm->evlist->nr_fds; nr_fds = kvm->evlist->pollfd.nr;
pollfds = zalloc(sizeof(struct pollfd) * (nr_fds + 2));
if (!pollfds) {
err = -ENOMEM;
goto out;
}
memcpy(pollfds, kvm->evlist->pollfd,
sizeof(struct pollfd) * kvm->evlist->nr_fds);
/* add timer fd */ /* add timer fd */
if (perf_kvm__timerfd_create(kvm) < 0) { if (perf_kvm__timerfd_create(kvm) < 0) {
...@@ -935,17 +928,21 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) ...@@ -935,17 +928,21 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
goto out; goto out;
} }
pollfds[nr_fds].fd = kvm->timerfd; if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd))
pollfds[nr_fds].events = POLLIN; goto out;
nr_fds++; nr_fds++;
pollfds[nr_fds].fd = fileno(stdin); if (perf_evlist__add_pollfd(kvm->evlist, fileno(stdin)))
pollfds[nr_fds].events = POLLIN; goto out;
nr_stdin = nr_fds; nr_stdin = nr_fds;
nr_fds++; nr_fds++;
if (fd_set_nonblock(fileno(stdin)) != 0) if (fd_set_nonblock(fileno(stdin)) != 0)
goto out; goto out;
pollfds = kvm->evlist->pollfd.entries;
/* everything is good - enable the events and process */ /* everything is good - enable the events and process */
perf_evlist__enable(kvm->evlist); perf_evlist__enable(kvm->evlist);
...@@ -979,7 +976,6 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm) ...@@ -979,7 +976,6 @@ static int kvm_events_live_report(struct perf_kvm_stat *kvm)
close(kvm->timerfd); close(kvm->timerfd);
tcsetattr(0, TCSAFLUSH, &save); tcsetattr(0, TCSAFLUSH, &save);
free(pollfds);
return err; return err;
} }
......
...@@ -308,7 +308,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) ...@@ -308,7 +308,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
struct record_opts *opts = &rec->opts; struct record_opts *opts = &rec->opts;
struct perf_data_file *file = &rec->file; struct perf_data_file *file = &rec->file;
struct perf_session *session; struct perf_session *session;
bool disabled = false; bool disabled = false, draining = false;
rec->progname = argv[0]; rec->progname = argv[0];
...@@ -457,9 +457,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) ...@@ -457,9 +457,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
} }
if (hits == rec->samples) { if (hits == rec->samples) {
if (done) if (done || draining)
break; break;
err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1); err = perf_evlist__poll(rec->evlist, -1);
/* /*
* Propagate error, only if there's any. Ignore positive * Propagate error, only if there's any. Ignore positive
* number of returned events and interrupt error. * number of returned events and interrupt error.
...@@ -467,6 +467,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) ...@@ -467,6 +467,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (err > 0 || (err < 0 && errno == EINTR)) if (err > 0 || (err < 0 && errno == EINTR))
err = 0; err = 0;
waking++; waking++;
if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
draining = true;
} }
/* /*
......
...@@ -964,7 +964,7 @@ static int __cmd_top(struct perf_top *top) ...@@ -964,7 +964,7 @@ static int __cmd_top(struct perf_top *top)
perf_evlist__enable(top->evlist); perf_evlist__enable(top->evlist);
/* Wait for a minimal set of events before starting the snapshot */ /* Wait for a minimal set of events before starting the snapshot */
poll(top->evlist->pollfd, top->evlist->nr_fds, 100); perf_evlist__poll(top->evlist, 100);
perf_top__mmap_read(top); perf_top__mmap_read(top);
...@@ -991,7 +991,7 @@ static int __cmd_top(struct perf_top *top) ...@@ -991,7 +991,7 @@ static int __cmd_top(struct perf_top *top)
perf_top__mmap_read(top); perf_top__mmap_read(top);
if (hits == top->samples) if (hits == top->samples)
ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100); ret = perf_evlist__poll(top->evlist, 100);
} }
ret = 0; ret = 0;
......
...@@ -2044,6 +2044,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) ...@@ -2044,6 +2044,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
int err = -1, i; int err = -1, i;
unsigned long before; unsigned long before;
const bool forks = argc > 0; const bool forks = argc > 0;
bool draining = false;
char sbuf[STRERR_BUFSIZE]; char sbuf[STRERR_BUFSIZE];
trace->live = true; trace->live = true;
...@@ -2171,8 +2172,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv) ...@@ -2171,8 +2172,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (trace->nr_events == before) { if (trace->nr_events == before) {
int timeout = done ? 100 : -1; int timeout = done ? 100 : -1;
if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0) if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
draining = true;
goto again; goto again;
}
} else { } else {
goto again; goto again;
} }
......
...@@ -157,6 +157,14 @@ static struct test { ...@@ -157,6 +157,14 @@ static struct test {
.desc = "Test tracking with sched_switch", .desc = "Test tracking with sched_switch",
.func = test__switch_tracking, .func = test__switch_tracking,
}, },
{
.desc = "Filter fds with revents mask in a fdarray",
.func = test__fdarray__filter,
},
{
.desc = "Add fd to a fdarray, making it autogrow",
.func = test__fdarray__add,
},
{ {
.func = NULL, .func = NULL,
}, },
......
#include <api/fd/array.h>
#include "util/debug.h"
#include "tests/tests.h"
/*
 * Mark every allocated slot as in use, give the slots descending fake fd
 * numbers (nr_alloc down to 1) and set revents to @revents in each of them.
 */
static void fdarray__init_revents(struct fdarray *fda, short revents)
{
	int i, nr = fda->nr_alloc;

	fda->nr = nr;

	for (i = 0; i < nr; ++i) {
		struct pollfd *entry = &fda->entries[i];

		entry->fd = nr - i;
		entry->revents = revents;
	}
}
/*
 * In verbose mode, print "\n<prefix>: " followed by the fdarray__fprintf()
 * dump of @fda.  Returns the number of characters reported by the fprintf
 * calls, or 0 when not verbose.
 */
static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE *fp)
{
	int printed;

	if (!verbose)
		return 0;

	printed = fprintf(fp, "\n%s: ", prefix);
	printed += fdarray__fprintf(fda, fp);
	return printed;
}
/*
 * Check fdarray__filter() on a five entry array: nothing filtered, everything
 * filtered, and one or two survivors that must be compacted to the front in
 * their original order.
 *
 * Returns 0 on success, TEST_FAIL otherwise.
 */
int test__fdarray__filter(void)
{
	int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
	struct fdarray *fda = fdarray__new(5, 5);

	if (fda == NULL) {
		pr_debug("\nfdarray__new() failed!");
		goto out;
	}

	/* No entry has POLLHUP set, so nothing may be dropped. */
	fdarray__init_revents(fda, POLLIN);
	nr_fds = fdarray__filter(fda, POLLHUP, NULL);
	if (nr_fds != fda->nr_alloc) {
		pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything",
			 nr_fds, fda->nr_alloc);
		goto out_delete;
	}

	/* Every entry has POLLHUP set, so the array must end up empty. */
	fdarray__init_revents(fda, POLLHUP);
	nr_fds = fdarray__filter(fda, POLLHUP, NULL);
	if (nr_fds != 0) {
		/* The expected count here is 0, not the allocation size. */
		pr_debug("\nfdarray__filter()=%d != 0, should have filtered all fds",
			 nr_fds);
		goto out_delete;
	}

	/* A single survivor in the middle must move to slot 0. */
	fdarray__init_revents(fda, POLLHUP);
	fda->entries[2].revents = POLLIN;
	expected_fd[0] = fda->entries[2].fd;

	pr_debug("\nfiltering all but fda->entries[2]:");
	fdarray__fprintf_prefix(fda, "before", stderr);
	nr_fds = fdarray__filter(fda, POLLHUP, NULL);
	fdarray__fprintf_prefix(fda, " after", stderr);
	if (nr_fds != 1) {
		pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds);
		goto out_delete;
	}

	if (fda->entries[0].fd != expected_fd[0]) {
		pr_debug("\nfda->entries[0].fd=%d != %d\n",
			 fda->entries[0].fd, expected_fd[0]);
		goto out_delete;
	}

	/* Two survivors must be packed into slots 0 and 1, order preserved. */
	fdarray__init_revents(fda, POLLHUP);
	fda->entries[0].revents = POLLIN;
	expected_fd[0] = fda->entries[0].fd;
	fda->entries[3].revents = POLLIN;
	expected_fd[1] = fda->entries[3].fd;

	pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
	fdarray__fprintf_prefix(fda, "before", stderr);
	nr_fds = fdarray__filter(fda, POLLHUP, NULL);
	fdarray__fprintf_prefix(fda, " after", stderr);
	if (nr_fds != 2) {
		pr_debug("\nfdarray__filter()=%d != 2, should have left just two events",
			 nr_fds);
		goto out_delete;
	}

	for (fd = 0; fd < 2; ++fd) {
		if (fda->entries[fd].fd != expected_fd[fd]) {
			pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd,
				 fda->entries[fd].fd, expected_fd[fd]);
			goto out_delete;
		}
	}

	pr_debug("\n");

	err = 0;
out_delete:
	fdarray__delete(fda);
out:
	return err;
}
/*
 * Check fdarray__add(): fill a two slot array, then add two more entries so
 * that it has to autogrow, verifying fd and event mask of every entry.
 *
 * Returns 0 on success, TEST_FAIL otherwise.
 */
int test__fdarray__add(void)
{
	int err = TEST_FAIL;
	struct fdarray *fda = fdarray__new(2, 2);

	if (fda == NULL) {
		pr_debug("\nfdarray__new() failed!");
		goto out;
	}

/* Verify that slot _idx holds _fd with the requested event mask. */
#define FDA_CHECK(_idx, _fd, _revents)					   \
	do {								   \
		if (fda->entries[_idx].fd != (_fd)) {			   \
			pr_debug("\n%d: fda->entries[%d](%d) != %d!",	   \
				 __LINE__, _idx, fda->entries[_idx].fd,	   \
				 (_fd));				   \
			goto out_delete;				   \
		}							   \
		if (fda->entries[_idx].events != (_revents)) {		   \
			pr_debug("\n%d: fda->entries[%d].events(%d) != %d!", \
				 __LINE__, _idx, fda->entries[_idx].events, \
				 (_revents));				   \
			goto out_delete;				   \
		}							   \
	} while (0)

/* Add _fd, expect fda->nr to become _nr and the entry to land in slot _idx. */
#define FDA_ADD(_idx, _fd, _revents, _nr)				   \
	do {								   \
		if (fdarray__add(fda, (_fd), (_revents)) < 0) {		   \
			pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \
				 __LINE__, (_fd), (_revents));		   \
			goto out_delete;				   \
		}							   \
		if (fda->nr != (_nr)) {					   \
			pr_debug("\n%d: fdarray__add(fda, %d, %d)=%d != %d", \
				 __LINE__, (_fd), (_revents), fda->nr,	   \
				 (_nr));				   \
			goto out_delete;				   \
		}							   \
		FDA_CHECK(_idx, _fd, _revents);				   \
	} while (0)

	FDA_ADD(0, 1, POLLIN, 1);
	FDA_ADD(1, 2, POLLERR, 2);

	fdarray__fprintf_prefix(fda, "before growing array", stderr);

	/* The array was created with two slots, so this add must grow it. */
	FDA_ADD(2, 35, POLLHUP, 3);

	if (fda->entries == NULL) {
		pr_debug("\nfdarray__add(fda, 35, POLLHUP) should have allocated fda->entries!");
		goto out_delete;
	}

	fdarray__fprintf_prefix(fda, "after 3rd add", stderr);

	FDA_ADD(3, 88, POLLIN | POLLOUT, 4);

	fdarray__fprintf_prefix(fda, "after 4th add", stderr);

	/* Growing must not have disturbed the entries added earlier. */
	FDA_CHECK(0, 1, POLLIN);
	FDA_CHECK(1, 2, POLLERR);
	FDA_CHECK(2, 35, POLLHUP);
	FDA_CHECK(3, 88, POLLIN | POLLOUT);

#undef FDA_ADD
#undef FDA_CHECK

	pr_debug("\n");

	err = 0;
out_delete:
	fdarray__delete(fda);
out:
	return err;
}
...@@ -105,7 +105,7 @@ int test__syscall_open_tp_fields(void) ...@@ -105,7 +105,7 @@ int test__syscall_open_tp_fields(void)
} }
if (nr_events == before) if (nr_events == before)
poll(evlist->pollfd, evlist->nr_fds, 10); perf_evlist__poll(evlist, 10);
if (++nr_polls > 5) { if (++nr_polls > 5) {
pr_debug("%s: no events!\n", __func__); pr_debug("%s: no events!\n", __func__);
......
...@@ -268,7 +268,7 @@ int test__PERF_RECORD(void) ...@@ -268,7 +268,7 @@ int test__PERF_RECORD(void)
* perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does. * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
*/ */
if (total_events == before && false) if (total_events == before && false)
poll(evlist->pollfd, evlist->nr_fds, -1); perf_evlist__poll(evlist, -1);
sleep(1); sleep(1);
if (++wakeups > 5) { if (++wakeups > 5) {
......
...@@ -105,7 +105,7 @@ int test__task_exit(void) ...@@ -105,7 +105,7 @@ int test__task_exit(void)
} }
if (!exited || !nr_exit) { if (!exited || !nr_exit) {
poll(evlist->pollfd, evlist->nr_fds, -1); perf_evlist__poll(evlist, -1);
goto retry; goto retry;
} }
......
...@@ -49,6 +49,8 @@ int test__thread_mg_share(void); ...@@ -49,6 +49,8 @@ int test__thread_mg_share(void);
int test__hists_output(void); int test__hists_output(void);
int test__hists_cumulate(void); int test__hists_cumulate(void);
int test__switch_tracking(void); int test__switch_tracking(void);
int test__fdarray__filter(void);
int test__fdarray__add(void);
#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT #ifdef HAVE_DWARF_UNWIND_SUPPORT
......
...@@ -25,6 +25,9 @@ ...@@ -25,6 +25,9 @@
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/hash.h> #include <linux/hash.h>
static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
...@@ -37,6 +40,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, ...@@ -37,6 +40,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
INIT_HLIST_HEAD(&evlist->heads[i]); INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries); INIT_LIST_HEAD(&evlist->entries);
perf_evlist__set_maps(evlist, cpus, threads); perf_evlist__set_maps(evlist, cpus, threads);
fdarray__init(&evlist->pollfd, 64);
evlist->workload.pid = -1; evlist->workload.pid = -1;
} }
...@@ -102,7 +106,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) ...@@ -102,7 +106,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist)
{ {
zfree(&evlist->mmap); zfree(&evlist->mmap);
zfree(&evlist->pollfd); fdarray__exit(&evlist->pollfd);
} }
void perf_evlist__delete(struct perf_evlist *evlist) void perf_evlist__delete(struct perf_evlist *evlist)
...@@ -402,7 +406,7 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, ...@@ -402,7 +406,7 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
return perf_evlist__enable_event_thread(evlist, evsel, idx); return perf_evlist__enable_event_thread(evlist, evsel, idx);
} }
static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{ {
int nr_cpus = cpu_map__nr(evlist->cpus); int nr_cpus = cpu_map__nr(evlist->cpus);
int nr_threads = thread_map__nr(evlist->threads); int nr_threads = thread_map__nr(evlist->threads);
...@@ -416,16 +420,50 @@ static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) ...@@ -416,16 +420,50 @@ static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
nfds += nr_cpus * nr_threads; nfds += nr_cpus * nr_threads;
} }
evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); if (fdarray__available_entries(&evlist->pollfd) < nfds &&
return evlist->pollfd != NULL ? 0 : -ENOMEM; fdarray__grow(&evlist->pollfd, nfds) < 0)
return -ENOMEM;
return 0;
} }
void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
{ {
fcntl(fd, F_SETFL, O_NONBLOCK); int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
evlist->pollfd[evlist->nr_fds].fd = fd; /*
evlist->pollfd[evlist->nr_fds].events = POLLIN; * Save the idx so that when we filter out fds POLLHUP'ed we can
evlist->nr_fds++; * close the associated evlist->mmap[] entry.
*/
if (pos >= 0) {
evlist->pollfd.priv[pos].idx = idx;
fcntl(fd, F_SETFL, O_NONBLOCK);
}
return pos;
}
/*
 * Add @fd to the evlist's poll array without tying it to an mmap slot: the
 * entry's priv idx is recorded as -1.  Returns the position of the new entry
 * or a negative value on failure.
 */
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	int pos = __perf_evlist__add_pollfd(evlist, fd, -1);

	return pos;
}
/*
 * fdarray__filter() destructor: drop the mmap reference held on behalf of
 * the poll entry at index @fd.
 *
 * Entries added through perf_evlist__add_pollfd() carry idx -1 because they
 * have no mmap; they are skipped so that evlist->mmap[] is never indexed
 * with a negative value.
 */
static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
{
	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
	int idx = fda->priv[fd].idx;

	if (idx < 0)
		return;

	perf_evlist__mmap_put(evlist, idx);
}
/*
 * Remove from the evlist's poll array every entry whose revents intersects
 * @revents_and_mask, releasing the associated mmap reference for each one.
 * Returns the number of entries left.
 */
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	struct fdarray *fda = &evlist->pollfd;

	return fdarray__filter(fda, revents_and_mask, perf_evlist__munmap_filtered);
}
int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
return fdarray__poll(&evlist->pollfd, timeout);
} }
static void perf_evlist__id_hash(struct perf_evlist *evlist, static void perf_evlist__id_hash(struct perf_evlist *evlist,
...@@ -638,14 +676,36 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) ...@@ -638,14 +676,36 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
return event; return event;
} }
/*
 * Returns true when the ring buffer has nothing left to consume, i.e. the
 * head read from the mmap equals the position we have already read up to
 * (md->prev).  The original comparison was inverted and reported a buffer
 * with pending data as empty.
 */
static bool perf_mmap__empty(struct perf_mmap *md)
{
	return perf_mmap__read_head(md) == md->prev;
}
/* Take one more reference on the mmap at slot @idx. */
static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
	evlist->mmap[idx].refcnt += 1;
}
static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
BUG_ON(evlist->mmap[idx].refcnt == 0);
if (--evlist->mmap[idx].refcnt == 0)
__perf_evlist__munmap(evlist, idx);
}
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{ {
struct perf_mmap *md = &evlist->mmap[idx];
if (!evlist->overwrite) { if (!evlist->overwrite) {
struct perf_mmap *md = &evlist->mmap[idx];
unsigned int old = md->prev; unsigned int old = md->prev;
perf_mmap__write_tail(md, old); perf_mmap__write_tail(md, old);
} }
if (md->refcnt == 1 && perf_mmap__empty(md))
perf_evlist__mmap_put(evlist, idx);
} }
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
...@@ -653,6 +713,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) ...@@ -653,6 +713,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
if (evlist->mmap[idx].base != NULL) { if (evlist->mmap[idx].base != NULL) {
munmap(evlist->mmap[idx].base, evlist->mmap_len); munmap(evlist->mmap[idx].base, evlist->mmap_len);
evlist->mmap[idx].base = NULL; evlist->mmap[idx].base = NULL;
evlist->mmap[idx].refcnt = 0;
} }
} }
...@@ -686,6 +747,20 @@ struct mmap_params { ...@@ -686,6 +747,20 @@ struct mmap_params {
static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
struct mmap_params *mp, int fd) struct mmap_params *mp, int fd)
{ {
/*
* The last one will be done at perf_evlist__mmap_consume(), so that we
* make sure we don't prevent tools from consuming every last event in
* the ring buffer.
*
* I.e. we can get the POLLHUP meaning that the fd doesn't exist
* anymore, but the last events for it are still in the ring buffer,
* waiting to be consumed.
*
* Tools can chose to ignore this at their own discretion, but the
* evlist layer can't just drop it when filtering events in
* perf_evlist__filter_pollfd().
*/
evlist->mmap[idx].refcnt = 2;
evlist->mmap[idx].prev = 0; evlist->mmap[idx].prev = 0;
evlist->mmap[idx].mask = mp->mask; evlist->mmap[idx].mask = mp->mask;
evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot, evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
...@@ -697,7 +772,6 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, ...@@ -697,7 +772,6 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
return -1; return -1;
} }
perf_evlist__add_pollfd(evlist, fd);
return 0; return 0;
} }
...@@ -722,6 +796,13 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, ...@@ -722,6 +796,13 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
} else { } else {
if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
return -1; return -1;
perf_evlist__mmap_get(evlist, idx);
}
if (__perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
perf_evlist__mmap_put(evlist, idx);
return -1;
} }
if ((evsel->attr.read_format & PERF_FORMAT_ID) && if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
...@@ -881,7 +962,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, ...@@ -881,7 +962,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
return -ENOMEM; return -ENOMEM;
if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0) if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM; return -ENOMEM;
evlist->overwrite = overwrite; evlist->overwrite = overwrite;
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define __PERF_EVLIST_H 1 #define __PERF_EVLIST_H 1
#include <linux/list.h> #include <linux/list.h>
#include <api/fd/array.h>
#include <stdio.h> #include <stdio.h>
#include "../perf.h" #include "../perf.h"
#include "event.h" #include "event.h"
...@@ -17,9 +18,15 @@ struct record_opts; ...@@ -17,9 +18,15 @@ struct record_opts;
#define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_BITS 8
#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
/**
* struct perf_mmap - perf's ring buffer mmap details
*
* @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
*/
struct perf_mmap { struct perf_mmap {
void *base; void *base;
int mask; int mask;
int refcnt;
unsigned int prev; unsigned int prev;
char event_copy[PERF_SAMPLE_MAX_SIZE]; char event_copy[PERF_SAMPLE_MAX_SIZE];
}; };
...@@ -29,7 +36,6 @@ struct perf_evlist { ...@@ -29,7 +36,6 @@ struct perf_evlist {
struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
int nr_entries; int nr_entries;
int nr_groups; int nr_groups;
int nr_fds;
int nr_mmaps; int nr_mmaps;
size_t mmap_len; size_t mmap_len;
int id_pos; int id_pos;
...@@ -40,8 +46,8 @@ struct perf_evlist { ...@@ -40,8 +46,8 @@ struct perf_evlist {
pid_t pid; pid_t pid;
} workload; } workload;
bool overwrite; bool overwrite;
struct fdarray pollfd;
struct perf_mmap *mmap; struct perf_mmap *mmap;
struct pollfd *pollfd;
struct thread_map *threads; struct thread_map *threads;
struct cpu_map *cpus; struct cpu_map *cpus;
struct perf_evsel *selected; struct perf_evsel *selected;
...@@ -82,7 +88,11 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, ...@@ -82,7 +88,11 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
int cpu, int thread, u64 id); int cpu, int thread, u64 id);
void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask);
int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
......
...@@ -736,7 +736,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist, ...@@ -736,7 +736,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout)) if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
return NULL; return NULL;
n = poll(evlist->pollfd, evlist->nr_fds, timeout); n = perf_evlist__poll(evlist, timeout);
if (n < 0) { if (n < 0) {
PyErr_SetFromErrno(PyExc_OSError); PyErr_SetFromErrno(PyExc_OSError);
return NULL; return NULL;
...@@ -753,9 +753,9 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, ...@@ -753,9 +753,9 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
PyObject *list = PyList_New(0); PyObject *list = PyList_New(0);
int i; int i;
for (i = 0; i < evlist->nr_fds; ++i) { for (i = 0; i < evlist->pollfd.nr; ++i) {
PyObject *file; PyObject *file;
FILE *fp = fdopen(evlist->pollfd[i].fd, "r"); FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r");
if (fp == NULL) if (fp == NULL)
goto free_list; goto free_list;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册