提交 363b785f 编写于 作者: D Don Zickus 提交者: Arnaldo Carvalho de Melo

perf tools: Speed up thread map generation

When trying to capture perf data on a system running SPECjbb2013, perf
hung for about 15 minutes.  This is because it took that long to gather
and process about 10,000 thread maps.

I don't think a user wants to wait that long.

Instead, recognize that thread maps are roughly equivalent to pid maps
and just quickly copy those instead.

To do this, I synthesize 'fork' events; processing them eventually calls
thread__fork(), which copies the maps over.

The overhead goes from 15 minutes down to a few seconds.

--
V2: based on Jiri's comments, moved malloc up a level
    and made sure the memory was freed
Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Link: http://lkml.kernel.org/r/1394808224-113774-1-git-send-email-dzickus@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
上级 09a71b97
...@@ -129,6 +129,28 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, ...@@ -129,6 +129,28 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
return tgid; return tgid;
} }
/*
 * Fill 'event' with a PERF_RECORD_FORK record describing thread 'pid' of
 * thread group 'tgid' and pass it to 'process'.
 *
 * The buffer behind 'event' must hold at least
 * sizeof(event->fork) + machine->id_hdr_size bytes: that whole span is
 * zeroed here and is also what gets reported as the record size.
 *
 * Returns 0 when 'process' returns 0, -1 otherwise.
 */
static int perf_event__synthesize_fork(struct perf_tool *tool,
				       union perf_event *event, pid_t pid,
				       pid_t tgid, perf_event__handler_t process,
				       struct machine *machine)
{
	const size_t record_size = sizeof(event->fork) + machine->id_hdr_size;

	/* start from a clean record, including the trailing id header area */
	memset(&event->fork, 0, record_size);

	event->fork.header.type = PERF_RECORD_FORK;
	event->fork.header.size = record_size;

	/*
	 * This is really a clone event, but it is synthesized as a fork:
	 * the thread group id is used for the child's pid and for both
	 * parent ids, and only the tid identifies the individual thread.
	 */
	event->fork.pid  = tgid;
	event->fork.tid  = pid;
	event->fork.ppid = tgid;
	event->fork.ptid = tgid;

	/* collapse any non-zero handler result into a plain -1 */
	return process(tool, event, &synth_sample, machine) != 0 ? -1 : 0;
}
int perf_event__synthesize_mmap_events(struct perf_tool *tool, int perf_event__synthesize_mmap_events(struct perf_tool *tool,
union perf_event *event, union perf_event *event,
pid_t pid, pid_t tgid, pid_t pid, pid_t tgid,
...@@ -278,6 +300,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, ...@@ -278,6 +300,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
static int __event__synthesize_thread(union perf_event *comm_event, static int __event__synthesize_thread(union perf_event *comm_event,
union perf_event *mmap_event, union perf_event *mmap_event,
union perf_event *fork_event,
pid_t pid, int full, pid_t pid, int full,
perf_event__handler_t process, perf_event__handler_t process,
struct perf_tool *tool, struct perf_tool *tool,
...@@ -326,9 +349,15 @@ static int __event__synthesize_thread(union perf_event *comm_event, ...@@ -326,9 +349,15 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (tgid == -1) if (tgid == -1)
return -1; return -1;
/* process the thread's maps too */ if (_pid == pid) {
rc = perf_event__synthesize_mmap_events(tool, mmap_event, _pid, tgid, /* process the parent's maps too */
process, machine, mmap_data); rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
process, machine, mmap_data);
} else {
/* only fork the tid's map, to save time */
rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
process, machine);
}
if (rc) if (rc)
return rc; return rc;
...@@ -344,7 +373,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, ...@@ -344,7 +373,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
struct machine *machine, struct machine *machine,
bool mmap_data) bool mmap_data)
{ {
union perf_event *comm_event, *mmap_event; union perf_event *comm_event, *mmap_event, *fork_event;
int err = -1, thread, j; int err = -1, thread, j;
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
...@@ -355,9 +384,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, ...@@ -355,9 +384,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
if (mmap_event == NULL) if (mmap_event == NULL)
goto out_free_comm; goto out_free_comm;
fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
if (fork_event == NULL)
goto out_free_mmap;
err = 0; err = 0;
for (thread = 0; thread < threads->nr; ++thread) { for (thread = 0; thread < threads->nr; ++thread) {
if (__event__synthesize_thread(comm_event, mmap_event, if (__event__synthesize_thread(comm_event, mmap_event,
fork_event,
threads->map[thread], 0, threads->map[thread], 0,
process, tool, machine, process, tool, machine,
mmap_data)) { mmap_data)) {
...@@ -383,6 +417,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, ...@@ -383,6 +417,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
/* if not, generate events for it */ /* if not, generate events for it */
if (need_leader && if (need_leader &&
__event__synthesize_thread(comm_event, mmap_event, __event__synthesize_thread(comm_event, mmap_event,
fork_event,
comm_event->comm.pid, 0, comm_event->comm.pid, 0,
process, tool, machine, process, tool, machine,
mmap_data)) { mmap_data)) {
...@@ -391,6 +426,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, ...@@ -391,6 +426,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
} }
} }
} }
free(fork_event);
out_free_mmap:
free(mmap_event); free(mmap_event);
out_free_comm: out_free_comm:
free(comm_event); free(comm_event);
...@@ -405,7 +442,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, ...@@ -405,7 +442,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
DIR *proc; DIR *proc;
char proc_path[PATH_MAX]; char proc_path[PATH_MAX];
struct dirent dirent, *next; struct dirent dirent, *next;
union perf_event *comm_event, *mmap_event; union perf_event *comm_event, *mmap_event, *fork_event;
int err = -1; int err = -1;
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
...@@ -416,6 +453,10 @@ int perf_event__synthesize_threads(struct perf_tool *tool, ...@@ -416,6 +453,10 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
if (mmap_event == NULL) if (mmap_event == NULL)
goto out_free_comm; goto out_free_comm;
fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
if (fork_event == NULL)
goto out_free_mmap;
if (machine__is_default_guest(machine)) if (machine__is_default_guest(machine))
return 0; return 0;
...@@ -423,7 +464,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, ...@@ -423,7 +464,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
proc = opendir(proc_path); proc = opendir(proc_path);
if (proc == NULL) if (proc == NULL)
goto out_free_mmap; goto out_free_fork;
while (!readdir_r(proc, &dirent, &next) && next) { while (!readdir_r(proc, &dirent, &next) && next) {
char *end; char *end;
...@@ -435,12 +476,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool, ...@@ -435,12 +476,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
* We may race with exiting thread, so don't stop just because * We may race with exiting thread, so don't stop just because
* one thread couldn't be synthesized. * one thread couldn't be synthesized.
*/ */
__event__synthesize_thread(comm_event, mmap_event, pid, 1, __event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
process, tool, machine, mmap_data); 1, process, tool, machine, mmap_data);
} }
err = 0; err = 0;
closedir(proc); closedir(proc);
out_free_fork:
free(fork_event);
out_free_mmap: out_free_mmap:
free(mmap_event); free(mmap_event);
out_free_comm: out_free_comm:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册