提交 12843a3a 编写于 作者: X Xin Pan

Firt timeline version

上级 fee90b50
......@@ -72,7 +72,7 @@ copy(inference_lib DEPENDS paddle_fluid_shared
)
set(module "platform")
copy(platform_lib
copy(platform_lib DEPS profiler_py_proto
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details
)
......
proto_library(profiler_proto SRCS profiler.proto)
py_proto_compile(profiler_py_proto SRCS profiler.proto)
add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(profiler_py_proto profiler_py_proto_init)
add_custom_command(TARGET profiler_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/fluid/proto/profiler
COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/fluid/proto/profiler
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if(WITH_GPU)
cc_library(enforce SRCS enforce.cc DEPS)
......
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device_tracer.h"
#include <google/protobuf/text_format.h>
#include <fstream>
#include <map>
#include <mutex>
#include "glog/logging.h"
......@@ -177,7 +179,7 @@ class DeviceTracerImpl : public DeviceTracer {
enabled_ = true;
}
proto::Profile GenProfile() {
proto::Profile GenProfile(const std::string &profile_path) {
std::lock_guard<std::mutex> l(trace_mu_);
proto::Profile profile_pb;
profile_pb.set_start_ns(start_ns_);
......@@ -196,13 +198,12 @@ class DeviceTracerImpl : public DeviceTracer {
event->set_device_id(r.device_id);
event_times[event->name()].push_back(r.end_ns - r.start_ns);
}
for (const auto &et : event_times) {
fprintf(
stderr, "%s: total: %fms invoked cuda kernels: %lu\n",
et.first.c_str(),
std::accumulate(et.second.begin(), et.second.end(), 0) / 1000000.0,
et.second.size());
}
std::string profile_str;
google::protobuf::TextFormat::PrintToString(profile_pb, &profile_str);
std::ofstream profile_f;
profile_f.open(profile_path, std::ios::out | std::ios::trunc);
profile_f << profile_str;
profile_f.close();
return profile_pb;
}
......@@ -259,7 +260,9 @@ class DeviceTracerDummy : public DeviceTracer {
void Enable() {}
proto::Profile GenProfile() { return proto::Profile(); }
proto::Profile GenProfile(const std::string &profile_path) {
return proto::Profile();
}
void Disable() {}
};
......
......@@ -55,7 +55,7 @@ class DeviceTracer {
uint32_t correlation_id) = 0;
// Generate a proto after done (Disabled).
virtual proto::Profile GenProfile() = 0;
virtual proto::Profile GenProfile(const std::string& profile_path) = 0;
virtual bool IsEnabled() = 0;
};
......
......@@ -199,7 +199,8 @@ std::vector<std::vector<Event>> GetAllEvents() {
return result;
}
void DisableProfiler(EventSortingKey sorted_key) {
void DisableProfiler(EventSortingKey sorted_key,
const std::string& profile_path) {
PADDLE_ENFORCE(g_state != ProfilerState::kDisabled,
"Can't disable profiling, since it's not starting.");
// Mark the profiling stop.
......@@ -209,7 +210,7 @@ void DisableProfiler(EventSortingKey sorted_key) {
DeviceTracer* tracer = GetDeviceTracer();
if (g_profiler_place == "All" && tracer && tracer->IsEnabled()) {
tracer->Disable();
tracer->GenProfile();
tracer->GenProfile(profile_path);
}
std::vector<std::vector<Event>> all_events = GetAllEvents();
......
......@@ -140,7 +140,8 @@ void EnableProfiler(ProfilerState state);
// Clear the g_all_event_lists, which is total event lists of all threads.
void ResetProfiler();
void DisableProfiler(EventSortingKey sorted_key);
void DisableProfiler(EventSortingKey sorted_key,
const std::string& profile_path);
// Parse the event list and output the profiling report
void ParseEvents(std::vector<std::vector<Event>>&,
......
......@@ -125,5 +125,5 @@ TEST(RecordEvent, RecordEvent) {
EXPECT_EQ(start_profiler_count, 1);
// Will remove parsing-related code from test later
DisableProfiler(EventSortingKey::kTotal);
DisableProfiler(EventSortingKey::kTotal, "/tmp/profiler");
}
......@@ -57,7 +57,7 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python
DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model ${MKL_DEPENDS})
if(WITH_SWIG_PY)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -73,7 +73,7 @@ def reset_profiler():
@contextmanager
def profiler(state, sorted_key=None):
def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
"""The profiler interface.
Different from cuda_profiler, this profiler can be used to profile both CPU
and GPU program. By defalut, it records the CPU and GPU operator kernels,
......@@ -95,8 +95,9 @@ def profiler(state, sorted_key=None):
The `max` means sorting by the maximum execution time.
The `min` means sorting by the minimum execution time.
The `ave` means sorting by the average execution time.
profile_path (string) : If state == 'All', it will write a profile
proto output file.
"""
if state not in ['CPU', 'GPU', "All"]:
raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.")
if state == "GPU":
......@@ -122,4 +123,4 @@ def profiler(state, sorted_key=None):
}
# TODO(qingqing) : redirect C++ ostream to Python stream.
# with core.ostream_redirect(stdout=True, stderr=True):
core.disable_profiler(key_map[sorted_key])
core.disable_profiler(key_map[sorted_key], profile_path)
......@@ -22,7 +22,7 @@ import paddle.fluid.core as core
class TestProfiler(unittest.TestCase):
def net_profiler(self, state):
def net_profiler(self, state, profile_path='/tmp/profile'):
enable_if_gpu = state == 'GPU' or state == "All"
if enable_if_gpu and not core.is_compiled_with_cuda():
return
......@@ -47,7 +47,7 @@ class TestProfiler(unittest.TestCase):
exe.run(startup_program)
accuracy.reset(exe)
with profiler.profiler(state, 'total') as prof:
with profiler.profiler(state, 'total', profile_path) as prof:
for iter in range(10):
if iter == 2:
profiler.reset_profiler()
......@@ -68,7 +68,9 @@ class TestProfiler(unittest.TestCase):
self.net_profiler('GPU')
def test_all_profiler(self):
self.net_profiler('All')
self.net_profiler('All', '/tmp/profile_out')
with open('/tmp/profile_out', 'r') as f:
self.assertGreater(len(f.read()), 0)
if __name__ == '__main__':
......
......@@ -73,6 +73,7 @@ packages=['paddle',
'paddle.v2.plot',
'paddle.fluid',
'paddle.fluid.proto',
'paddle.fluid.proto.profiler',
'paddle.fluid.layers',
'py_paddle']
......@@ -109,6 +110,7 @@ setup(name='${PACKAGE_NAME}',
'': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.fluid.proto will be generated while compiling.
# So that package points to other directory.
'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform',
'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
},
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import sys
import unittest
import google.protobuf.text_format as text_format
import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'--profile_path', type=str, default='', help='Input profile file name.')
parser.add_argument(
'--timeline_path', type=str, default='', help='Output timeline file name.')
args = parser.parse_args()
class _ChromeTraceFormatter(object):
def __init__(self):
self._events = []
self._metadata = []
def _create_event(self, ph, category, name, pid, tid, timestamp):
"""Creates a new Chrome Trace event.
For details of the file format, see:
https://github.com/catapult-project/catapult/blob/master/tracing/README.md
Args:
ph: The type of event - usually a single character.
category: The event category as a string.
name: The event name as a string.
pid: Identifier of the process generating this event as an integer.
tid: Identifier of the thread generating this event as an integer.
timestamp: The timestamp of this event as a long integer.
Returns:
A JSON compatible event object.
"""
event = {}
event['ph'] = ph
event['cat'] = category
event['name'] = name
event['pid'] = pid
event['tid'] = tid
event['ts'] = timestamp
return event
def emit_pid(self, name, pid):
"""Adds a process metadata event to the trace.
Args:
name: The process name as a string.
pid: Identifier of the process as an integer.
"""
event = {}
event['name'] = 'process_name'
event['ph'] = 'M'
event['pid'] = pid
event['args'] = {'name': name}
self._metadata.append(event)
def emit_region(self, timestamp, duration, pid, tid, category, name, args):
"""Adds a region event to the trace.
Args:
timestamp: The start timestamp of this region as a long integer.
duration: The duration of this region as a long integer.
pid: Identifier of the process generating this event as an integer.
tid: Identifier of the thread generating this event as an integer.
category: The event category as a string.
name: The event name as a string.
args: A JSON-compatible dictionary of event arguments.
"""
event = self._create_event('X', category, name, pid, tid, timestamp)
event['dur'] = duration
event['args'] = args
self._events.append(event)
def format_to_string(self, pretty=False):
"""Formats the chrome trace to a string.
Args:
pretty: (Optional.) If True, produce human-readable JSON output.
Returns:
A JSON-formatted string in Chrome Trace format.
"""
trace = {}
trace['traceEvents'] = self._metadata + self._events
if pretty:
return json.dumps(trace, indent=4, separators=(',', ': '))
else:
return json.dumps(trace, separators=(',', ':'))
class Timeline(object):
def __init__(self, profile_pb):
self._profile_pb = profile_pb
self._pid = 0
self._devices = dict()
self._chrome_trace = _ChromeTraceFormatter()
def _allocate_pid(self):
cur_pid = self._pid
self._pid += 1
return cur_pid
def _allocate_pids(self):
for event in self._profile_pb.events:
if event.device_id not in self._devices:
pid = self._allocate_pid()
self._devices[event.device_id] = pid
self._chrome_trace.emit_pid("device:%s" % pid, pid)
def _allocate_events(self):
for event in self._profile_pb.events:
pid = self._devices[event.device_id]
args = {'name': event.name}
self._chrome_trace.emit_region(
event.start_ns, (event.end_ns - event.start_ns) / 1000000.0,
pid, 0, 'Op', event.name, args)
def generate_chrome_trace(self):
self._allocate_pids()
self._allocate_events()
return self._chrome_trace.format_to_string()
profile_path = '/tmp/profile'
if args.profile_path:
profile_path = args.profile_path
timeline_path = '/tmp/timeline'
if args.timeline_path:
timeline_path = args.timeline_path
with open(profile_path, 'r') as f:
profile_s = f.read()
profile_pb = profiler_pb2.Profile()
text_format.Merge(profile_s, profile_pb)
tl = Timeline(profile_pb)
with open(timeline_path, 'w') as f:
f.write(tl.generate_chrome_trace())
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册