提交 12843a3a 编写于 作者: X Xin Pan

First timeline version

上级 fee90b50
...@@ -72,7 +72,7 @@ copy(inference_lib DEPENDS paddle_fluid_shared ...@@ -72,7 +72,7 @@ copy(inference_lib DEPENDS paddle_fluid_shared
) )
set(module "platform") set(module "platform")
copy(platform_lib copy(platform_lib DEPS profiler_py_proto
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details
) )
......
proto_library(profiler_proto SRCS profiler.proto) proto_library(profiler_proto SRCS profiler.proto)
py_proto_compile(profiler_py_proto SRCS profiler.proto)
add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(profiler_py_proto profiler_py_proto_init)
add_custom_command(TARGET profiler_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/fluid/proto/profiler
COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/fluid/proto/profiler
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if(WITH_GPU) if(WITH_GPU)
cc_library(enforce SRCS enforce.cc DEPS) cc_library(enforce SRCS enforce.cc DEPS)
......
...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/device_tracer.h" #include "paddle/fluid/platform/device_tracer.h"
#include <google/protobuf/text_format.h>
#include <fstream>
#include <map> #include <map>
#include <mutex> #include <mutex>
#include "glog/logging.h" #include "glog/logging.h"
...@@ -177,7 +179,7 @@ class DeviceTracerImpl : public DeviceTracer { ...@@ -177,7 +179,7 @@ class DeviceTracerImpl : public DeviceTracer {
enabled_ = true; enabled_ = true;
} }
proto::Profile GenProfile() { proto::Profile GenProfile(const std::string &profile_path) {
std::lock_guard<std::mutex> l(trace_mu_); std::lock_guard<std::mutex> l(trace_mu_);
proto::Profile profile_pb; proto::Profile profile_pb;
profile_pb.set_start_ns(start_ns_); profile_pb.set_start_ns(start_ns_);
...@@ -196,13 +198,12 @@ class DeviceTracerImpl : public DeviceTracer { ...@@ -196,13 +198,12 @@ class DeviceTracerImpl : public DeviceTracer {
event->set_device_id(r.device_id); event->set_device_id(r.device_id);
event_times[event->name()].push_back(r.end_ns - r.start_ns); event_times[event->name()].push_back(r.end_ns - r.start_ns);
} }
for (const auto &et : event_times) { std::string profile_str;
fprintf( google::protobuf::TextFormat::PrintToString(profile_pb, &profile_str);
stderr, "%s: total: %fms invoked cuda kernels: %lu\n", std::ofstream profile_f;
et.first.c_str(), profile_f.open(profile_path, std::ios::out | std::ios::trunc);
std::accumulate(et.second.begin(), et.second.end(), 0) / 1000000.0, profile_f << profile_str;
et.second.size()); profile_f.close();
}
return profile_pb; return profile_pb;
} }
...@@ -259,7 +260,9 @@ class DeviceTracerDummy : public DeviceTracer { ...@@ -259,7 +260,9 @@ class DeviceTracerDummy : public DeviceTracer {
void Enable() {} void Enable() {}
proto::Profile GenProfile() { return proto::Profile(); } proto::Profile GenProfile(const std::string &profile_path) {
return proto::Profile();
}
void Disable() {} void Disable() {}
}; };
......
...@@ -55,7 +55,7 @@ class DeviceTracer { ...@@ -55,7 +55,7 @@ class DeviceTracer {
uint32_t correlation_id) = 0; uint32_t correlation_id) = 0;
// Generate a proto after done (Disabled). // Generate a proto after done (Disabled).
virtual proto::Profile GenProfile() = 0; virtual proto::Profile GenProfile(const std::string& profile_path) = 0;
virtual bool IsEnabled() = 0; virtual bool IsEnabled() = 0;
}; };
......
...@@ -199,7 +199,8 @@ std::vector<std::vector<Event>> GetAllEvents() { ...@@ -199,7 +199,8 @@ std::vector<std::vector<Event>> GetAllEvents() {
return result; return result;
} }
void DisableProfiler(EventSortingKey sorted_key) { void DisableProfiler(EventSortingKey sorted_key,
const std::string& profile_path) {
PADDLE_ENFORCE(g_state != ProfilerState::kDisabled, PADDLE_ENFORCE(g_state != ProfilerState::kDisabled,
"Can't disable profiling, since it's not starting."); "Can't disable profiling, since it's not starting.");
// Mark the profiling stop. // Mark the profiling stop.
...@@ -209,7 +210,7 @@ void DisableProfiler(EventSortingKey sorted_key) { ...@@ -209,7 +210,7 @@ void DisableProfiler(EventSortingKey sorted_key) {
DeviceTracer* tracer = GetDeviceTracer(); DeviceTracer* tracer = GetDeviceTracer();
if (g_profiler_place == "All" && tracer && tracer->IsEnabled()) { if (g_profiler_place == "All" && tracer && tracer->IsEnabled()) {
tracer->Disable(); tracer->Disable();
tracer->GenProfile(); tracer->GenProfile(profile_path);
} }
std::vector<std::vector<Event>> all_events = GetAllEvents(); std::vector<std::vector<Event>> all_events = GetAllEvents();
......
...@@ -140,7 +140,8 @@ void EnableProfiler(ProfilerState state); ...@@ -140,7 +140,8 @@ void EnableProfiler(ProfilerState state);
// Clear the g_all_event_lists, which is total event lists of all threads. // Clear the g_all_event_lists, which is total event lists of all threads.
void ResetProfiler(); void ResetProfiler();
void DisableProfiler(EventSortingKey sorted_key); void DisableProfiler(EventSortingKey sorted_key,
const std::string& profile_path);
// Parse the event list and output the profiling report // Parse the event list and output the profiling report
void ParseEvents(std::vector<std::vector<Event>>&, void ParseEvents(std::vector<std::vector<Event>>&,
......
...@@ -125,5 +125,5 @@ TEST(RecordEvent, RecordEvent) { ...@@ -125,5 +125,5 @@ TEST(RecordEvent, RecordEvent) {
EXPECT_EQ(start_profiler_count, 1); EXPECT_EQ(start_profiler_count, 1);
// Will remove parsing-related code from test later // Will remove parsing-related code from test later
DisableProfiler(EventSortingKey::kTotal); DisableProfiler(EventSortingKey::kTotal, "/tmp/profiler");
} }
...@@ -57,7 +57,7 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp ...@@ -57,7 +57,7 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python
DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model ${MKL_DEPENDS}) set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model ${MKL_DEPENDS})
if(WITH_SWIG_PY) if(WITH_SWIG_PY)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
...@@ -73,7 +73,7 @@ def reset_profiler(): ...@@ -73,7 +73,7 @@ def reset_profiler():
@contextmanager @contextmanager
def profiler(state, sorted_key=None): def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
"""The profiler interface. """The profiler interface.
Different from cuda_profiler, this profiler can be used to profile both CPU Different from cuda_profiler, this profiler can be used to profile both CPU
and GPU program. By default, it records the CPU and GPU operator kernels, and GPU program. By default, it records the CPU and GPU operator kernels,
...@@ -95,8 +95,9 @@ def profiler(state, sorted_key=None): ...@@ -95,8 +95,9 @@ def profiler(state, sorted_key=None):
The `max` means sorting by the maximum execution time. The `max` means sorting by the maximum execution time.
The `min` means sorting by the minimum execution time. The `min` means sorting by the minimum execution time.
The `ave` means sorting by the average execution time. The `ave` means sorting by the average execution time.
profile_path (string) : If state == 'All', it will write a profile
proto output file.
""" """
if state not in ['CPU', 'GPU', "All"]: if state not in ['CPU', 'GPU', "All"]:
raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.") raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.")
if state == "GPU": if state == "GPU":
...@@ -122,4 +123,4 @@ def profiler(state, sorted_key=None): ...@@ -122,4 +123,4 @@ def profiler(state, sorted_key=None):
} }
# TODO(qingqing) : redirect C++ ostream to Python stream. # TODO(qingqing) : redirect C++ ostream to Python stream.
# with core.ostream_redirect(stdout=True, stderr=True): # with core.ostream_redirect(stdout=True, stderr=True):
core.disable_profiler(key_map[sorted_key]) core.disable_profiler(key_map[sorted_key], profile_path)
...@@ -22,7 +22,7 @@ import paddle.fluid.core as core ...@@ -22,7 +22,7 @@ import paddle.fluid.core as core
class TestProfiler(unittest.TestCase): class TestProfiler(unittest.TestCase):
def net_profiler(self, state): def net_profiler(self, state, profile_path='/tmp/profile'):
enable_if_gpu = state == 'GPU' or state == "All" enable_if_gpu = state == 'GPU' or state == "All"
if enable_if_gpu and not core.is_compiled_with_cuda(): if enable_if_gpu and not core.is_compiled_with_cuda():
return return
...@@ -47,7 +47,7 @@ class TestProfiler(unittest.TestCase): ...@@ -47,7 +47,7 @@ class TestProfiler(unittest.TestCase):
exe.run(startup_program) exe.run(startup_program)
accuracy.reset(exe) accuracy.reset(exe)
with profiler.profiler(state, 'total') as prof: with profiler.profiler(state, 'total', profile_path) as prof:
for iter in range(10): for iter in range(10):
if iter == 2: if iter == 2:
profiler.reset_profiler() profiler.reset_profiler()
...@@ -68,7 +68,9 @@ class TestProfiler(unittest.TestCase): ...@@ -68,7 +68,9 @@ class TestProfiler(unittest.TestCase):
self.net_profiler('GPU') self.net_profiler('GPU')
def test_all_profiler(self): def test_all_profiler(self):
self.net_profiler('All') self.net_profiler('All', '/tmp/profile_out')
with open('/tmp/profile_out', 'r') as f:
self.assertGreater(len(f.read()), 0)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -73,6 +73,7 @@ packages=['paddle', ...@@ -73,6 +73,7 @@ packages=['paddle',
'paddle.v2.plot', 'paddle.v2.plot',
'paddle.fluid', 'paddle.fluid',
'paddle.fluid.proto', 'paddle.fluid.proto',
'paddle.fluid.proto.profiler',
'paddle.fluid.layers', 'paddle.fluid.layers',
'py_paddle'] 'py_paddle']
...@@ -109,6 +110,7 @@ setup(name='${PACKAGE_NAME}', ...@@ -109,6 +110,7 @@ setup(name='${PACKAGE_NAME}',
'': '${CMAKE_CURRENT_SOURCE_DIR}', '': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.fluid.proto will be generated while compiling. # The paddle.fluid.proto will be generated while compiling.
# So that package points to other directory. # So that package points to other directory.
'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform',
'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework', 'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle' 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
}, },
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import sys
import unittest
import google.protobuf.text_format as text_format
import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2
# Command-line interface: both options are plain strings that default to ''.
# An empty value means "not supplied"; the script later substitutes its own
# default path for it.
parser = argparse.ArgumentParser(description=__doc__)
for _flag, _help in (('--profile_path', 'Input profile file name.'),
                     ('--timeline_path', 'Output timeline file name.')):
    parser.add_argument(_flag, type=str, default='', help=_help)
# Parsed at module level, so the arguments are read as soon as the file runs.
args = parser.parse_args()
class _ChromeTraceFormatter(object):
def __init__(self):
self._events = []
self._metadata = []
def _create_event(self, ph, category, name, pid, tid, timestamp):
"""Creates a new Chrome Trace event.
For details of the file format, see:
https://github.com/catapult-project/catapult/blob/master/tracing/README.md
Args:
ph: The type of event - usually a single character.
category: The event category as a string.
name: The event name as a string.
pid: Identifier of the process generating this event as an integer.
tid: Identifier of the thread generating this event as an integer.
timestamp: The timestamp of this event as a long integer.
Returns:
A JSON compatible event object.
"""
event = {}
event['ph'] = ph
event['cat'] = category
event['name'] = name
event['pid'] = pid
event['tid'] = tid
event['ts'] = timestamp
return event
def emit_pid(self, name, pid):
"""Adds a process metadata event to the trace.
Args:
name: The process name as a string.
pid: Identifier of the process as an integer.
"""
event = {}
event['name'] = 'process_name'
event['ph'] = 'M'
event['pid'] = pid
event['args'] = {'name': name}
self._metadata.append(event)
def emit_region(self, timestamp, duration, pid, tid, category, name, args):
"""Adds a region event to the trace.
Args:
timestamp: The start timestamp of this region as a long integer.
duration: The duration of this region as a long integer.
pid: Identifier of the process generating this event as an integer.
tid: Identifier of the thread generating this event as an integer.
category: The event category as a string.
name: The event name as a string.
args: A JSON-compatible dictionary of event arguments.
"""
event = self._create_event('X', category, name, pid, tid, timestamp)
event['dur'] = duration
event['args'] = args
self._events.append(event)
def format_to_string(self, pretty=False):
"""Formats the chrome trace to a string.
Args:
pretty: (Optional.) If True, produce human-readable JSON output.
Returns:
A JSON-formatted string in Chrome Trace format.
"""
trace = {}
trace['traceEvents'] = self._metadata + self._events
if pretty:
return json.dumps(trace, indent=4, separators=(',', ': '))
else:
return json.dumps(trace, separators=(',', ':'))
class Timeline(object):
    """Turns a profiler profile message into a Chrome Trace timeline.

    Every distinct ``device_id`` seen in the profile's events gets its own
    trace process id, and every event is emitted as a region in the process
    of its device.
    """

    # Chrome Trace expresses both 'ts' and 'dur' in microseconds, while the
    # profile stores nanoseconds.
    _NS_PER_US = 1000.0

    def __init__(self, profile_pb):
        """
        Args:
          profile_pb: The parsed profile. Only its ``events`` are read, and
            from each event ``device_id``, ``name``, ``start_ns`` and
            ``end_ns``.
        """
        self._profile_pb = profile_pb
        self._pid = 0
        self._devices = dict()
        self._chrome_trace = _ChromeTraceFormatter()

    def _allocate_pid(self):
        """Returns the next unused trace process id (0, 1, 2, ...)."""
        cur_pid = self._pid
        self._pid += 1
        return cur_pid

    def _allocate_pids(self):
        """Assigns one trace process id per device, in first-seen order."""
        for event in self._profile_pb.events:
            if event.device_id not in self._devices:
                pid = self._allocate_pid()
                self._devices[event.device_id] = pid
                # Label the process with the real device id, not with the
                # internal pid counter, so the trace identifies the device.
                self._chrome_trace.emit_pid("device:%s" % event.device_id,
                                            pid)

    def _allocate_events(self):
        """Emits one region per profiled event.

        Must run after ``_allocate_pids`` so every device has a pid.
        """
        for event in self._profile_pb.events:
            pid = self._devices[event.device_id]
            args = {'name': event.name}
            # Start and duration are both converted from ns to us so that
            # they share one unit on the trace's time axis.
            start_us = event.start_ns / self._NS_PER_US
            duration_us = (event.end_ns - event.start_ns) / self._NS_PER_US
            self._chrome_trace.emit_region(start_us, duration_us, pid, 0,
                                           'Op', event.name, args)

    def generate_chrome_trace(self):
        """Builds the trace and returns it as a Chrome Trace JSON string."""
        self._allocate_pids()
        self._allocate_events()
        return self._chrome_trace.format_to_string()
# Resolve input/output locations; an empty command-line value falls back to
# the built-in default path.
profile_path = args.profile_path or '/tmp/profile'
timeline_path = args.timeline_path or '/tmp/timeline'

# Load the text-format profile into a Profile message.
with open(profile_path, 'r') as profile_file:
    profile_text = profile_file.read()
profile_pb = profiler_pb2.Profile()
text_format.Merge(profile_text, profile_pb)

# Convert the profile to a Chrome trace and write it out.
timeline = Timeline(profile_pb)
with open(timeline_path, 'w') as timeline_file:
    timeline_file.write(timeline.generate_chrome_trace())
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册