First timeline version

12843a3a · Xin Pan · fee90b50 · 12843a3a · 12843a3a · 12843a3a
14 changed files
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -72,7 +72,7 @@ copy(inference_lib DEPENDS paddle_fluid_shared
 )

 set(module "platform")
-copy(platform_lib
+copy(platform_lib DEPS profiler_py_proto
  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details
 )

--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
 proto_library(profiler_proto SRCS profiler.proto)
+py_proto_compile(profiler_py_proto SRCS profiler.proto)
+
+add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
+
+add_dependencies(profiler_py_proto profiler_py_proto_init)
+
+add_custom_command(TARGET profiler_py_proto POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/fluid/proto/profiler
+        COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/fluid/proto/profiler
+        COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
+        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

 if(WITH_GPU)
  cc_library(enforce SRCS enforce.cc DEPS)

--- a/paddle/fluid/platform/device_tracer.cc
+++ b/paddle/fluid/platform/device_tracer.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/platform/device_tracer.h"
+#include <google/protobuf/text_format.h>
+#include <fstream>
 #include <map>
 #include <mutex>
 #include "glog/logging.h"
@@ -177,7 +179,7 @@ class DeviceTracerImpl : public DeviceTracer {
    enabled_ = true;
  }

-  proto::Profile GenProfile() {
+  proto::Profile GenProfile(const std::string &profile_path) {
    std::lock_guard<std::mutex> l(trace_mu_);
    proto::Profile profile_pb;
    profile_pb.set_start_ns(start_ns_);
@@ -196,13 +198,12 @@ class DeviceTracerImpl : public DeviceTracer {
      event->set_device_id(r.device_id);
      event_times[event->name()].push_back(r.end_ns - r.start_ns);
    }
-    for (const auto &et : event_times) {
-      fprintf(
-          stderr, "%s: total: %fms invoked cuda kernels: %lu\n",
-          et.first.c_str(),
-          std::accumulate(et.second.begin(), et.second.end(), 0) / 1000000.0,
-          et.second.size());
-    }
+    std::string profile_str;
+    google::protobuf::TextFormat::PrintToString(profile_pb, &profile_str);
+    std::ofstream profile_f;
+    profile_f.open(profile_path, std::ios::out | std::ios::trunc);
+    profile_f << profile_str;
+    profile_f.close();
    return profile_pb;
  }

@@ -259,7 +260,9 @@ class DeviceTracerDummy : public DeviceTracer {

  void Enable() {}

-  proto::Profile GenProfile() { return proto::Profile(); }
+  proto::Profile GenProfile(const std::string &profile_path) {
+    return proto::Profile();
+  }

  void Disable() {}
 };

--- a/paddle/fluid/platform/device_tracer.h
+++ b/paddle/fluid/platform/device_tracer.h
@@ -55,7 +55,7 @@ class DeviceTracer {
                                uint32_t correlation_id) = 0;

  // Generate a proto after done (Disabled).
-  virtual proto::Profile GenProfile() = 0;
+  virtual proto::Profile GenProfile(const std::string& profile_path) = 0;

  virtual bool IsEnabled() = 0;
 };

--- a/paddle/fluid/platform/profiler.cc
+++ b/paddle/fluid/platform/profiler.cc
@@ -199,7 +199,8 @@ std::vector<std::vector<Event>> GetAllEvents() {
  return result;
 }

-void DisableProfiler(EventSortingKey sorted_key) {
+void DisableProfiler(EventSortingKey sorted_key,
+                     const std::string& profile_path) {
  PADDLE_ENFORCE(g_state != ProfilerState::kDisabled,
                 "Can't disable profiling, since it's not starting.");
  // Mark the profiling stop.
@@ -209,7 +210,7 @@ void DisableProfiler(EventSortingKey sorted_key) {
  DeviceTracer* tracer = GetDeviceTracer();
  if (g_profiler_place == "All" && tracer && tracer->IsEnabled()) {
    tracer->Disable();
-    tracer->GenProfile();
+    tracer->GenProfile(profile_path);
  }

  std::vector<std::vector<Event>> all_events = GetAllEvents();

--- a/paddle/fluid/platform/profiler.h
+++ b/paddle/fluid/platform/profiler.h
@@ -140,7 +140,8 @@ void EnableProfiler(ProfilerState state);
 // Clear the g_all_event_lists, which is total event lists of all threads.
 void ResetProfiler();

-void DisableProfiler(EventSortingKey sorted_key);
+void DisableProfiler(EventSortingKey sorted_key,
+                     const std::string& profile_path);

 // Parse the event list and output the profiling report
 void ParseEvents(std::vector<std::vector<Event>>&,

--- a/paddle/fluid/platform/profiler_test.cc
+++ b/paddle/fluid/platform/profiler_test.cc
@@ -125,5 +125,5 @@ TEST(RecordEvent, RecordEvent) {
  EXPECT_EQ(start_profiler_count, 1);

  // Will remove parsing-related code from test later
-  DisableProfiler(EventSortingKey::kTotal);
+  DisableProfiler(EventSortingKey::kTotal, "/tmp/profiler");
 }
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -57,7 +57,7 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
    COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
    COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python
-    DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
+    DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})

 set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model ${MKL_DEPENDS})
 if(WITH_SWIG_PY)

--- a/python/__init__.py
+++ b/python/__init__.py
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/python/paddle/fluid/profiler.py
+++ b/python/paddle/fluid/profiler.py
@@ -73,7 +73,7 @@ def reset_profiler():


 @contextmanager
-def profiler(state, sorted_key=None):
+def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
    """The profiler interface.
    Different from cuda_profiler, this profiler can be used to profile both CPU
    and GPU program. By defalut, it records the CPU and GPU operator kernels,
@@ -95,8 +95,9 @@ def profiler(state, sorted_key=None):
            The `max` means sorting by the maximum execution time.
            The `min` means sorting by the minimum execution time.
            The `ave` means sorting by the average execution time.
+        profile_path (string) : If state == 'All', it will write a profile
+            proto output file.
    """
-
    if state not in ['CPU', 'GPU', "All"]:
        raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.")
    if state == "GPU":
@@ -122,4 +123,4 @@ def profiler(state, sorted_key=None):
    }
    # TODO(qingqing) : redirect C++ ostream to Python stream.
    # with core.ostream_redirect(stdout=True, stderr=True):
-    core.disable_profiler(key_map[sorted_key])
+    core.disable_profiler(key_map[sorted_key], profile_path)
--- a/python/paddle/fluid/tests/unittests/test_profiler.py
+++ b/python/paddle/fluid/tests/unittests/test_profiler.py
@@ -22,7 +22,7 @@ import paddle.fluid.core as core


 class TestProfiler(unittest.TestCase):
-    def net_profiler(self, state):
+    def net_profiler(self, state, profile_path='/tmp/profile'):
        enable_if_gpu = state == 'GPU' or state == "All"
        if enable_if_gpu and not core.is_compiled_with_cuda():
            return
@@ -47,7 +47,7 @@ class TestProfiler(unittest.TestCase):
        exe.run(startup_program)

        accuracy.reset(exe)
-        with profiler.profiler(state, 'total') as prof:
+        with profiler.profiler(state, 'total', profile_path) as prof:
            for iter in range(10):
                if iter == 2:
                    profiler.reset_profiler()
@@ -68,7 +68,9 @@ class TestProfiler(unittest.TestCase):
        self.net_profiler('GPU')

    def test_all_profiler(self):
-        self.net_profiler('All')
+        self.net_profiler('All', '/tmp/profile_out')
+        with open('/tmp/profile_out', 'r') as f:
+            self.assertGreater(len(f.read()), 0)


 if __name__ == '__main__':

--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -73,6 +73,7 @@ packages=['paddle',
          'paddle.v2.plot',
          'paddle.fluid',
          'paddle.fluid.proto',
+          'paddle.fluid.proto.profiler',
          'paddle.fluid.layers',
          'py_paddle']

@@ -109,6 +110,7 @@ setup(name='${PACKAGE_NAME}',
          '': '${CMAKE_CURRENT_SOURCE_DIR}',
          # The paddle.fluid.proto will be generated while compiling.
          # So that package points to other directory.
+          'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform',
          'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
          'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
      },

--- a/tools/__init__.py
+++ b/tools/__init__.py
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/tools/timeline.py
+++ b/tools/timeline.py
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import sys
+import unittest
+
+import google.protobuf.text_format as text_format
+import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2
+
+# Command-line options for the timeline converter. Both paths default to the
+# empty string; an empty value is replaced by a /tmp fallback further down in
+# this script.
+# NOTE: arguments are parsed at module level, i.e. as soon as this file is
+# executed or imported.
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    '--profile_path', type=str, default='', help='Input profile file name.')
+parser.add_argument(
+    '--timeline_path', type=str, default='', help='Output timeline file name.')
+args = parser.parse_args()
+
+
+class _ChromeTraceFormatter(object):
+    """Collects Chrome Trace events and serializes them as one JSON object.
+
+    Metadata events and region events are kept in separate lists; on output
+    the metadata events are placed before the region events.
+    """
+
+    def __init__(self):
+        self._events = []
+        self._metadata = []
+
+    def _create_event(self, ph, category, name, pid, tid, timestamp):
+        """Builds the fields shared by every Chrome Trace event.
+
+        For details of the file format, see:
+        https://github.com/catapult-project/catapult/blob/master/tracing/README.md
+
+        Args:
+          ph:  The type of event - usually a single character.
+          category: The event category as a string.
+          name:  The event name as a string.
+          pid:  Identifier of the process generating this event as an integer.
+          tid:  Identifier of the thread generating this event as an integer.
+          timestamp:  The timestamp of this event as a long integer.
+
+        Returns:
+          A JSON compatible event object (a dict).
+        """
+        # Key order is kept as ph, cat, name, pid, tid, ts so the serialized
+        # output does not change.
+        return {
+            'ph': ph,
+            'cat': category,
+            'name': name,
+            'pid': pid,
+            'tid': tid,
+            'ts': timestamp,
+        }
+
+    def emit_pid(self, name, pid):
+        """Records a 'process_name' metadata event for a process.
+
+        Args:
+          name:  The process name as a string.
+          pid:  Identifier of the process as an integer.
+        """
+        self._metadata.append({
+            'name': 'process_name',
+            'ph': 'M',
+            'pid': pid,
+            'args': {'name': name},
+        })
+
+    def emit_region(self, timestamp, duration, pid, tid, category, name, args):
+        """Records a complete ('X') event covering a span of time.
+
+        Args:
+          timestamp:  The start timestamp of this region as a long integer.
+          duration:  The duration of this region as a long integer.
+          pid:  Identifier of the process generating this event as an integer.
+          tid:  Identifier of the thread generating this event as an integer.
+          category: The event category as a string.
+          name:  The event name as a string.
+          args:  A JSON-compatible dictionary of event arguments.
+        """
+        region = self._create_event('X', category, name, pid, tid, timestamp)
+        region['dur'] = duration
+        region['args'] = args
+        self._events.append(region)
+
+    def format_to_string(self, pretty=False):
+        """Serializes everything recorded so far.
+
+        Args:
+          pretty: (Optional.)  If True, produce human-readable JSON output.
+
+        Returns:
+          A JSON-formatted string in Chrome Trace format.
+        """
+        trace = {'traceEvents': self._metadata + self._events}
+        if not pretty:
+            return json.dumps(trace, separators=(',', ':'))
+        return json.dumps(trace, indent=4, separators=(',', ': '))
+
+
+class Timeline(object):
+    """Turns a profile message into a Chrome Trace JSON string.
+
+    Every distinct `device_id` found in the profile's events gets its own
+    process row (pid) in the trace; all events are placed on thread 0 of
+    their device's row.
+    """
+
+    def __init__(self, profile_pb):
+        """
+        Args:
+          profile_pb: Profile message whose `events` are read for their
+            `device_id`, `name`, `start_ns` and `end_ns` fields.
+        """
+        self._profile_pb = profile_pb
+        self._pid = 0
+        self._devices = dict()
+        self._chrome_trace = _ChromeTraceFormatter()
+
+    def _allocate_pid(self):
+        """Returns the next unused trace pid (0, 1, 2, ...)."""
+        cur_pid = self._pid
+        self._pid += 1
+        return cur_pid
+
+    def _allocate_pids(self):
+        """Assigns a trace pid to each device id and emits its row name."""
+        for event in self._profile_pb.events:
+            if event.device_id not in self._devices:
+                pid = self._allocate_pid()
+                self._devices[event.device_id] = pid
+                # Name the row after the device it represents rather than
+                # after the internal pid counter.
+                self._chrome_trace.emit_pid(
+                    "device:%s" % event.device_id, pid)
+
+    def _allocate_events(self):
+        """Emits one region per profiled event.
+
+        Requires `_allocate_pids` to have run so every device id has a pid.
+        """
+        for event in self._profile_pb.events:
+            pid = self._devices[event.device_id]
+            args = {'name': event.name}
+            # The Chrome Trace format expects both 'ts' and 'dur' in
+            # microseconds, so convert both from nanoseconds with the same
+            # factor.
+            start_us = event.start_ns / 1000.0
+            duration_us = (event.end_ns - event.start_ns) / 1000.0
+            self._chrome_trace.emit_region(start_us, duration_us, pid, 0,
+                                           'Op', event.name, args)
+
+    def generate_chrome_trace(self):
+        """Returns the profile as a compact Chrome Trace JSON string."""
+        self._allocate_pids()
+        self._allocate_events()
+        return self._chrome_trace.format_to_string()
+
+
+# An empty path argument means "use the default location".
+profile_path = args.profile_path or '/tmp/profile'
+timeline_path = args.timeline_path or '/tmp/timeline'
+
+# Read the profile, which is stored in protobuf text format.
+with open(profile_path, 'r') as profile_file:
+    profile_pb = profiler_pb2.Profile()
+    text_format.Merge(profile_file.read(), profile_pb)
+
+# Convert the profile and write the Chrome Trace JSON.
+tl = Timeline(profile_pb)
+with open(timeline_path, 'w') as timeline_file:
+    timeline_file.write(tl.generate_chrome_trace())