From d2a70243f1179654fd7224a4114cff5d984d424e Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 16 Jan 2018 13:33:13 +0800 Subject: [PATCH] Refine profiler and expose to Python. --- cmake/external/pybind11.cmake | 2 +- paddle/framework/CMakeLists.txt | 3 +- paddle/framework/executor.cc | 6 ++ paddle/platform/profiler.cc | 37 +++++++--- paddle/platform/profiler.h | 22 ++++-- paddle/platform/profiler_test.cc | 10 ++- paddle/pybind/CMakeLists.txt | 2 +- paddle/pybind/protobuf.cc | 70 +++---------------- paddle/pybind/protobuf.h | 1 + paddle/pybind/pybind.cc | 27 ++++++- python/paddle/v2/fluid/profiler.py | 45 ++++++++++++ python/paddle/v2/fluid/tests/test_profiler.py | 37 +++++++++- 12 files changed, 171 insertions(+), 91 deletions(-) diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake index 4e87dc49d8..ab23663695 100644 --- a/cmake/external/pybind11.cmake +++ b/cmake/external/pybind11.cmake @@ -26,7 +26,7 @@ ExternalProject_Add( extern_pybind ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY "https://github.com/pybind/pybind11.git" - GIT_TAG "v2.1.1" + GIT_TAG "v2.2.1" PREFIX ${PYBIND_SOURCE_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 597ea959f2..9bf712250d 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -68,7 +68,8 @@ cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op) cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) -cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table) +cc_library(executor SRCS executor.cc DEPS op_registry device_context scope +framework_proto backward glog lod_rank_table profiler) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index c0418c9266..d7233882e7 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" #include "paddle/platform/place.h" +#include "paddle/platform/profiler.h" DEFINE_bool(check_nan_inf, false, "Checking whether operator produce NAN/INF or not. It will be " @@ -116,6 +117,11 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, for (auto& op_desc : block.AllOps()) { auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); VLOG(3) << op->DebugStringEx(local_scope); + + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto dev_ctx = const_cast(pool.Get(place_)); + platform::RecordEvent record_event(op->Type(), dev_ctx); + op->Run(*local_scope, place_); if (FLAGS_check_nan_inf) { for (auto& vname : op->OutputVars(true)) { diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index 7e2e2d968e..8175b827c3 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -163,14 +163,17 @@ void EnableProfiler(ProfilerState state) { Mark("_start_profiler_", nullptr); } -std::vector> DisableProfiler() { - PADDLE_ENFORCE(g_state != ProfilerState::kDisabled, - "Can't disable profiling, since it's not starting."); - // Mark the profiling stop. - Mark("_stop_profiler_", nullptr); - g_state = ProfilerState::kDisabled; - std::vector> result; +void ResetProfiler() { std::lock_guard guard(g_all_event_lists_mutex); + for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end(); + ++it) { + (*it)->Clear(); + } +} + +std::vector> GetAllEvents() { + std::lock_guard guard(g_all_event_lists_mutex); + std::vector> result; for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end(); ++it) { result.emplace_back((*it)->Reduce()); @@ -178,6 +181,18 @@ std::vector> DisableProfiler() { return result; } +void DisableProfiler(EventSortingKey sorted_key) { + PADDLE_ENFORCE(g_state != ProfilerState::kDisabled, + "Can't disable profiling, since it's not starting."); + // Mark the profiling stop. + Mark("_stop_profiler_", nullptr); + g_state = ProfilerState::kDisabled; + + std::vector> all_events = GetAllEvents(); + ParseEvents(all_events, sorted_key); + ResetProfiler(); +} + void ParseEvents(std::vector>& events, EventSortingKey sorted_by) { if (g_profiler_place == "") return; @@ -291,12 +306,12 @@ void ParseEvents(std::vector>& events, } // Print report - PrintProfilingReport(events_table, sorted_domain, max_name_width + 4, 12); + PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12); } -void PrintProfilingReport(std::vector>& events_table, - std::string& sorted_domain, const size_t name_width, - const size_t data_width) { +void PrintProfiler(std::vector>& events_table, + std::string& sorted_domain, const size_t name_width, + const size_t data_width) { // Output header information std::cout << "\n------------------------->" << " Profiling Report " diff --git a/paddle/platform/profiler.h b/paddle/platform/profiler.h index 6df48ef880..85823af1d7 100644 --- a/paddle/platform/profiler.h +++ b/paddle/platform/profiler.h @@ -84,6 +84,8 @@ struct EventList { return result; } + void Clear() { event_blocks.clear(); } + std::forward_list> event_blocks; }; @@ -110,12 +112,9 @@ struct RecordEvent { std::string name_; }; -// Enable the profiling function. -void EnableProfiler(ProfilerState state); - // Return the event list of all threads. Asummed the returned value calls // event_lists, event_lists[i][j] represents the j-th Event of i-th thread. -std::vector> DisableProfiler(); +std::vector> GetAllEvents(); // The information of each event given in the profiling report struct EventItem { @@ -130,13 +129,22 @@ struct EventItem { // Candidate keys to sort the profiling report enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve }; +// Enable the profiling function. +void EnableProfiler(ProfilerState state); + +// Clear the g_all_event_lists, which is total event lists of all threads. +void ResetProfiler(); + +void DisableProfiler(EventSortingKey sorted_key); + // Parse the event list and output the profiling report void ParseEvents(std::vector>&, EventSortingKey sorted_by = EventSortingKey::kDefault); // Print results -void PrintProfilingReport(std::vector>& events_table, - std::string& sorted_domain, const size_t name_width, - const size_t data_width); +void PrintProfiler(std::vector>& events_table, + std::string& sorted_domain, const size_t name_width, + const size_t data_width); + } // namespace platform } // namespace paddle diff --git a/paddle/platform/profiler_test.cc b/paddle/platform/profiler_test.cc index 13dea713c7..81f10c9134 100644 --- a/paddle/platform/profiler_test.cc +++ b/paddle/platform/profiler_test.cc @@ -103,18 +103,14 @@ TEST(RecordEvent, RecordEvent) { // Bad Usage: PushEvent("event_without_pop", dev_ctx); PopEvent("event_without_push", dev_ctx); - std::vector> events = paddle::platform::DisableProfiler(); - // Will remove parsing-related code from test later - ParseEvents(events, EventSortingKey::kTotal); + std::vector> events = paddle::platform::GetAllEvents(); int cuda_startup_count = 0; int start_profiler_count = 0; - int stop_profiler_count = 0; for (size_t i = 0; i < events.size(); ++i) { for (size_t j = 0; j < events[i].size(); ++j) { if (events[i][j].name() == "_cuda_startup_") ++cuda_startup_count; if (events[i][j].name() == "_start_profiler_") ++start_profiler_count; - if (events[i][j].name() == "_stop_profiler_") ++stop_profiler_count; if (events[i][j].name() == "push") { EXPECT_EQ(events[i][j + 1].name(), "pop"); #ifdef PADDLE_WITH_CUDA @@ -127,5 +123,7 @@ TEST(RecordEvent, RecordEvent) { } EXPECT_EQ(cuda_startup_count % 5, 0); EXPECT_EQ(start_profiler_count, 1); - EXPECT_EQ(stop_profiler_count, 1); + + // Will remove parsing-related code from test later + DisableProfiler(EventSortingKey::kTotal); } diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 7b37430707..e78673e0ba 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,7 +1,7 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc - DEPS pybind python backward proto_desc paddle_memory executor prune init + DEPS pybind python backward proto_desc paddle_memory executor prune init profiler ${GLOB_OP_LIB}) if(NOT APPLE AND NOT ANDROID) target_link_libraries(paddle_pybind rt) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 4f95948153..d80f6b71e9 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -21,74 +21,24 @@ limitations under the License. */ #include "paddle/framework/program_desc.h" #include "paddle/framework/var_desc.h" -// Cast boost::variant for PyBind. -// Copy from -// https://github.com/pybind/pybind11/issues/576#issuecomment-269563199 +using boost::variant; + namespace pybind11 { namespace detail { -// Can be replaced by a generic lambda in C++14 -struct variant_caster_visitor : public boost::static_visitor { - return_value_policy policy; - handle parent; - - variant_caster_visitor(return_value_policy policy, handle parent) - : policy(policy), parent(parent) {} - - template - handle operator()(T const &src) const { - return make_caster::cast(src, policy, parent); - } -}; - -template -struct variant_caster; - -template