From 57a4f16d9e7d97d23e26d09d30e5c2c56a8ce220 Mon Sep 17 00:00:00 2001 From: jakpiase <62569058+jakpiase@users.noreply.github.com> Date: Tue, 8 Dec 2020 09:20:05 +0100 Subject: [PATCH] added internal and external reorders to profiler (#29443) * added external reorder to profiler * added external and internal reorders to profiler * added internal and external reorder to profiler * added formatting to int/ext reorder commit * removed unnecessary comment --- .../fluid/framework/data_layout_transform.cc | 4 ++- .../fluid/operators/mkldnn/conv_mkldnn_op.cc | 30 +++++++++++++------ paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 19 ++++++++---- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 17 ++++++++--- .../operators/mkldnn/quantize_mkldnn_op.cc | 8 +++-- .../operators/mkldnn/requantize_mkldnn_op.cc | 8 +++-- .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 8 +++-- paddle/fluid/platform/mkldnn_helper.h | 3 ++ paddle/fluid/platform/mkldnn_reuse.h | 13 ++++++++ paddle/fluid/platform/profiler_helper.h | 30 +++++++++++++++++-- 10 files changed, 113 insertions(+), 27 deletions(-) diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 8563b5b6d3..30464bbca9 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -13,8 +13,8 @@ // limitations under the License. #include "paddle/fluid/framework/data_layout_transform.h" - #include +#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/operators/math/math_function.h" #ifdef PADDLE_WITH_MKLDNN @@ -194,6 +194,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p); mkldnn::stream astream(cpu_engine); + platform::RecordEvent record_reorder("ext_reorder", + platform::EventRole::kUniqueOp); reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); astream.wait(); } else { diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index 99175a73e2..2e6d809c98 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -808,9 +808,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { user_src_memory_p = std::static_pointer_cast( dev_ctx.GetBlob(user_src_key)); user_src_memory_p->set_data_handle(to_void_cast(input_data)); - src_memory_reorder_p->execute(astream, *user_src_memory_p, - *src_memory_p); - astream.wait(); + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + src_memory_reorder_p->execute(astream, *user_src_memory_p, + *src_memory_p); + astream.wait(); + } } else if (src_memory_p) { src_memory_p->set_data_handle(to_void_cast(input_data)); } @@ -840,9 +844,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { if (residual_reorder_p) { auto user_residual_data_p = std::static_pointer_cast( dev_ctx.GetBlob(user_residual_key)); - residual_reorder_p->execute(astream, *user_residual_data_p, - *dst_memory_p); - astream.wait(); + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + residual_reorder_p->execute(astream, *user_residual_data_p, + *dst_memory_p); + astream.wait(); + } } auto bias_memory_p = @@ -1094,9 +1102,13 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto reorder_p = handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p); - reorder_p->execute(astream, *diff_weights_memory_p, - *reorder_dst_memory_p); - astream.wait(); + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + reorder_p->execute(astream, *diff_weights_memory_p, + *reorder_dst_memory_p); + astream.wait(); + } // So here we have a data in goihw , which can be interpreted as OIHW // (OIDHW for conv3d) diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index 6f0987deea..820c46c67d 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -281,8 +281,13 @@ class FCPrimitiveFactory { auto reorder = mkldnn::reorder(src_mem, *dst_mem); mkldnn::stream astream(engine_); - reorder.execute(astream, src_mem, *dst_mem); - astream.wait(); + + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + reorder.execute(astream, src_mem, *dst_mem); + astream.wait(); + } return dst_mem; } @@ -305,9 +310,13 @@ class FCPrimitiveFactory { auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes); mkldnn::stream astream(engine_); - reorder.execute(astream, - {{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}}); - astream.wait(); + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + reorder.execute(astream, + {{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}}); + astream.wait(); + } return dst_mem; } diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index 4f0b7cab47..258b6971a0 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -110,8 +110,12 @@ class MulPrimitiveFactory { auto reorder = mkldnn::reorder(reorder_pd); mkldnn::stream astream(engine_); - reorder.execute(astream, src_mem, dst_mem); - astream.wait(); + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + reorder.execute(astream, src_mem, dst_mem); + astream.wait(); + } return dst_mem; } @@ -267,8 +271,13 @@ class MulPrimitiveFactory { auto reorder = mkldnn::reorder(src_mem, dst_mem); mkldnn::stream astream(engine_); - reorder.execute(astream, src_mem, dst_mem); - astream.wait(); + + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + reorder.execute(astream, src_mem, dst_mem); + astream.wait(); + } return dst_mem; } diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index e5dedd403f..3e04e2dcf0 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -139,8 +139,12 @@ class QuantOpKernel : public framework::OpKernel { } mkldnn::stream astream(engine); - reorder_p->execute(astream, *src_memory, *dst_memory); - astream.wait(); + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + reorder_p->execute(astream, *src_memory, *dst_memory); + astream.wait(); + } output->set_layout(DataLayout::kMKLDNN); output->set_format(GetMKLDNNFormat(*dst_memory)); diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index 4666e5b74a..a3b078205e 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -138,8 +138,12 @@ class ReQuantOpKernel : public framework::OpKernel { } dnnl::stream astream(engine); - reorder_p->execute(astream, *src_memory, *dst_memory); - astream.wait(); + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + reorder_p->execute(astream, *src_memory, *dst_memory); + astream.wait(); + } output->set_layout(framework::DataLayout::kMKLDNN); output->set_format(platform::GetMKLDNNFormat(*dst_memory)); diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index 4df7818072..e1031c02be 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -197,8 +197,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { output, in_out.format(), ctx.GetPlace()); auto reorder_p = reorder_handler.AcquireReorder(target_mem, dst_mem); - reorder_p->execute(astream, *dst_mem, *target_mem); - astream.wait(); + { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); + reorder_p->execute(astream, *dst_mem, *target_mem); + astream.wait(); + } } output->set_layout(framework::DataLayout::kMKLDNN); output->set_format(platform::GetMKLDNNFormat(*dst_mem)); diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 797ff42f3c..99044c53d2 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -23,6 +23,7 @@ limitations under the License. */ #include "mkldnn.hpp" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/place.h" +#include "paddle/fluid/platform/profiler.h" namespace paddle { #ifdef PADDLE_WITH_MKLDNN using MKLDNNMemoryFormat = mkldnn::memory::format_tag; @@ -188,6 +189,8 @@ inline void Reorder(mkldnn::memory src, mkldnn::memory dst, const mkldnn::engine& engine) { auto reorder_prim = mkldnn::reorder(src, dst); mkldnn::stream astream(engine); + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); reorder_prim.execute(astream, src, dst); astream.wait(); } diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 90266f6c20..6976e55b23 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -238,6 +238,9 @@ class MKLDNNHandlerT { } mkldnn::stream astream(engine_); + + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, {MKLDNN_ARG_TO, *target_memory_p}}); astream.wait(); @@ -264,6 +267,8 @@ class MKLDNNHandlerT { dev_ctx_.SetBlob(key_reorder_p, reorder_p); mkldnn::stream astream(engine_); + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, {MKLDNN_ARG_TO, *target_memory_p}}); astream.wait(); @@ -282,6 +287,8 @@ class MKLDNNHandlerT { auto reorder_p = std::static_pointer_cast( dev_ctx_.GetBlob(key_reorder_p)); if (reorder_p != nullptr) { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, {MKLDNN_ARG_TO, *target_memory_p}}); astream.wait(); @@ -427,6 +434,8 @@ class MKLDNNHandler { std::make_shared(*user_memory_p, *target_memory_p); dev_ctx_.SetBlob(key_reorder_p, reorder_p); mkldnn::stream astream(engine_); + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, {MKLDNN_ARG_TO, *target_memory_p}}); astream.wait(); @@ -474,6 +483,8 @@ class MKLDNNHandler { std::shared_ptr(new mkldnn::reorder(*reorder_pd)); dev_ctx_.SetBlob(key_reorder_p, reorder_p); + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, {MKLDNN_ARG_TO, *target_memory_p}}); astream.wait(); @@ -484,6 +495,8 @@ class MKLDNNHandler { auto reorder_p = std::static_pointer_cast( dev_ctx_.GetBlob(key_reorder_p)); if (reorder_p != nullptr) { + platform::RecordEvent record_reorder("int_reorder", + platform::EventRole::kUniqueOp); reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, {MKLDNN_ARG_TO, *target_memory_p}}); astream.wait(); diff --git a/paddle/fluid/platform/profiler_helper.h b/paddle/fluid/platform/profiler_helper.h index c79195aa0d..9629686132 100644 --- a/paddle/fluid/platform/profiler_helper.h +++ b/paddle/fluid/platform/profiler_helper.h @@ -649,8 +649,14 @@ void PrintProfiler( } std::cout << std::setw(data_width) << event_item.min_time << std::setw(data_width) << event_item.max_time - << std::setw(data_width) << event_item.ave_time - << std::setw(data_width) << event_item.ratio << std::endl; + << std::setw(data_width) << event_item.ave_time; + if (event_item.name.find("ext_reorder") != std::string::npos || + event_item.name.find("int_reorder") != std::string::npos) { + std::cout << event_item.ratio << '*'; + } else { + std::cout << std::setw(data_width) << event_item.ratio; + } + std::cout << std::endl; PrintProfiler(child_table, child_map, sorted_func, sorted_by, overhead, sorted_domain, name_width, data_width, merge_thread, @@ -715,12 +721,32 @@ void AnalyzeEvent( if (child_index[j] == 0) { main_event_items.push_back(event_items[j]); total += event_items[j].total_time; + } else if ((child_index[j] == 1 && + (event_items[j].name.find("ext_reorder") != + std::string::npos || + event_items[j].name.find("int_reorder") != + std::string::npos)) && + platform::GetTracerOption() != TracerOption::kAllOpDetail) { + size_t first_slash_pos = event_items[j].name.find('/'); + if (first_slash_pos != std::string::npos) { + std::string fname = event_items[j].name.substr(0, first_slash_pos); + child_map->insert( + std::pair(fname, event_items[j])); + } } } // average time for (auto &item : main_event_items) { item.ave_time = item.total_time / item.calls; item.ratio = item.total_time / total; + if (platform::GetTracerOption() != TracerOption::kAllOpDetail) { + for (auto it = child_map->begin(); it != child_map->end(); ++it) { + if ((*it).first == item.name) { + (*it).second.ratio = (*it).second.total_time / item.total_time; + break; // to find only first item + } + } + } } for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) { it->second.ratio = it->second.total_time / total; -- GitLab