未验证 提交 57a4f16d 编写于 作者: J jakpiase 提交者: GitHub

Added profiler events for internal and external MKL-DNN reorders (#29443)

* added external reorder to profiler

* added external and internal reorders to profiler

* added internal and external reorder to profiler

* applied code formatting to the internal/external reorder changes

* removed unnecessary comment
上级 2480bdef
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_layout_transform.h"
#include <string> #include <string>
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
...@@ -194,6 +194,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, ...@@ -194,6 +194,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p); handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
mkldnn::stream astream(cpu_engine); mkldnn::stream astream(cpu_engine);
platform::RecordEvent record_reorder("ext_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait(); astream.wait();
} else { } else {
......
...@@ -808,9 +808,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -808,9 +808,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
user_src_memory_p = std::static_pointer_cast<mkldnn::memory>( user_src_memory_p = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(user_src_key)); dev_ctx.GetBlob(user_src_key));
user_src_memory_p->set_data_handle(to_void_cast<T>(input_data)); user_src_memory_p->set_data_handle(to_void_cast<T>(input_data));
src_memory_reorder_p->execute(astream, *user_src_memory_p, {
*src_memory_p); platform::RecordEvent record_reorder("int_reorder",
astream.wait(); platform::EventRole::kUniqueOp);
src_memory_reorder_p->execute(astream, *user_src_memory_p,
*src_memory_p);
astream.wait();
}
} else if (src_memory_p) { } else if (src_memory_p) {
src_memory_p->set_data_handle(to_void_cast<T>(input_data)); src_memory_p->set_data_handle(to_void_cast<T>(input_data));
} }
...@@ -840,9 +844,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -840,9 +844,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if (residual_reorder_p) { if (residual_reorder_p) {
auto user_residual_data_p = std::static_pointer_cast<mkldnn::memory>( auto user_residual_data_p = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(user_residual_key)); dev_ctx.GetBlob(user_residual_key));
residual_reorder_p->execute(astream, *user_residual_data_p, {
*dst_memory_p); platform::RecordEvent record_reorder("int_reorder",
astream.wait(); platform::EventRole::kUniqueOp);
residual_reorder_p->execute(astream, *user_residual_data_p,
*dst_memory_p);
astream.wait();
}
} }
auto bias_memory_p = auto bias_memory_p =
...@@ -1094,9 +1102,13 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -1094,9 +1102,13 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto reorder_p = auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p); handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p);
reorder_p->execute(astream, *diff_weights_memory_p, {
*reorder_dst_memory_p); platform::RecordEvent record_reorder("int_reorder",
astream.wait(); platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *diff_weights_memory_p,
*reorder_dst_memory_p);
astream.wait();
}
// So here we have a data in goihw , which can be interpreted as OIHW // So here we have a data in goihw , which can be interpreted as OIHW
// (OIDHW for conv3d) // (OIDHW for conv3d)
......
...@@ -281,8 +281,13 @@ class FCPrimitiveFactory { ...@@ -281,8 +281,13 @@ class FCPrimitiveFactory {
auto reorder = mkldnn::reorder(src_mem, *dst_mem); auto reorder = mkldnn::reorder(src_mem, *dst_mem);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
reorder.execute(astream, src_mem, *dst_mem);
astream.wait(); {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, *dst_mem);
astream.wait();
}
return dst_mem; return dst_mem;
} }
...@@ -305,9 +310,13 @@ class FCPrimitiveFactory { ...@@ -305,9 +310,13 @@ class FCPrimitiveFactory {
auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes); auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
reorder.execute(astream, {
{{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}}); platform::RecordEvent record_reorder("int_reorder",
astream.wait(); platform::EventRole::kUniqueOp);
reorder.execute(astream,
{{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}});
astream.wait();
}
return dst_mem; return dst_mem;
} }
......
...@@ -110,8 +110,12 @@ class MulPrimitiveFactory { ...@@ -110,8 +110,12 @@ class MulPrimitiveFactory {
auto reorder = mkldnn::reorder(reorder_pd); auto reorder = mkldnn::reorder(reorder_pd);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
reorder.execute(astream, src_mem, dst_mem); {
astream.wait(); platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
return dst_mem; return dst_mem;
} }
...@@ -267,8 +271,13 @@ class MulPrimitiveFactory { ...@@ -267,8 +271,13 @@ class MulPrimitiveFactory {
auto reorder = mkldnn::reorder(src_mem, dst_mem); auto reorder = mkldnn::reorder(src_mem, dst_mem);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
reorder.execute(astream, src_mem, dst_mem);
astream.wait(); {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
return dst_mem; return dst_mem;
} }
......
...@@ -139,8 +139,12 @@ class QuantOpKernel : public framework::OpKernel<T> { ...@@ -139,8 +139,12 @@ class QuantOpKernel : public framework::OpKernel<T> {
} }
mkldnn::stream astream(engine); mkldnn::stream astream(engine);
reorder_p->execute(astream, *src_memory, *dst_memory); {
astream.wait(); platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *src_memory, *dst_memory);
astream.wait();
}
output->set_layout(DataLayout::kMKLDNN); output->set_layout(DataLayout::kMKLDNN);
output->set_format(GetMKLDNNFormat(*dst_memory)); output->set_format(GetMKLDNNFormat(*dst_memory));
......
...@@ -138,8 +138,12 @@ class ReQuantOpKernel : public framework::OpKernel<T> { ...@@ -138,8 +138,12 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
} }
dnnl::stream astream(engine); dnnl::stream astream(engine);
reorder_p->execute(astream, *src_memory, *dst_memory); {
astream.wait(); platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *src_memory, *dst_memory);
astream.wait();
}
output->set_layout(framework::DataLayout::kMKLDNN); output->set_layout(framework::DataLayout::kMKLDNN);
output->set_format(platform::GetMKLDNNFormat(*dst_memory)); output->set_format(platform::GetMKLDNNFormat(*dst_memory));
......
...@@ -197,8 +197,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -197,8 +197,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
output, in_out.format(), ctx.GetPlace()); output, in_out.format(), ctx.GetPlace());
auto reorder_p = reorder_handler.AcquireReorder(target_mem, dst_mem); auto reorder_p = reorder_handler.AcquireReorder(target_mem, dst_mem);
reorder_p->execute(astream, *dst_mem, *target_mem); {
astream.wait(); platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *dst_mem, *target_mem);
astream.wait();
}
} }
output->set_layout(framework::DataLayout::kMKLDNN); output->set_layout(framework::DataLayout::kMKLDNN);
output->set_format(platform::GetMKLDNNFormat(*dst_mem)); output->set_format(platform::GetMKLDNNFormat(*dst_mem));
......
...@@ -23,6 +23,7 @@ limitations under the License. */ ...@@ -23,6 +23,7 @@ limitations under the License. */
#include "mkldnn.hpp" #include "mkldnn.hpp"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle { namespace paddle {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
using MKLDNNMemoryFormat = mkldnn::memory::format_tag; using MKLDNNMemoryFormat = mkldnn::memory::format_tag;
...@@ -188,6 +189,8 @@ inline void Reorder(mkldnn::memory src, mkldnn::memory dst, ...@@ -188,6 +189,8 @@ inline void Reorder(mkldnn::memory src, mkldnn::memory dst,
const mkldnn::engine& engine) { const mkldnn::engine& engine) {
auto reorder_prim = mkldnn::reorder(src, dst); auto reorder_prim = mkldnn::reorder(src, dst);
mkldnn::stream astream(engine); mkldnn::stream astream(engine);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_prim.execute(astream, src, dst); reorder_prim.execute(astream, src, dst);
astream.wait(); astream.wait();
} }
......
...@@ -238,6 +238,9 @@ class MKLDNNHandlerT { ...@@ -238,6 +238,9 @@ class MKLDNNHandlerT {
} }
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -264,6 +267,8 @@ class MKLDNNHandlerT { ...@@ -264,6 +267,8 @@ class MKLDNNHandlerT {
dev_ctx_.SetBlob(key_reorder_p, reorder_p); dev_ctx_.SetBlob(key_reorder_p, reorder_p);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -282,6 +287,8 @@ class MKLDNNHandlerT { ...@@ -282,6 +287,8 @@ class MKLDNNHandlerT {
auto reorder_p = std::static_pointer_cast<mkldnn::reorder>( auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
dev_ctx_.GetBlob(key_reorder_p)); dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) { if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -427,6 +434,8 @@ class MKLDNNHandler { ...@@ -427,6 +434,8 @@ class MKLDNNHandler {
std::make_shared<mkldnn::reorder>(*user_memory_p, *target_memory_p); std::make_shared<mkldnn::reorder>(*user_memory_p, *target_memory_p);
dev_ctx_.SetBlob(key_reorder_p, reorder_p); dev_ctx_.SetBlob(key_reorder_p, reorder_p);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -474,6 +483,8 @@ class MKLDNNHandler { ...@@ -474,6 +483,8 @@ class MKLDNNHandler {
std::shared_ptr<mkldnn::reorder>(new mkldnn::reorder(*reorder_pd)); std::shared_ptr<mkldnn::reorder>(new mkldnn::reorder(*reorder_pd));
dev_ctx_.SetBlob(key_reorder_p, reorder_p); dev_ctx_.SetBlob(key_reorder_p, reorder_p);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -484,6 +495,8 @@ class MKLDNNHandler { ...@@ -484,6 +495,8 @@ class MKLDNNHandler {
auto reorder_p = std::static_pointer_cast<mkldnn::reorder>( auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
dev_ctx_.GetBlob(key_reorder_p)); dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) { if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
......
...@@ -649,8 +649,14 @@ void PrintProfiler( ...@@ -649,8 +649,14 @@ void PrintProfiler(
} }
std::cout << std::setw(data_width) << event_item.min_time std::cout << std::setw(data_width) << event_item.min_time
<< std::setw(data_width) << event_item.max_time << std::setw(data_width) << event_item.max_time
<< std::setw(data_width) << event_item.ave_time << std::setw(data_width) << event_item.ave_time;
<< std::setw(data_width) << event_item.ratio << std::endl; if (event_item.name.find("ext_reorder") != std::string::npos ||
event_item.name.find("int_reorder") != std::string::npos) {
std::cout << event_item.ratio << '*';
} else {
std::cout << std::setw(data_width) << event_item.ratio;
}
std::cout << std::endl;
PrintProfiler(child_table, child_map, sorted_func, sorted_by, overhead, PrintProfiler(child_table, child_map, sorted_func, sorted_by, overhead,
sorted_domain, name_width, data_width, merge_thread, sorted_domain, name_width, data_width, merge_thread,
...@@ -715,12 +721,32 @@ void AnalyzeEvent( ...@@ -715,12 +721,32 @@ void AnalyzeEvent(
if (child_index[j] == 0) { if (child_index[j] == 0) {
main_event_items.push_back(event_items[j]); main_event_items.push_back(event_items[j]);
total += event_items[j].total_time; total += event_items[j].total_time;
} else if ((child_index[j] == 1 &&
(event_items[j].name.find("ext_reorder") !=
std::string::npos ||
event_items[j].name.find("int_reorder") !=
std::string::npos)) &&
platform::GetTracerOption() != TracerOption::kAllOpDetail) {
size_t first_slash_pos = event_items[j].name.find('/');
if (first_slash_pos != std::string::npos) {
std::string fname = event_items[j].name.substr(0, first_slash_pos);
child_map->insert(
std::pair<std::string, EventItem>(fname, event_items[j]));
}
} }
} }
// average time // average time
for (auto &item : main_event_items) { for (auto &item : main_event_items) {
item.ave_time = item.total_time / item.calls; item.ave_time = item.total_time / item.calls;
item.ratio = item.total_time / total; item.ratio = item.total_time / total;
if (platform::GetTracerOption() != TracerOption::kAllOpDetail) {
for (auto it = child_map->begin(); it != child_map->end(); ++it) {
if ((*it).first == item.name) {
(*it).second.ratio = (*it).second.total_time / item.total_time;
break; // to find only first item
}
}
}
} }
for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) { for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) {
it->second.ratio = it->second.total_time / total; it->second.ratio = it->second.total_time / total;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册