未验证 提交 57a4f16d 编写于 作者: J jakpiase 提交者: GitHub

added internal and external reorders to profiler (#29443)

* added external reorder to profiler

* added external and internal reorders to profiler

* added internal and external reorder to profiler

* added formatting to int/ext reorder commit

* removed unnecessary comment
上级 2480bdef
......@@ -13,8 +13,8 @@
// limitations under the License.
#include "paddle/fluid/framework/data_layout_transform.h"
#include <string>
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/operators/math/math_function.h"
#ifdef PADDLE_WITH_MKLDNN
......@@ -194,6 +194,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
mkldnn::stream astream(cpu_engine);
platform::RecordEvent record_reorder("ext_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();
} else {
......
......@@ -808,9 +808,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
user_src_memory_p = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(user_src_key));
user_src_memory_p->set_data_handle(to_void_cast<T>(input_data));
src_memory_reorder_p->execute(astream, *user_src_memory_p,
*src_memory_p);
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
src_memory_reorder_p->execute(astream, *user_src_memory_p,
*src_memory_p);
astream.wait();
}
} else if (src_memory_p) {
src_memory_p->set_data_handle(to_void_cast<T>(input_data));
}
......@@ -840,9 +844,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if (residual_reorder_p) {
auto user_residual_data_p = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(user_residual_key));
residual_reorder_p->execute(astream, *user_residual_data_p,
*dst_memory_p);
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
residual_reorder_p->execute(astream, *user_residual_data_p,
*dst_memory_p);
astream.wait();
}
}
auto bias_memory_p =
......@@ -1094,9 +1102,13 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p);
reorder_p->execute(astream, *diff_weights_memory_p,
*reorder_dst_memory_p);
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *diff_weights_memory_p,
*reorder_dst_memory_p);
astream.wait();
}
// So here we have data in goihw, which can be interpreted as OIHW
// (OIDHW for conv3d)
......
......@@ -281,8 +281,13 @@ class FCPrimitiveFactory {
auto reorder = mkldnn::reorder(src_mem, *dst_mem);
mkldnn::stream astream(engine_);
reorder.execute(astream, src_mem, *dst_mem);
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, *dst_mem);
astream.wait();
}
return dst_mem;
}
......@@ -305,9 +310,13 @@ class FCPrimitiveFactory {
auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes);
mkldnn::stream astream(engine_);
reorder.execute(astream,
{{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}});
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream,
{{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}});
astream.wait();
}
return dst_mem;
}
......
......@@ -110,8 +110,12 @@ class MulPrimitiveFactory {
auto reorder = mkldnn::reorder(reorder_pd);
mkldnn::stream astream(engine_);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
return dst_mem;
}
......@@ -267,8 +271,13 @@ class MulPrimitiveFactory {
auto reorder = mkldnn::reorder(src_mem, dst_mem);
mkldnn::stream astream(engine_);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
return dst_mem;
}
......
......@@ -139,8 +139,12 @@ class QuantOpKernel : public framework::OpKernel<T> {
}
mkldnn::stream astream(engine);
reorder_p->execute(astream, *src_memory, *dst_memory);
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *src_memory, *dst_memory);
astream.wait();
}
output->set_layout(DataLayout::kMKLDNN);
output->set_format(GetMKLDNNFormat(*dst_memory));
......
......@@ -138,8 +138,12 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
}
dnnl::stream astream(engine);
reorder_p->execute(astream, *src_memory, *dst_memory);
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *src_memory, *dst_memory);
astream.wait();
}
output->set_layout(framework::DataLayout::kMKLDNN);
output->set_format(platform::GetMKLDNNFormat(*dst_memory));
......
......@@ -197,8 +197,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
output, in_out.format(), ctx.GetPlace());
auto reorder_p = reorder_handler.AcquireReorder(target_mem, dst_mem);
reorder_p->execute(astream, *dst_mem, *target_mem);
astream.wait();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *dst_mem, *target_mem);
astream.wait();
}
}
output->set_layout(framework::DataLayout::kMKLDNN);
output->set_format(platform::GetMKLDNNFormat(*dst_mem));
......
......@@ -23,6 +23,7 @@ limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
#ifdef PADDLE_WITH_MKLDNN
using MKLDNNMemoryFormat = mkldnn::memory::format_tag;
......@@ -188,6 +189,8 @@ inline void Reorder(mkldnn::memory src, mkldnn::memory dst,
const mkldnn::engine& engine) {
auto reorder_prim = mkldnn::reorder(src, dst);
mkldnn::stream astream(engine);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_prim.execute(astream, src, dst);
astream.wait();
}
......
......@@ -238,6 +238,9 @@ class MKLDNNHandlerT {
}
mkldnn::stream astream(engine_);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -264,6 +267,8 @@ class MKLDNNHandlerT {
dev_ctx_.SetBlob(key_reorder_p, reorder_p);
mkldnn::stream astream(engine_);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -282,6 +287,8 @@ class MKLDNNHandlerT {
auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -427,6 +434,8 @@ class MKLDNNHandler {
std::make_shared<mkldnn::reorder>(*user_memory_p, *target_memory_p);
dev_ctx_.SetBlob(key_reorder_p, reorder_p);
mkldnn::stream astream(engine_);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -474,6 +483,8 @@ class MKLDNNHandler {
std::shared_ptr<mkldnn::reorder>(new mkldnn::reorder(*reorder_pd));
dev_ctx_.SetBlob(key_reorder_p, reorder_p);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -484,6 +495,8 @@ class MKLDNNHandler {
auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
astream.wait();
......
......@@ -649,8 +649,14 @@ void PrintProfiler(
}
std::cout << std::setw(data_width) << event_item.min_time
<< std::setw(data_width) << event_item.max_time
<< std::setw(data_width) << event_item.ave_time
<< std::setw(data_width) << event_item.ratio << std::endl;
<< std::setw(data_width) << event_item.ave_time;
if (event_item.name.find("ext_reorder") != std::string::npos ||
event_item.name.find("int_reorder") != std::string::npos) {
std::cout << event_item.ratio << '*';
} else {
std::cout << std::setw(data_width) << event_item.ratio;
}
std::cout << std::endl;
PrintProfiler(child_table, child_map, sorted_func, sorted_by, overhead,
sorted_domain, name_width, data_width, merge_thread,
......@@ -715,12 +721,32 @@ void AnalyzeEvent(
if (child_index[j] == 0) {
main_event_items.push_back(event_items[j]);
total += event_items[j].total_time;
} else if ((child_index[j] == 1 &&
(event_items[j].name.find("ext_reorder") !=
std::string::npos ||
event_items[j].name.find("int_reorder") !=
std::string::npos)) &&
platform::GetTracerOption() != TracerOption::kAllOpDetail) {
size_t first_slash_pos = event_items[j].name.find('/');
if (first_slash_pos != std::string::npos) {
std::string fname = event_items[j].name.substr(0, first_slash_pos);
child_map->insert(
std::pair<std::string, EventItem>(fname, event_items[j]));
}
}
}
// average time
for (auto &item : main_event_items) {
item.ave_time = item.total_time / item.calls;
item.ratio = item.total_time / total;
if (platform::GetTracerOption() != TracerOption::kAllOpDetail) {
for (auto it = child_map->begin(); it != child_map->end(); ++it) {
if ((*it).first == item.name) {
(*it).second.ratio = (*it).second.total_time / item.total_time;
break; // to find only first item
}
}
}
}
for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) {
it->second.ratio = it->second.total_time / total;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册