From 50326563d5c9c6925f7ecb12e3c5a346d26f0fa0 Mon Sep 17 00:00:00 2001
From: Leo Zhao <48052473+LeoZhao-Intel@users.noreply.github.com>
Date: Tue, 4 Jun 2019 21:29:30 +0800
Subject: [PATCH] enable mkldnn primitive reuse for platform reorder (#17826)

test=develop
---
 .../fluid/framework/data_layout_transform.cc  | 22 +++--
 paddle/fluid/platform/mkldnn_reuse.h          | 97 +++++++++++++++++++
 2 files changed, 112 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc
index 72c50518af0..bbcd34260e3 100644
--- a/paddle/fluid/framework/data_layout_transform.cc
+++ b/paddle/fluid/framework/data_layout_transform.cc
@@ -13,11 +13,13 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/data_layout_transform.h"
+#include <string>
 #include <vector>
 
 #include "paddle/fluid/operators/math/math_function.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/platform/mkldnn_reuse.h"
 #endif
 
 namespace paddle {
@@ -145,7 +147,6 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
   memory::data_type in_type = ToMKLDNNDataType(in.type());
   PADDLE_ENFORCE(in_type != memory::data_type::data_undef,
                  "Input tensor type is not supported: %s", in.type());
-  memory::data_type out_type = in_type;
 
   auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format());
   auto out_format =
@@ -156,14 +157,21 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
 
   if (in_format != out_format) {
     void* in_data = GetDataFromTensor(in, in_type);
-    auto out_data = out->mutable_data(expected_kernel_type.place_, in.type());
+    const std::string key = platform::ReorderMKLDNNHandler::GetHash(
+        in_tz, in_format, out_format, std::to_string(in_type));
 
-    auto in_memory =
-        memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data);
-    auto out_memory =
-        memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data);
+    platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type, *dev_ctx,
+                                           cpu_engine, key);
 
-    platform::Reorder(in_memory, out_memory);
+    auto reorder_src_memory_p = handler.AcquireSrcMemory(in_format, in_data);
+    auto reorder_dst_memory_p =
+        handler.AcquireDstMemory(out, out_format, expected_kernel_type.place_);
+    auto reorder_p =
+        handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
+
+    std::vector<mkldnn::primitive> pipeline;
+    pipeline.push_back(*reorder_p);
+    mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
   } else {
     out->ShareDataWith(in);
   }
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 4011f08cea8..87405fb5507 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -400,6 +400,103 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
   std::vector<int> logical_axis_;
 };
 
+// Caches the mkldnn memory objects and the reorder primitive used for a
+// layout transformation in the device context, so that later calls with the
+// same key reuse them instead of constructing new ones.
+class ReorderMKLDNNHandler : public MKLDNNHandler {
+ public:
+  ReorderMKLDNNHandler(std::vector<int>& dims,  // NOLINT
+                       framework::proto::VarType::Type vtype,
+                       mkldnn::memory::data_type dtype,
+                       const platform::MKLDNNDeviceContext& dev_ctx,
+                       mkldnn::engine engine, const std::string& base_key)
+      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
+        dims_(dims),
+        vtype_(vtype),
+        dtype_(dtype) {}
+
+  // Returns the source memory cached under key_ + "@user_src_mem_p", creating
+  // and caching it on a miss; on a hit only its data handle is set to `ptr`.
+  std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
+      const mkldnn::memory::format& fmt, void* ptr) {
+    auto local_key = key_ + "@user_src_mem_p";
+    auto mem_p =
+        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
+    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find mem primitive in device context");
+    if (mem_p == nullptr) {
+      auto src_md = platform::MKLDNNMemDesc(dims_, dtype_, fmt);
+      mem_p = std::make_shared<mkldnn::memory>(
+          mkldnn::memory::primitive_desc{src_md, engine_}, ptr);
+      dev_ctx_.SetBlob(local_key, mem_p);
+    } else {
+      mem_p->set_data_handle(ptr);
+      is_reusing_ = true;
+    }
+    return mem_p;
+  }
+
+  // Same as AcquireSrcMemory for the destination, keyed "@user_dst_mem_p".
+  // The buffer is taken from output->mutable_data(place, vtype_) on each call.
+  std::shared_ptr<mkldnn::memory> AcquireDstMemory(
+      framework::Tensor* output, const mkldnn::memory::format& fmt,
+      platform::Place place) {
+    auto local_key = key_ + "@user_dst_mem_p";
+    auto mem_p =
+        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
+    PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find mem primitive in device context");
+    if (mem_p == nullptr) {
+      auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_, fmt);
+      auto dst_mdp = mkldnn::memory::primitive_desc{dst_md, engine_};
+
+      auto dst_data = output->mutable_data(place, vtype_);
+
+      mem_p = std::make_shared<mkldnn::memory>(dst_mdp, dst_data);
+      dev_ctx_.SetBlob(local_key, mem_p);
+    } else {
+      auto dst_data = output->mutable_data(place, vtype_);
+      mem_p->set_data_handle(dst_data);
+      is_reusing_ = true;
+    }
+    return mem_p;
+  }
+
+  // Returns the reorder primitive cached under key_ + "@reorder_p"; on a miss
+  // it is built from src_memory_p to dst_memory_p and cached.
+  std::shared_ptr<mkldnn::reorder> AcquireReorder(
+      std::shared_ptr<mkldnn::memory> dst_memory_p,
+      std::shared_ptr<mkldnn::memory> src_memory_p) {
+    auto prim_key = key_ + "@reorder_p";
+    auto reorder_p =
+        std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
+    PADDLE_ENFORCE((reorder_p != nullptr) || (is_reusing_ == false),
+                   "Fail to find reorder primitive in device context");
+    if (reorder_p == nullptr) {
+      reorder_p =
+          std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
+      dev_ctx_.SetBlob(prim_key, reorder_p);
+    } else {
+      is_reusing_ = true;
+    }
+    return reorder_p;
+  }
+
+  // Builds the cache key from the shape, both formats and a caller suffix.
+  static std::string GetHash(std::vector<int>& shape,  // NOLINT
+                             mkldnn::memory::format in_fmt,
+                             mkldnn::memory::format out_fmt,
+                             const std::string& suffix) {
+    return dims2str(shape) + std::to_string(in_fmt) + "->" +
+           std::to_string(out_fmt) + "#" + suffix;
+  }
+
+ private:
+  std::vector<int> dims_;
+  framework::proto::VarType::Type vtype_;
+  mkldnn::memory::data_type dtype_;
+};
+
 template <typename T>
 struct convolutional_algorithm;
 
-- 
GitLab