From e4c2a8544ae17e3b58a8054184e05e4e2eb5ff4d Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Sat, 18 Sep 2021 04:10:45 +0200
Subject: [PATCH] [oneDNN] Disable caching of Reorder operation (#35664)

* - Disable caching of Reorder
* - compilation fix
* - another compilation fix
* - another compilation fix
* - compilation fix
* - Fix
* - yet another compilation fix
* - surprisingly, another compilation fix
* - lint
* - fix after review
* - fix
---
 log                                           | Bin 0 -> 2816 bytes
 .../fluid/framework/data_layout_transform.cc  |   6 +-
 .../mkldnn/elementwise_add_mkldnn_op.cc       |   6 +-
 .../fluid/operators/mkldnn/cast_mkldnn_op.cc  |   8 +-
 .../fluid/operators/mkldnn/conv_mkldnn_op.cc  |   8 +-
 .../operators/mkldnn/expand_v2_mkldnn_op.cc   |   4 +-
 .../operators/mkldnn/matmul_mkldnn_op.cc      |   5 +-
 .../operators/mkldnn/reshape_mkldnn_op.cc     |  13 +-
 .../fluid/operators/mkldnn/slice_mkldnn_op.cc |  17 +--
 .../fluid/operators/mkldnn/split_mkldnn_op.cc |  14 +--
 .../fluid/operators/mkldnn/sum_mkldnn_op.cc   |   6 +-
 .../reduce_ops/mkldnn/reduce_mkldnn_op.h      |   6 +-
 paddle/fluid/platform/mkldnn_reuse.h          | 113 ++++-------------
 13 files changed, 54 insertions(+), 152 deletions(-)
 create mode 100644 log

diff --git a/log b/log
new file mode 100644
index 0000000000000000000000000000000000000000..c02e10686b5fbcc78a8591519aaa3389dac63a56
GIT binary patch
literal 2816
[base85 binary data omitted; the data_layout_transform.cc hunk and the
elementwise_add_mkldnn_op.cc diff header are garbled here, only the hunk
body below survives]

     auto tz = paddle::framework::vectorize(dout->dims());
     memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type());
-    std::string key = platform::CreateKey(dev_ctx, tz, dout->format(),
-                                          dout->format(), dout_type);
-    platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type, dev_ctx,
-                                           onednn_engine, key);
+    platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type,
+                                           onednn_engine);

     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
     auto reorder_src_memory_p = handler.AcquireSrcMemory(

diff --git a/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
index 9cfeace6bef..6f79c2dccf5 100644
--- a/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/cast_mkldnn_op.cc
@@ -43,11 +43,9 @@ class CastMKLDNNKernel : public framework::OpKernel {

     auto x_tz = framework::vectorize(x->dims());

-    std::string key =
-        platform::CreateKey(dev_ctx, x_tz, x->format(), x->format(), x_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_tz, x_paddle_type, x_type, out_paddle_type, out_type, dev_ctx,
-        dev_ctx.GetEngine(), key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_tz, x_paddle_type, x_type,
+                                                   out_paddle_type, out_type,
+                                                   dev_ctx.GetEngine());

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
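The cast kernel above drives the type conversion entirely through ReorderMKLDNNHandler: a oneDNN reorder can change the data type while it copies. The following standalone sketch illustrates that idea with the raw oneDNN C++ API. It is not part of the patch; the function name and shapes are made up, and it assumes a CPU build whose reorder implementation supports f32 to bf16 conversion.

#include <vector>

#include "mkldnn.hpp"  // oneDNN C++ API header used by this codebase

// Hypothetical helper, not Paddle code: cast an f32 buffer to bf16 by running
// a single reorder primitive, the same mechanism CastMKLDNNKernel relies on.
void CastF32ToBf16Sketch() {
  mkldnn::engine engine(mkldnn::engine::kind::cpu, 0);
  mkldnn::stream astream(engine);

  mkldnn::memory::dims dims = {1, 3, 8, 8};
  std::vector<float> src_buf(1 * 3 * 8 * 8, 1.0f);

  auto src_md = mkldnn::memory::desc(dims, mkldnn::memory::data_type::f32,
                                     mkldnn::memory::format_tag::nchw);
  auto dst_md = mkldnn::memory::desc(dims, mkldnn::memory::data_type::bf16,
                                     mkldnn::memory::format_tag::nchw);

  // Source wraps the caller-owned buffer; destination lets oneDNN allocate.
  mkldnn::memory src_mem(src_md, engine, src_buf.data());
  mkldnn::memory dst_mem(dst_md, engine);

  // The reorder performs the f32 -> bf16 conversion as it copies.
  mkldnn::reorder(src_mem, dst_mem).execute(astream, src_mem, dst_mem);
  astream.wait();
}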
diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
index b353ce4c322..09386fc31ee 100644
--- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -1125,12 +1125,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel {

           mkldnn::memory::format_tag out_format =
               weights_tz.size() == 6 ? mkldnn::memory::format_tag::goidhw
                                      : mkldnn::memory::format_tag::goihw;
-          std::string key = platform::CreateKey(dev_ctx, weights_tz, filter_fmt,
-                                                out_format, in_type);
-          key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);
-
-          platform::ReorderMKLDNNHandler handler(
-              weights_tz, filter->type(), in_type, dev_ctx, mkldnn_engine, key);
+          platform::ReorderMKLDNNHandler handler(weights_tz, filter->type(),
+                                                 in_type, mkldnn_engine);
           auto reorder_dst_memory_p =
               handler.AcquireDstMemory(filter_grad, out_format, ctx.GetPlace());

diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
index 829c948c1a5..d537c3dbf9f 100644
--- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc
@@ -114,10 +114,8 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel {
     if (dout_vec_dims == dx_vec_dims) {
       mkldnn::memory::data_type dout_type =
           paddle::framework::ToMKLDNNDataType(dout->type());
-      std::string key = paddle::platform::CreateKey(
-          dev_ctx, dout_vec_dims, dout->format(), dout->format(), dout_type);
       paddle::platform::ReorderMKLDNNHandler reorder_handler(
-          dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
+          dout_vec_dims, dout->type(), dout_type, onednn_engine);

       auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
           dout->format(), paddle::platform::to_void_cast(dout->data<T>()));

diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
index 723c3c8352d..b78acd32e6d 100644
--- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc
@@ -58,11 +58,8 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext& dev_ctx,

   memory::data_type input_type =
       paddle::framework::ToMKLDNNDataType(input->type());
-  std::string key = paddle::platform::CreateKey(
-      dev_ctx, input_dims, input->format(), input->format(), input_type);
   paddle::platform::ReorderMKLDNNHandler reorder_handler(
-      output_dims, input->type(), input_type, dev_ctx, dev_ctx.GetEngine(),
-      key);
+      output_dims, input->type(), input_type, dev_ctx.GetEngine());

   auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
       memory::format_tag::abc,

diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
index d5e428bd805..e6a7f3e74fc 100644
--- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc
@@ -93,10 +93,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel {
     }

     mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
-    std::string key =
-        platform::CreateKey(dev_ctx, x_vec_dims, x->format(), x_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
+                                                   x_type, onednn_engine);

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
@@ -253,11 +251,8 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel {

     mkldnn::memory::data_type dout_type =
         framework::ToMKLDNNDataType(dout->type());
-    std::string key =
-        platform::CreateKey(dev_ctx, dout_vec_dims, this->getPlainFormatTag(dx),
-                            dx->format(), dout_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(),
+                                                   dout_type, onednn_engine);

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         dout->format(), platform::to_void_cast(dout->data<T>()));
diff --git a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
index facf5ca4b83..6bc3413604e 100644
--- a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
@@ -98,18 +98,16 @@ class SliceMKLDNNKernel : public framework::OpKernel {

     out->Resize(framework::make_ddim(slice_dims));

     mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
-    auto key = platform::CreateKey(dev_ctx, x_vec_dims, axes, starts, ends,
-                                   x->format(), x_type);
-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
+                                                   x_type, onednn_engine);

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));
     auto slice_mem_p = reorder_handler.AcquireSubmemory(slice_dims, offsets,
                                                         reorder_src_memory_p);
     auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-        out, slice_dims, 0, get_plain_format_tag(x), ctx.GetPlace());
+        out, slice_dims, get_plain_format_tag(x), ctx.GetPlace());

     auto reorder_p =
         reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p);
@@ -201,16 +199,13 @@ class SliceGradMKLDNNKernel : public framework::OpKernel {

     mkldnn::memory::format_tag reorder_format_tag =
         platform::GetMKLDNNFormat(md.reshape(slice_dims));

-    auto key = platform::CreateKey(dev_ctx, dout_vec_dims, axes, starts, ends,
-                                   reorder_format_tag, dout_type);
-
-    platform::ReorderMKLDNNHandler reorder_handler(
-        slice_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(slice_dims, dout->type(),
+                                                   dout_type, onednn_engine);

     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         reorder_format_tag, platform::to_void_cast(dout->data<T>()));
     auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-        dx, dx_vec_dims, 0, reorder_format_tag, ctx.GetPlace());
+        dx, dx_vec_dims, reorder_format_tag, ctx.GetPlace());
     memset(dx->data<T>(), 0, reorder_dst_memory_p->get_desc().get_size());

     auto slice_mem_p = reorder_handler.AcquireSubmemory(slice_dims, offsets,
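For reference, the slice kernel's AcquireSubmemory call above boils down to a plain oneDNN sub-memory descriptor plus a reorder: the sub-memory is a zero-copy view into the source buffer, and the reorder materialises just that region into the output. The following standalone sketch is not part of the patch; names and shapes are illustrative and only the mkldnn:: calls are real API.

#include <vector>

#include "mkldnn.hpp"  // oneDNN C++ API header used by this codebase

// Illustrative only: copy rows 2..5 of an 8-row NCHW tensor into a compact
// destination, mirroring what SliceMKLDNNKernel does through the handler.
void SliceViaSubmemorySketch() {
  mkldnn::engine engine(mkldnn::engine::kind::cpu, 0);
  mkldnn::stream astream(engine);

  mkldnn::memory::dims src_dims = {2, 4, 8, 8};
  mkldnn::memory::dims slice_dims = {2, 4, 4, 8};  // 4 of the 8 rows
  mkldnn::memory::dims offsets = {0, 0, 2, 0};     // starting at row 2

  auto src_md = mkldnn::memory::desc(src_dims, mkldnn::memory::data_type::f32,
                                     mkldnn::memory::format_tag::nchw);
  std::vector<float> src_buf(2 * 4 * 8 * 8, 1.0f);
  mkldnn::memory src_mem(src_md, engine, src_buf.data());

  // Zero-copy view of the requested region, still backed by src_mem's buffer.
  auto sub_md = src_md.submemory_desc(slice_dims, offsets);
  mkldnn::memory sub_mem(sub_md, engine, src_mem.get_data_handle());

  // Compact destination with the slice's own layout; the reorder does the copy.
  auto dst_md = mkldnn::memory::desc(slice_dims, mkldnn::memory::data_type::f32,
                                     mkldnn::memory::format_tag::nchw);
  mkldnn::memory dst_mem(dst_md, engine);

  mkldnn::reorder(sub_mem, dst_mem).execute(astream, sub_mem, dst_mem);
  astream.wait();
}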
diff --git a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
index 8a58d9f26f8..411f33276c3 100644
--- a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc
@@ -91,27 +91,25 @@ class SplitMKLDNNKernel : public framework::OpKernel {

     auto x_vec_dims = framework::vectorize(x_dims);

     mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());
-    auto key = platform::CreateKey(dev_ctx, x_vec_dims, axis, num, sections,
-                                   x->format(), x_type);

     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();

     std::vector<int64_t> offset(x_vec_dims.size(), 0);

-    platform::ReorderMKLDNNHandler reorder_handler(
-        x_vec_dims, x->type(), x_type, dev_ctx, onednn_engine, key);
+    platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(),
+                                                   x_type, onednn_engine);
     auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
         x->format(), platform::to_void_cast(x->data<T>()));

     for (size_t i = 0; i < outs_number; ++i) {
       auto out_vec_dims = framework::vectorize(outs[i]->dims());
-      auto slice_mem_p = reorder_handler.AcquireSubmemory(
-          out_vec_dims, offset, reorder_src_memory_p, i);
+      auto slice_mem_p = reorder_handler.AcquireSubmemory(out_vec_dims, offset,
+                                                          reorder_src_memory_p);

       auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
-          outs[i], out_vec_dims, i, x->format(), ctx.GetPlace());
+          outs[i], out_vec_dims, x->format(), ctx.GetPlace());
       auto reorder_p =
-          reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p, i);
+          reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p);

       reorder_p->execute(astream, *slice_mem_p, *reorder_dst_memory_p);

diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
index 4cc9f53b9b6..8208a484b4a 100644
--- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
@@ -155,15 +155,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel {
     // For in-place execution which sum does not have we need to fake it
     // so from oneDNN dst memory we reorder data into input
     if (in_place) {
-      const std::string reorder_key =
-          platform::CreateKey(dev_ctx, framework::vectorize(output->dims()),
-                              ctx.OutputName("Out") + "-I");
-
       auto& in_out = in_vars[0]->Get<framework::LoDTensor>();
       auto output_tz = framework::vectorize(output->dims());
       platform::ReorderMKLDNNHandler reorder_handler(
           output_tz, output->type(), framework::ToMKLDNNDataType(in_out.type()),
-          dev_ctx, dev_ctx.GetEngine(), reorder_key);
+          dev_ctx.GetEngine());

       auto target_mem = reorder_handler.AcquireDstMemory(
           output, in_out.format(), ctx.GetPlace());

diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
index 0165cfd8b80..17801454da2 100644
--- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
+++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h
@@ -71,10 +71,8 @@ class ReduceMKLDNNKernel : public framework::OpKernel {
     if (input_dims == output_dims) {
       mkldnn::memory::data_type input_type =
           framework::ToMKLDNNDataType(input->type());
-      std::string key = platform::CreateKey(
-          dev_ctx, input_dims, input->format(), input->format(), input_type);
-      platform::ReorderMKLDNNHandler reorder_handler(
-          input_dims, input->type(), input_type, dev_ctx, onednn_engine, key);
+      platform::ReorderMKLDNNHandler reorder_handler(input_dims, input->type(),
+                                                     input_type, onednn_engine);

       auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
           input->format(), platform::to_void_cast(input->data<T>()));
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 49160f94632..29a3f8e9dcd 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -1071,138 +1071,73 @@ class ActivationMKLDNNHandler
   }
 };

-class ReorderMKLDNNHandler : public MKLDNNHandler {
+class ReorderMKLDNNHandler {
  public:
   ReorderMKLDNNHandler(std::vector<int64_t>& dims,  // NOLINT
                        framework::proto::VarType::Type vtype,
-                       mkldnn::memory::data_type dtype,
-                       const platform::MKLDNNDeviceContext& dev_ctx,
-                       mkldnn::engine engine, const std::string& base_key)
-      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
-        dims_(dims),
+                       mkldnn::memory::data_type dtype, mkldnn::engine engine)
+      : dims_(dims),
         vtype_(vtype),
         vtype_dst_(vtype),
         dtype_(dtype),
-        dtype_dst_(dtype) {}
+        dtype_dst_(dtype),
+        engine_(engine) {}

   ReorderMKLDNNHandler(std::vector<int64_t>& dims,  // NOLINT
                        framework::proto::VarType::Type vtype,
                        mkldnn::memory::data_type dtype,
                        framework::proto::VarType::Type vtype_dst,
                        mkldnn::memory::data_type dtype_dst,
-                       const platform::MKLDNNDeviceContext& dev_ctx,
-                       mkldnn::engine engine, const std::string& base_key)
-      : platform::MKLDNNHandler(dev_ctx, engine, base_key),
-        dims_(dims),
+                       mkldnn::engine engine)
+      : dims_(dims),
         vtype_(vtype),
         vtype_dst_(vtype_dst),
         dtype_(dtype),
-        dtype_dst_(dtype_dst) {}
+        dtype_dst_(dtype_dst),
+        engine_(engine) {}

   std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
       const MKLDNNMemoryFormat& fmt, void* ptr) {
-    return this->AcquireMemory(dims_, dtype_, fmt, ptr, "@user_src_mem_p");
+    auto md = mkldnn::memory::desc(dims_, dtype_, fmt);
+    return std::make_shared<mkldnn::memory>(md, engine_, ptr);
   }

   std::shared_ptr<mkldnn::memory> AcquireSubmemory(
       const std::vector<int64_t>& dims, const std::vector<int64_t>& offset,
-      const std::shared_ptr<mkldnn::memory>& mem_p, int submemory_number = 0) {
-    std::string local_key = key_;
-    local_key.append("@submem")
-        .append(std::to_string(submemory_number))
-        .append("_p");
-
-    auto sub_mem_p =
-        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
-    if (sub_mem_p == nullptr) {
-      auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
-      sub_mem_p = std::make_shared<mkldnn::memory>(sub_md, engine_,
-                                                   mem_p->get_data_handle());
-      dev_ctx_.SetBlob(local_key, sub_mem_p);
-    } else {
-      sub_mem_p->set_data_handle(mem_p->get_data_handle());
-    }
+      const std::shared_ptr<mkldnn::memory>& mem_p) {
+    auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
+    auto sub_mem_p = std::make_shared<mkldnn::memory>(sub_md, engine_,
+                                                      mem_p->get_data_handle());
     return sub_mem_p;
   }

   std::shared_ptr<mkldnn::memory> AcquireDstMemory(
       framework::Tensor* output, const MKLDNNMemoryFormat& fmt,
       platform::Place place) {
-    auto local_key = key_ + "@user_dst_mem_p";
-    auto mem_p =
-        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
-    if (mem_p == nullptr) {
-      auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt);
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, dst_md.get_size());
-
-      mem_p = std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
-      dev_ctx_.SetBlob(local_key, mem_p);
-    } else {
-      // Even if memory object exists , we may be using it for diffrent tensor
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, mem_p->get_desc().get_size());
-      mem_p->set_data_handle(dst_data);
-    }
-    return mem_p;
+    auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt);
+    auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
+    return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
   }

   std::shared_ptr<mkldnn::memory> AcquireDstMemory(
       framework::Tensor* output, const std::vector<int64_t>& dims,
-      const int memory_number, const MKLDNNMemoryFormat& fmt,
-      platform::Place place) {
-    auto local_key =
-        key_ + "@user_dst_mem" + std::to_string(memory_number) + "_p";
-    auto mem_p =
-        std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
-    if (mem_p == nullptr) {
-      auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt);
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, dst_md.get_size());
-
-      mem_p = std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
-      dev_ctx_.SetBlob(local_key, mem_p);
-    } else {
-      // Even if memory object exists , we may be using it for diffrent tensor
-      auto dst_data =
-          output->mutable_data(place, vtype_dst_, mem_p->get_desc().get_size());
-      mem_p->set_data_handle(dst_data);
-    }
-    return mem_p;
-  }
-
-  std::shared_ptr<mkldnn::reorder> AcquireReorder(
-      std::shared_ptr<mkldnn::memory> dst_memory_p,
-      std::shared_ptr<mkldnn::memory> src_memory_p, int reorder_number) {
-    auto prim_key = key_ + "@reorder" + std::to_string(reorder_number) + "_p";
-    auto reorder_p =
-        std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
-    if (reorder_p == nullptr) {
-      reorder_p =
-          std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
-      dev_ctx_.SetBlob(prim_key, reorder_p);
-    }
-    return reorder_p;
+      const MKLDNNMemoryFormat& fmt, platform::Place place) {
+    auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt);
+    auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
+    return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
   }

   std::shared_ptr<mkldnn::reorder> AcquireReorder(
       std::shared_ptr<mkldnn::memory> dst_memory_p,
       std::shared_ptr<mkldnn::memory> src_memory_p) {
-    auto prim_key = key_ + "@reorder_p";
-    auto reorder_p =
-        std::static_pointer_cast<mkldnn::reorder>(dev_ctx_.GetBlob(prim_key));
-    if (reorder_p == nullptr) {
-      reorder_p =
-          std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
-      dev_ctx_.SetBlob(prim_key, reorder_p);
-    }
-    return reorder_p;
+    return std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
   }

  private:
  std::vector<int64_t> dims_;
  framework::proto::VarType::Type vtype_, vtype_dst_;
  mkldnn::memory::data_type dtype_, dtype_dst_;
+  mkldnn::engine engine_;
 };

 template
-- 
GitLab
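For orientation, here is the call sequence that the operator kernels above now share, collected in one place. This is an illustrative sketch, not part of the patch: it assumes the usual paddle::operators namespace and kernel-local objects (x, out, ctx, onednn_engine), and the data<T>() access follows the conventional Paddle pattern rather than something shown verbatim in this diff.

#include "paddle/fluid/platform/mkldnn_reuse.h"

// Sketch of a kernel body built on the slimmed-down ReorderMKLDNNHandler.
// No dev_ctx and no cache key: the handler only needs dims, the Paddle and
// oneDNN data types and the engine, and creates fresh memory objects on
// every call.
template <typename T>
void RunReorderSketch(const framework::Tensor* x, framework::Tensor* out,
                      const framework::ExecutionContext& ctx,
                      const mkldnn::engine& onednn_engine) {
  auto x_tz = framework::vectorize(x->dims());
  mkldnn::memory::data_type x_type = framework::ToMKLDNNDataType(x->type());

  platform::ReorderMKLDNNHandler reorder_handler(x_tz, x->type(), x_type,
                                                 onednn_engine);

  auto src_mem_p = reorder_handler.AcquireSrcMemory(
      x->format(), platform::to_void_cast(x->data<T>()));
  auto dst_mem_p =
      reorder_handler.AcquireDstMemory(out, x->format(), ctx.GetPlace());
  auto reorder_p = reorder_handler.AcquireReorder(dst_mem_p, src_mem_p);

  auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
  reorder_p->execute(astream, *src_mem_p, *dst_mem_p);
  astream.wait();
}

AcquireReorder now simply constructs an mkldnn::reorder from the two memories, so the primitive lives only for the duration of the kernel call instead of being stored in the device context's blob map.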