diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
index 6a6741d8fc54d22addca91b75dfabf5950c1a35a..7aaa607f1585c98fe2dd816e8d66e5c6fd171e80 100644
--- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
+++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
@@ -77,8 +77,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
       } else {
         functor.RunMidWise(n, pre, post);
       }
-      z->set_layout(DataLayout::kMKLDNN);
-      z->set_format(x->format());
+      z->set_mkldnn_prim_desc(x->get_mkldnn_prim_desc());
     } else {
       PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN &&
                          x->format() != memory::format::format_undef,
@@ -116,7 +115,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
       auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd);
 
       // create mkldnn memory for dst
-      memory dst_memory = memory(sum_pd.dst_primitive_desc(), z_data);
+      auto dst_mem_pd = sum_pd.dst_primitive_desc();
+      memory dst_memory = memory(dst_mem_pd, z_data);
 
       std::vector<primitive::at> inputs;
       inputs.push_back(srcs[0]);
@@ -129,9 +129,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
       pipeline.push_back(sum_prim);
       stream(stream::kind::eager).submit(pipeline).wait();
 
-      z->set_layout(DataLayout::kMKLDNN);
-      z->set_format(
-          (memory::format)dst_memory.get_primitive_desc().desc().data.format);
+      z->set_mkldnn_prim_desc(dst_mem_pd);
     }
   }
 };
@@ -152,24 +150,19 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
     auto* out = dout;
     auto *x = dout, *y = dout;
 
-    auto set_mkldnn_format = [](Tensor* in, const Tensor* out) {
-      in->set_layout(DataLayout::kMKLDNN);
-      in->set_format(out->format());
-    };
-
     if (dx != nullptr && dy != nullptr && dx->dims() == dy->dims()) {
       if (dx->dims() == dy->dims()) {
         auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
         if (dx) {
           blas.VCOPY(dout->numel(), dout->data<T>(),
                      dx->mutable_data<T>(ctx.GetPlace()));
-          set_mkldnn_format(dx, dout);
+          dx->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc());
         }
 
         if (dy) {
           blas.VCOPY(dout->numel(), dout->data<T>(),
                      dy->mutable_data<T>(ctx.GetPlace()));
-          set_mkldnn_format(dy, dout);
+          dy->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc());
         }
       }
     } else {
diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index 5b7505f3c4acdef94fead04efd00b47825274117..43559940d925e6fff29f0c5c66ec1a3dc717aaf4 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -96,8 +96,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
   std::vector<int> src_tz = framework::vectorize2int(x->dims());
 
-  auto src_format =
-      src_tz.size() == 2 ? mkldnn::memory::format::nc : x->format();
+  auto src_format = x->format();
 
   const std::string key = gethash(src_tz, algorithm);
   const std::string key_src_data =
@@ -127,10 +126,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
   if (p_fwd == nullptr) {
     // create mkldnn memory for input X
-    auto src_md = platform::MKLDNNMemDesc(
-        src_tz, platform::MKLDNNGetDataType<T>(), src_format);
     auto src_memory = std::shared_ptr<memory>(
-        new memory({src_md, mkldnn_engine}, to_void_cast<T>(x_data)));
+        new memory(x->get_mkldnn_prim_desc(), to_void_cast<T>(x_data)));
 
     // save src_memory to be referred in backward path
     dev_ctx.SetBlob(key_src_mem, src_memory);
@@ -177,8 +174,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
   pipeline.push_back(*p_fwd);
   stream(stream::kind::eager).submit(pipeline).wait();
 
-  y->set_layout(DataLayout::kMKLDNN);
-  y->set_format(GetMKLDNNFormat(*dst_memory));
+  y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc());
 }
 
 template <typename T, mkldnn::algorithm algorithm>
@@ -196,9 +192,6 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
 
   std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims());
 
-  auto diff_y_format =
-      diff_dst_tz.size() == 2 ? mkldnn::memory::format::nc : diff_y->format();
-
   const std::string key = gethash(diff_dst_tz, algorithm);
   const std::string key_src_data =
       key + ctx.op().Input("Out") + "@eltwise_fwd_src_data";
@@ -210,8 +203,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
       key + std::to_string(*p_src_layout) + "@eltwise_fwd_src_mem";
   const std::string key_fwd_pd =
       key + std::to_string(*p_src_layout) + "@eltwise_fwd_pd";
-  const std::string key_with_layouts =
-      key + std::to_string(*p_src_layout) + "-" + std::to_string(diff_y_format);
+  const std::string key_with_layouts = key + std::to_string(*p_src_layout) +
+                                       "-" + std::to_string(diff_y->format());
   const std::string key_diff_src_mem =
       key_with_layouts + "@eltwise_diff_src_mem";
   const std::string key_diff_dst_mem =
@@ -234,10 +227,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
 
   if (p_grad == nullptr) {
     // create mkldnn memory for input diff_y
-    auto diff_dst_md = platform::MKLDNNMemDesc(
-        diff_dst_tz, platform::MKLDNNGetDataType<T>(), diff_y_format);
     auto diff_dst_memory = std::shared_ptr<memory>(
-        new memory({diff_dst_md, mkldnn_engine}, to_void_cast<T>(diff_y_data)));
+        new memory(diff_y->get_mkldnn_prim_desc(), to_void_cast<T>(diff_y_data)));
     dev_ctx.SetBlob(key_diff_dst_mem, diff_dst_memory);
 
     // retrieve eltwise primitive desc from device context
@@ -281,8 +272,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
   pipeline.push_back(*p_grad);
   stream(stream::kind::eager).submit(pipeline).wait();
 
-  diff_x->set_layout(DataLayout::kMKLDNN);
-  diff_x->set_format(GetMKLDNNFormat(*diff_src_memory));
+  diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
 }
 
 template <typename T, mkldnn::algorithm algorithm>
diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
index bddca232e6c8a2a7fde998877006e37ee6d3d0dc..04e45d4853907bb7d6b5ce362892a2183fd4b60e 100644
--- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
@@ -206,17 +206,14 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu;
 
     // create mkldnn memory from input x tensor
-    mkldnn::memory::format input_format =
-        platform::MKLDNNFormatForSize(src_tz.size(), x->format());
 
     // keys for backward pass
     const std::string key = BatchNormMKLDNNHandler::GetHash(
-        src_tz, epsilon, flags, global_stats, input_format,
+        src_tz, epsilon, flags, global_stats, x->format(),
         ctx.op().Output("SavedMean"));
     const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
 
-    auto user_src_md = platform::MKLDNNMemDesc(
-        {src_tz}, platform::MKLDNNGetDataType<T>(), input_format);
+    auto user_src_md = x->get_mkldnn_prim_desc().desc();
 
     // create primitive descriptor for batch norm forward
     using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>;
@@ -230,8 +227,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     BatchNormMKLDNNHandler handler(batch_norm_fwd_pd, dev_ctx, mkldnn_engine,
                                    key);
 
-    auto src_memory =
-        handler.AcquireSrcMemory(user_src_md, to_void_cast<T>(x_data));
+    auto src_memory = handler.AcquireSrcMemory(x->get_mkldnn_prim_desc(),
+                                               to_void_cast<T>(x_data));
 
     // crate mkldnn memory for weights(scale/shift)
     auto scaleshift_memory =
@@ -265,8 +262,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                            variance_memory, false);
     }
 
-    y->set_layout(DataLayout::kMKLDNN);
-    y->set_format(platform::GetMKLDNNFormat(*dst_memory));
+    y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc());
 
     std::vector<primitive> pipeline;
     pipeline.push_back(*batch_norm_p);
@@ -336,9 +332,6 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 
     using bn_bwd_types = bn_type_traits<mkldnn::batch_normalization_backward>;
 
-    mkldnn::memory::format dst_format =
-        platform::MKLDNNFormatForSize(src_tz.size(), diff_y->format());
-
     mkldnn::memory::format input_format =
         platform::MKLDNNFormatForSize(src_tz.size(), x->format());
 
@@ -346,14 +339,14 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 
     // keys from forward pass
     const std::string key = BatchNormMKLDNNHandler::GetHash(
-        src_tz, epsilon, flags, false, input_format,
+        src_tz, epsilon, flags, false, x->format(),
         ctx.op().Input("SavedMean"));
     const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
 
     // keys for primitives reuse
     const std::string key_with_hash =
         key + BatchNormMKLDNNHandler::GetHash(src_tz, epsilon, flags, false,
-                                              input_format);
+                                              x->format());
     const std::string key_batch_norm_bwd_p =
         key_with_hash + "@batch_norm_bwd_p";
     const std::string key_batch_norm_src_mem_p =
@@ -373,9 +366,8 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 
     primitive reorder_diff_dst;
     bool is_diff_dst_reordered = false;
-    auto user_diff_dst_memory = memory(
-        {{{diff_dst_tz}, memory::data_type::f32, dst_format}, mkldnn_engine},
-        to_void_cast<T>(diff_y_data));
+    auto user_diff_dst_memory =
+        memory(diff_y->get_mkldnn_prim_desc(), to_void_cast<T>(diff_y_data));
 
     // MKLDNN requires a single piece of memory for scale and shift/bias data
     const size_t scaleshift_size = 2 * ic;
@@ -459,10 +451,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
       dev_ctx.SetBlob(key_batch_norm_diff_dst_mem_p, diff_dst_memory);
 
       // set layout/format of output tensors
-      diff_x->set_layout(DataLayout::kMKLDNN);
-      diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc()
-                             .desc()
-                             .data.format);
+      diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
     } else {  // primitives already exist
       UpdateMemoryData(dev_ctx, key_batch_norm_src_mem_p,
                        to_void_cast<T>(x_data));
@@ -487,10 +476,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
       }
 
       // set layout/format of output tensors
-      diff_x->set_layout(DataLayout::kMKLDNN);
-      diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc()
-                             .desc()
-                             .data.format);
+      diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
     }
 
     // execute optional reorder and batch_norm backward primitive
diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
index 7ad674056f0d753d79408a11eff1aca47a84998a..54c6a71111a2cc2f9e5004922ae5d3541a9d0a70 100644
--- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -47,11 +47,6 @@ static memory::primitive_desc CreateMemPrimDesc(const Tensor& input,
   return mem_prim_desc;
 }
 
-static mkldnn::memory::format GetDstMemFormat(
-    const concat::primitive_desc& concat_pd) {
-  return (memory::format)concat_pd.dst_primitive_desc().desc().data.format;
-}
-
 static platform::CPUPlace GetCpuPlace(
     const paddle::framework::ExecutionContext& ctx) {
   auto place = ctx.GetPlace();
@@ -139,8 +134,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto concat = prim_creator.CreateConcatPrimitive(concat_pd, output, place);
     stream(stream::kind::eager).submit({concat}).wait();
 
-    output->set_layout(DataLayout::kMKLDNN);
-    output->set_format(GetDstMemFormat(concat_pd));
+    output->set_mkldnn_prim_desc(concat_pd.dst_primitive_desc());
   }
 };
 }  // namespace operators
diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
index 7ac64e6ba134c034acc58c7310cd51da0f03d16d..29307e30768ac22435204d44b686c026d5f9362e 100644
--- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -282,8 +282,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     pipeline.push_back(*conv_p);
     stream(stream::kind::eager).submit(pipeline).wait();
 
-    auto dst_mpd = dst_memory_p->get_primitive_desc();
-    output->set_mkldnn_prim_desc(dst_mpd);
+    output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc());
   }
   void ComputeINT8(const paddle::framework::ExecutionContext& ctx) const {
     const bool is_test = ctx.Attr<bool>("is_test");
@@ -972,8 +971,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 
       pipeline.push_back(*conv_bwd_data_p);
 
-      input_grad->set_layout(DataLayout::kMKLDNN);
-      input_grad->set_format(GetMKLDNNFormat(*diff_src_memory_p));
+      input_grad->set_mkldnn_prim_desc(diff_src_memory_p->get_primitive_desc());
     }
     stream(stream::kind::eager).submit(pipeline).wait();
   }
diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
index 317d4cebe26b81ff03c212e6328233d5152ed1b4..79a0c5c7683d677daeb4feea10deab86407f944c 100644
--- a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
@@ -221,8 +221,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     pipeline.push_back(*conv_p);
     mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
 
-    output->set_layout(DataLayout::kMKLDNN);
-    output->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
+    output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc());
   }
 
  private:
diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
index 097ba01d401dbc7969e30f576cac2567c874ed99..4ff27ab12280b56abdf72056fe69ec713f2f2f46 100644
--- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
@@ -81,10 +81,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto e_mid = framework::EigenTensor<T, 4>::From(*mid);
     e_mid = e_mid.constant(k);
 
-    auto dims = paddle::framework::vectorize2int(x->dims());
-
-    auto src_md = paddle::platform::MKLDNNMemDesc(
-        dims, mkldnn::memory::data_type::f32, x->format());
+    auto src_md = x->get_mkldnn_prim_desc().desc();
 
     auto forward_desc = mkldnn::lrn_forward::desc{mkldnn::prop_kind::forward,
                                                   mkldnn::lrn_across_channels,
@@ -94,7 +91,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                                                   beta,
                                                   k};
 
-    auto src_memory_pd = mkldnn::memory::primitive_desc{src_md, mkldnn_engine};
+    auto src_memory_pd = x->get_mkldnn_prim_desc();
 
     if (!is_test) {
       const std::string key = ctx.op().Output("Out");
@@ -111,16 +108,15 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
       src_memory->set_data_handle(
          static_cast<void*>(const_cast<T*>(input_data)));
 
-      auto dst_memory = mkldnn::memory(forward_pd->dst_primitive_desc(),
-                                       static_cast<void*>(output_data));
+      auto dst_memory_pd = forward_pd->dst_primitive_desc();
+      auto dst_memory =
+          mkldnn::memory(dst_memory_pd, static_cast<void*>(output_data));
       auto workspace_memory = insert_to_context<mkldnn::memory>(
          key_workspace_memory, dev_ctx,
          forward_pd->workspace_primitive_desc());
 
       run_primitive(*forward_pd, *src_memory, *workspace_memory, dst_memory);
-
-      out->set_layout(framework::DataLayout::kMKLDNN);
-      out->set_format(platform::GetMKLDNNFormat(dst_memory));
+      out->set_mkldnn_prim_desc(dst_memory_pd);
     } else {
@@ -128,13 +124,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       auto forward_pd =
          mkldnn::lrn_forward::primitive_desc{forward_desc, mkldnn_engine};
       auto src_memory = mkldnn::memory{
          src_memory_pd, static_cast<void*>(const_cast<T*>(input_data))};
       auto workspace_memory =
          mkldnn::memory{forward_pd.workspace_primitive_desc()};
+      auto dst_memory_pd = forward_pd.dst_primitive_desc();
       auto dst_memory = mkldnn::memory(forward_pd.dst_primitive_desc(),
                                        static_cast<void*>(output_data));
 
       run_primitive(forward_pd, src_memory, workspace_memory, dst_memory);
-
-      out->set_layout(framework::DataLayout::kMKLDNN);
-      out->set_format(platform::GetMKLDNNFormat(dst_memory));
+      out->set_mkldnn_prim_desc(dst_memory_pd);
     }
   }
 };
diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
index dc1176f0848b93dd6872f676c3a71dab4f3455fd..0ce552219458859e147ba207c94270bf84a1fe75 100644
--- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
@@ -158,6 +158,14 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     auto softmax_p =
         handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p);
 
+    // We cannot use softmax_dst_memory_p to get prim desc as
+    // it contains flattened dims (2D) while output tensor can
+    // have 2,3,4+ dims
+    auto output_mem_pd = paddle::platform::create_prim_desc_from_dims(
+        paddle::framework::vectorize2int(output->dims()),
+        mkldnn::memory::format::blocked);
+    output->set_mkldnn_prim_desc(output_mem_pd);
+
     std::vector<primitive> pipeline{
         *(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
     stream(stream::kind::eager).submit(pipeline).wait();
diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
index 6f64157b64e2f6247db8b49dc94cd10bfb6e861f..aef5b7d4311adfedb3db157f17506c3a2c76fbf6 100644
--- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
@@ -106,12 +106,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
           memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
 
       auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
-
+      auto dst_mem_pd = sum_pd.dst_primitive_desc();
       std::shared_ptr<memory> dst_mem;
       if (in_place) {
-        dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
+        dst_mem.reset(new memory(dst_mem_pd));
       } else {
-        dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
+        dst_mem.reset(new memory(dst_mem_pd, output_data));
       }
       std::vector<primitive::at> inputs;
       for (size_t i = 0; i < srcs_mem.size(); ++i) {
@@ -136,8 +136,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       if (in_place) pipeline.push_back(reorder_prim);
       stream(stream::kind::eager).submit(pipeline).wait();
 
-      output->set_layout(DataLayout::kMKLDNN);
-      output->set_format(output_format);
+      output->set_mkldnn_prim_desc(dst_mem_pd);
     } else {  // Fallback to naive version
       // TODO(@mozga-intel) Add MKLDNN SelectedRows & LoDTensorArray support
       SumKernel<CPUDeviceContext, T> reference_kernel;
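The recurring change across all of these kernels replaces the two-step output bookkeeping, set_layout(DataLayout::kMKLDNN) plus set_format(...), with a single set_mkldnn_prim_desc(...) call that records the destination memory's full primitive descriptor on the tensor. For context, below is a minimal standalone sketch, written against the MKL-DNN 0.x C++ API only (mkldnn.hpp); the Paddle-side Tensor setters shown in the diff are referenced in comments but not reproduced, and the dims/format here are illustrative:

// Sketch of the MKL-DNN 0.x objects these kernels propagate.
#include <mkldnn.hpp>
#include <vector>

int main() {
  using namespace mkldnn;
  engine cpu_engine(engine::cpu, 0);

  // A 4-D f32 tensor in NCHW layout (dims chosen arbitrarily).
  memory::dims tz = {2, 3, 4, 5};
  auto md = memory::desc(tz, memory::data_type::f32, memory::format::nchw);
  auto mpd = memory::primitive_desc(md, cpu_engine);

  // User memory wrapping an existing buffer, as the kernels do with
  // to_void_cast<T>(x_data).
  std::vector<float> buf(2 * 3 * 4 * 5);
  memory src(mpd, buf.data());

  // A memory primitive descriptor fully identifies dims, data type, and
  // format, which is why one Paddle-side call,
  //   z->set_mkldnn_prim_desc(dst_memory.get_primitive_desc());
  // can replace the old pair
  //   z->set_layout(DataLayout::kMKLDNN);
  //   z->set_format(GetMKLDNNFormat(dst_memory));
  (void)src;  // silence unused-variable warning in this sketch
  return 0;
}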