From db468d7dea23403f1bdd83223cc258bbd142e4d7 Mon Sep 17 00:00:00 2001 From: jakpiase Date: Thu, 21 Apr 2022 10:48:04 +0200 Subject: [PATCH] oneDNN md-in-tensor 2nd batch of changes (#41997) --- .../operators/mkldnn/activation_mkldnn_op.cc | 10 ++-- .../operators/mkldnn/batch_norm_mkldnn_op.cc | 51 +++------------- .../fluid/operators/mkldnn/clip_mkldnn_op.cc | 6 +- .../operators/mkldnn/concat_mkldnn_op.cc | 17 ++---- .../operators/mkldnn/expand_v2_mkldnn_op.cc | 15 +++-- .../mkldnn/gaussian_random_mkldnn_op.cc | 11 +++- .../operators/mkldnn/interpolate_mkldnn_op.cc | 20 +++---- .../operators/mkldnn/layer_norm_mkldnn_op.cc | 23 ++++---- .../operators/mkldnn/log_softmax_mkldnn_op.cc | 9 +-- .../fluid/operators/mkldnn/lrn_mkldnn_op.cc | 21 ++----- .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 59 ++++--------------- .../fluid/operators/mkldnn/prelu_mkldnn_op.cc | 29 ++++----- .../fluid/operators/mkldnn/scale_mkldnn_op.cc | 3 +- .../fluid/operators/mkldnn/shape_mkldnn_op.cc | 12 ++-- .../mkldnn/shuffle_channel_mkldnn_op.cc | 8 +-- .../operators/mkldnn/softmax_mkldnn_op.cc | 27 +++------ .../operators/mkldnn/softplus_mkldnn_op.h | 13 ++-- .../mkldnn/test_expand_v2_mkldnn_op.py | 31 +++++----- 18 files changed, 121 insertions(+), 244 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index bdd868c1e2..ecee094de3 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -107,8 +107,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx, astream, {{DNNL_ARG_FROM, *src_memory_p}, {DNNL_ARG_TO, *dst_memory_p}}); astream.wait(); - out->set_layout(DataLayout::kMKLDNN); - out->set_format(GetMKLDNNFormat(*dst_memory_p)); + out->set_mem_desc(dst_memory_p->get_desc()); } template @@ -136,8 +135,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx, {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}}); astream.wait(); - dx->set_layout(DataLayout::kMKLDNN); - dx->set_format(GetMKLDNNFormat(*diff_src_memory_p)); + dx->set_mem_desc(diff_src_memory_p->get_desc()); } template @@ -165,8 +163,7 @@ void eltwise_grad_use_out(const framework::ExecutionContext &ctx, {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}}); astream.wait(); - dx->set_layout(DataLayout::kMKLDNN); - dx->set_format(GetMKLDNNFormat(*diff_src_memory_p)); + dx->set_mem_desc(diff_src_memory_p->get_desc()); } template @@ -347,6 +344,7 @@ namespace ops = paddle::operators; FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL); +// round eltwise primitive doesn't support BF16, nor does it support grad REGISTER_ACTIVATION_MKLDNN_KERNEL_FWD_ONLY(round, RoundMKLDNNFunctor); namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc index 900d3e54c7..3abdb90540 100644 --- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc @@ -54,17 +54,6 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< std::vector DataLayout_error_msg = {"kNHWC", "kNCHW", "kAnyLayout", "kMKLDNN"}; - PADDLE_ENFORCE_EQ( - x->layout(), DataLayout::kMKLDNN, - platform::errors::InvalidArgument( - "Wrong layout set for X tensor. 
Expected layout is `kMKLDNN`, " - "But received %s.", - DataLayout_error_msg[static_cast(DataLayout::kMKLDNN)])); - PADDLE_ENFORCE_NE( - x->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument("Wrong format set for X tensor")); - - auto src_tz = phi::vectorize(x->dims()); // Flags are added by bitwise OR operation auto flags = dnnl::normalization_flags::use_scale_shift; // 001 @@ -73,14 +62,10 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< if (fuse_with_relu && test_mode) flags |= dnnl::normalization_flags::fuse_norm_relu; // 100 - auto md = dnnl::memory::desc( - src_tz, platform::MKLDNNGetDataType(), - platform::MKLDNNFormatForSize(src_tz.size(), x->format())); - this->AcquireForwardPrimitiveDescriptor( global_stats == true ? dnnl::prop_kind::forward_scoring : dnnl::prop_kind::forward_training, - md, epsilon, flags); + x->mem_desc(), epsilon, flags); } BatchNormMKLDNNHandler(const paddle::framework::ExecutionContext &ctx, @@ -89,14 +74,6 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, ctx.GetPlace()) { - PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN, - platform::errors::InvalidArgument( - "Wrong layout set for Input out_grad tensor")); - PADDLE_ENFORCE_NE(out_grad->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument( - "Wrong format set for Input out_grad tensor")); - - auto src_tz = phi::vectorize(in_x->dims()); auto scale_tz = phi::vectorize(scale->dims()); PADDLE_ENFORCE_EQ( scale_tz.size(), 1, @@ -104,26 +81,14 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< "Dims of scale tensor must be 1, but received scale's size is %d", scale_tz.size())); - MKLDNNMemoryFormat diff_fmt = - platform::MKLDNNFormatForSize(src_tz.size(), out_grad->format()); - - MKLDNNMemoryFormat src_fmt = - platform::MKLDNNFormatForSize(src_tz.size(), in_x->format()); - - auto dims = phi::vectorize(in_x->dims()); - auto diff_dst_md = - dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), diff_fmt); - auto src_md = - dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), src_fmt); - const float epsilon = ctx.Attr("epsilon"); this->AcquireForwardPrimitiveDescriptor( - dnnl::prop_kind::forward_training, src_md, epsilon, + dnnl::prop_kind::forward_training, in_x->mem_desc(), epsilon, dnnl::normalization_flags::use_scale_shift); this->AcquireBackwardPrimitiveDescriptor( - dnnl::prop_kind::backward, diff_dst_md, src_md, epsilon, - dnnl::normalization_flags::use_scale_shift); + dnnl::prop_kind::backward, out_grad->mem_desc(), in_x->mem_desc(), + epsilon, dnnl::normalization_flags::use_scale_shift); } std::shared_ptr AcquireScaleShiftMemory(const Tensor *scale, @@ -227,8 +192,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { variance_memory = handler.AcquireVarianceMemory(batch_variance); } - y->set_layout(DataLayout::kMKLDNN); - y->set_format(platform::GetMKLDNNFormat(*dst_memory)); + y->set_mem_desc(dst_memory->get_desc()); auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); batch_norm_p->execute(astream, {{DNNL_ARG_SRC, *src_memory}, @@ -322,9 +286,8 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { std::copy(std::next(it, C), std::end(diff_scaleshift_data), diff_shift_data); - // set layout/format of output tensors - diff_x->set_layout(DataLayout::kMKLDNN); - diff_x->set_format(platform::GetMKLDNNFormat(*diff_src_memory)); + // set memory descriptor of out tensor + 
diff_x->set_mem_desc(diff_src_memory->get_desc()); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/clip_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/clip_mkldnn_op.cc index 83ccd80e17..bfa7db82bd 100644 --- a/paddle/fluid/operators/mkldnn/clip_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/clip_mkldnn_op.cc @@ -46,8 +46,7 @@ class ClipMKLDNNKernel : public paddle::framework::OpKernel { {DNNL_ARG_TO, *dst_memory_p}}); astream.wait(); - out->set_layout(paddle::framework::DataLayout::kMKLDNN); - out->set_format(paddle::platform::GetMKLDNNFormat(*dst_memory_p)); + out->set_mem_desc(dst_memory_p->get_desc()); } }; @@ -83,8 +82,7 @@ class ClipGradMKLDNNKernel : public paddle::framework::OpKernel { {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}}); astream.wait(); - dx->set_layout(paddle::framework::DataLayout::kMKLDNN); - dx->set_format(paddle::platform::GetMKLDNNFormat(*diff_dst_memory_p)); + dx->set_mem_desc(diff_dst_memory_p->get_desc()); } }; diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 4b8e5f0334..5095fa0671 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -68,8 +68,7 @@ class ConcatMKLDNNHandler // Create memory descriptors for each of inputs for (size_t i = 0; i < inputs.size(); ++i) { - const auto dims = phi::vectorize(inputs[i]->dims()); - srcs_md.emplace_back(memory::desc(dims, dt, inputs[i]->format())); + srcs_md.push_back(inputs[i]->mem_desc()); } auto dst_dims = phi::vectorize(output->dims()); @@ -99,9 +98,6 @@ static void EnforceLayouts(const std::vector inputs) { PADDLE_ENFORCE_EQ( input->layout(), DataLayout::kMKLDNN, platform::errors::InvalidArgument("Wrong layout set for Input tensor")); - PADDLE_ENFORCE_NE( - input->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument("Wrong format set for Input tensor")); } } @@ -147,8 +143,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { concat_p->execute(astream, args); astream.wait(); - output->set_layout(DataLayout::kMKLDNN); - output->set_format(platform::GetMKLDNNFormat(*dst_mem)); + output->set_mem_desc(dst_mem->get_desc()); } }; @@ -192,7 +187,7 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel { dout_vec_dims, framework::TransToProtoVarType(dout->dtype()), dout_type, onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( - dout->format(), platform::to_void_cast(dout->data())); + dout->mem_desc(), platform::to_void_cast(dout->data())); for (size_t i = 0; i < dx.size(); ++i) { if (out_var_names[i] != framework::kEmptyVarName && @@ -202,7 +197,8 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel { dx_vec_dims, offset, reorder_src_memory_p); auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory( - dx[i], dx_vec_dims, dout->format(), ctx.GetPlace()); + dx[i], dx_vec_dims, + platform::GetPlainMKLDNNFormat(dx_vec_dims.size()), ctx.GetPlace()); auto reorder_p = reorder_handler.AcquireReorder(reorder_dst_memory_p, slice_mem_p); @@ -210,8 +206,7 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel { offset[axis] += dx[i]->dims()[axis]; - dx[i]->set_layout(framework::DataLayout::kMKLDNN); - dx[i]->set_format(platform::GetMKLDNNFormat(*reorder_dst_memory_p)); + dx[i]->set_mem_desc(reorder_dst_memory_p->get_desc()); } } astream.wait(); diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc 
b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc index 7a81e90e45..05d6bae5f7 100644 --- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc @@ -115,10 +115,11 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel { dout_type, onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( - dout->format(), paddle::platform::to_void_cast(dout->data())); + dout->mem_desc(), paddle::platform::to_void_cast(dout->data())); - auto reorder_dst_memory_p = - reorder_handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace()); + auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory( + dx, paddle::platform::GetPlainMKLDNNFormat(dx_vec_dims.size()), + ctx.GetPlace()); auto reorder_p = reorder_handler.AcquireReorder(reorder_src_memory_p, reorder_dst_memory_p); @@ -126,9 +127,7 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel { reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); astream.wait(); - dx->set_layout(paddle::framework::DataLayout::kMKLDNN); - dx->set_format( - paddle::platform::GetMKLDNNFormat(reorder_dst_memory_p->get_desc())); + dx->set_mem_desc(reorder_dst_memory_p->get_desc()); } else { paddle::platform::ReductionMKLDNNHandler handler( dnnl::algorithm::reduction_sum, 0.0f, 0.0f, onednn_engine, @@ -145,8 +144,8 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel { reduction_p->execute(astream, reduction_args); astream.wait(); dx->set_layout(paddle::framework::DataLayout::kMKLDNN); - dx->set_format(paddle::platform::GetMKLDNNFormat( - dst_memory_p->get_desc().reshape(vectorize(dx->dims())))); + dx->set_mem_desc( + dst_memory_p->get_desc().reshape(vectorize(dx->dims()))); } } }; diff --git a/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc index de999035fa..1a122503c0 100644 --- a/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/operators/fill_constant_op.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/mkldnn_reuse.h" namespace paddle { namespace operators { @@ -42,8 +42,13 @@ class GaussianMKLDNNKernel : public paddle::framework::OpKernel { data[i] = dist(*engine); } - tensor->set_layout(DataLayout::kMKLDNN); - tensor->set_format(platform::GetPlainMKLDNNFormat(tensor->dims().size())); + dnnl::memory::desc out_mem_desc( + phi::vectorize(tensor->dims()), + framework::ToMKLDNNDataType( + framework::TransToProtoVarType(tensor->dtype())), + platform::GetPlainMKLDNNFormat(tensor->dims().size())); + + tensor->set_mem_desc(out_mem_desc); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc index 04b90d2f1f..37d6c07290 100644 --- a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc @@ -34,17 +34,14 @@ class InterpolateMKLDNNHandler public: InterpolateMKLDNNHandler(const dnnl::algorithm algo, const dnnl::engine engine, platform::Place cpu_place, - const Tensor* x, Tensor* z) + const Tensor* x, Tensor* out) : platform::MKLDNNHandlerNoCachingT( engine, cpu_place) { - const auto src_x_tz = phi::vectorize(x->dims()); - const auto dst_tz = phi::vectorize(z->dims()); - const auto src_md = dnnl::memory::desc( - src_x_tz, platform::MKLDNNGetDataType(), x->format()); + const auto dst_tz = phi::vectorize(out->dims()); const auto dst_md = memory::desc(dst_tz, platform::MKLDNNGetDataType(), MKLDNNMemoryFormat::any); this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_inference, - algo, src_md, dst_md); + algo, x->mem_desc(), dst_md); } }; @@ -133,7 +130,7 @@ class InterpolateMKLDNNKernel : public framework::OpKernel { const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input("X"); - auto* z = ctx.Output("Out"); + auto* out = ctx.Output("Out"); const auto interp_method = ctx.Attr("interp_method"); const dnnl::algorithm algo = (interp_method == "nearest") @@ -142,13 +139,13 @@ class InterpolateMKLDNNKernel : public framework::OpKernel { const auto out_dims_vec = ComputeOutputShape(ctx); framework::DDim dim_out = phi::make_ddim(out_dims_vec); - z->Resize(dim_out); + out->Resize(dim_out); InterpolateMKLDNNHandler handler(algo, mkldnn_engine, ctx.GetPlace(), x, - z); + out); auto src_memory_p = handler.AcquireSrcMemory(x); - auto dst_memory_p = handler.AcquireDstMemory(z); + auto dst_memory_p = handler.AcquireDstMemory(out); auto resampling_prim = handler.AcquireForwardPrimitive(); const std::unordered_map args = { @@ -158,8 +155,7 @@ class InterpolateMKLDNNKernel : public framework::OpKernel { resampling_prim->execute(astream, args); astream.wait(); - z->set_layout(DataLayout::kMKLDNN); - z->set_format(platform::GetMKLDNNFormat(*dst_memory_p)); + out->set_mem_desc(dst_memory_p->get_desc()); } }; diff --git a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc index 2e82b47e8d..8f98a0b9fb 100644 --- a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc @@ -25,22 +25,21 @@ class LayerNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< public: LayerNormMKLDNNHandler(const std::vector& dims, const float& epsilon, const dnnl::normalization_flags& flags, - const bool& is_test, const MKLDNNMemoryFormat fmt, + const 
bool& is_test, const Tensor* x, const dnnl::engine engine, platform::Place cpu_place) : platform::MKLDNNHandlerNoCachingT( engine, cpu_place) { - auto md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), fmt); if (!is_test) { // TODO(grygielski) Delete forcing stats_md after DNNL 1.2 is introduced auto stats_md = dnnl::memory::desc( {begin(dims), end(dims) - 1}, platform::MKLDNNGetDataType(), - platform::MKLDNNFormatForSize(dims.size() - 1, - MKLDNNMemoryFormat::nchw)); + platform::GetPlainMKLDNNFormat(dims.size() - 1)); this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training, - md, stats_md, epsilon, flags); + x->mem_desc(), stats_md, epsilon, + flags); } else { this->AcquireForwardPrimitiveDescriptor( - dnnl::prop_kind::forward_inference, md, epsilon, flags); + dnnl::prop_kind::forward_inference, x->mem_desc(), epsilon, flags); } } @@ -83,7 +82,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { auto* x = ctx.Input("X"); auto* scale = ctx.Input("Scale"); auto* bias = ctx.Input("Bias"); - auto* y = ctx.Output("Y"); + auto* out = ctx.Output("Y"); const float epsilon = ctx.Attr("epsilon"); const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); @@ -107,12 +106,11 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { flags |= dnnl::normalization_flags::use_scale_shift; } - LayerNormMKLDNNHandler handler(src_tz, epsilon, flags, is_test, - x->format(), mkldnn_engine, - ctx.GetPlace()); + LayerNormMKLDNNHandler handler(src_tz, epsilon, flags, is_test, x, + mkldnn_engine, ctx.GetPlace()); auto src_memory = handler.AcquireSrcMemory(x); - auto dst_memory = handler.AcquireDstMemory(y); + auto dst_memory = handler.AcquireDstMemory(out); auto layer_norm_p = handler.AcquireForwardPrimitive(); @@ -140,8 +138,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { layer_norm_p->execute(astream, args); astream.wait(); - y->set_layout(phi::DataLayout::kMKLDNN); - y->set_format(platform::GetMKLDNNFormat(*dst_memory)); + out->set_mem_desc(dst_memory->get_desc()); } }; diff --git a/paddle/fluid/operators/mkldnn/log_softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/log_softmax_mkldnn_op.cc index 626d3ef40b..a4d768e84d 100644 --- a/paddle/fluid/operators/mkldnn/log_softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/log_softmax_mkldnn_op.cc @@ -28,12 +28,8 @@ class LogSoftmaxMKLDNNHandler const int axis) : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, cpu_place) { - const auto logsoftmax_tz = phi::vectorize(x->dims()); - const auto md = dnnl::memory::desc( - logsoftmax_tz, platform::MKLDNNGetDataType(), x->format()); - this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_inference, - md, axis); + x->mem_desc(), axis); } }; @@ -63,8 +59,7 @@ class LogSoftmaxMKLDNNKernel : public framework::OpKernel { {DNNL_ARG_DST, *dst_memory_p}}); astream.wait(); - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(x->format()); + out->set_mem_desc(dst_memory_p->get_desc()); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index 849dba8538..d3a36555c3 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -44,15 +44,11 @@ class LRNMKLDNNHandler const float k = ctx.Attr("k"); bool is_test = ctx.Attr("is_test"); - auto dims = phi::vectorize(input->dims()); - - auto src_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), - input->format()); - 
this->AcquireForwardPrimitiveDescriptor( is_test ? dnnl::prop_kind::forward_inference : dnnl::prop_kind::forward_training, - dnnl::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); + dnnl::algorithm::lrn_across_channels, input->mem_desc(), n, alpha, beta, + k); } LRNMKLDNNHandler(const framework::ExecutionContext& ctx, @@ -72,20 +68,13 @@ class LRNMKLDNNHandler const float beta = ctx.Attr("beta"); const float k = ctx.Attr("k"); - auto dims = phi::vectorize(in_x->dims()); - - auto src_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), - in_x->format()); - auto diff_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), - out_grad->format()); - this->AcquireForwardPrimitiveDescriptor( dnnl::prop_kind::forward_training, dnnl::algorithm::lrn_across_channels, - src_md, n, alpha, beta, k); + in_x->mem_desc(), n, alpha, beta, k); this->AcquireBackwardPrimitiveDescriptor( - dnnl::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, beta, - k); + dnnl::algorithm::lrn_across_channels, in_x->mem_desc(), + out_grad->mem_desc(), n, alpha, beta, k); } std::shared_ptr AcquireWorkspaceMemory(Tensor* workspace) { diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index 1078b451c5..77763531c8 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -41,13 +41,6 @@ class PoolingMKLDNNHandler : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, ctx.GetPlace()) { - PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN, - platform::errors::InvalidArgument( - "Wrong layout set for Input tensor.")); - PADDLE_ENFORCE_NE(input->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument( - "Wrong format set for Input tensor.")); - const std::string pooling_type = ctx.Attr("pooling_type"); std::vector ksize_temp = ctx.Attr>("ksize"); @@ -91,29 +84,18 @@ class PoolingMKLDNNHandler phi::funcs::UpdatePadding(&paddings, global_pooling, 0, padding_algorithm, data_dims, strides, ksize); - const auto src_tz = phi::vectorize(input->dims()); - const auto dst_tz = phi::vectorize(output->dims()); - const auto is_test = ctx.Attr("is_test"); + const bool ceil_mode = ctx.Attr("ceil_mode"); + const auto exclude_padding = ctx.Attr("exclusive"); + auto mkldnn_paddings = platform::ToMkldnnPadding(paddings); const auto dt = framework::ToMKLDNNDataType( framework::TransToProtoVarType(input->dtype())); - - const auto exclude_padding = ctx.Attr("exclusive"); - - const auto src_md = dnnl::memory::desc(src_tz, dt, input->format()); - /* create memory descriptor for pooling without specified format - * ('any') which lets a primitive (pooling in this case) choose - * the memory format preferred for best performance - */ - + const auto src_tz = phi::vectorize(input->dims()); + const auto dst_tz = phi::vectorize(output->dims()); const auto dst_md = platform::MKLDNNMemDesc(dst_tz, dt, MKLDNNMemoryFormat::any); - auto mkldnn_paddings = platform::ToMkldnnPadding(paddings); - - const bool ceil_mode = ctx.Attr("ceil_mode"); - if (ceil_mode) { CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides, mkldnn_paddings[1]); @@ -128,7 +110,8 @@ class PoolingMKLDNNHandler ? dnnl::algorithm::pooling_max : (exclude_padding ? 
dnnl::algorithm::pooling_avg_exclude_padding : dnnl::algorithm::pooling_avg_include_padding), - src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]); + input->mem_desc(), dst_md, strides, ksize, mkldnn_paddings[0], + mkldnn_paddings[1]); } PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, @@ -138,20 +121,6 @@ class PoolingMKLDNNHandler : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, ctx.GetPlace()) { - PADDLE_ENFORCE_EQ( - in_x->layout(), DataLayout::kMKLDNN, - platform::errors::InvalidArgument("Wrong layout set for Input tensor")); - PADDLE_ENFORCE_NE( - in_x->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument("Wrong format set for Input tensor")); - - PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN, - platform::errors::InvalidArgument( - "Wrong layout set for Input output_grad tensor")); - PADDLE_ENFORCE_NE(out_grad->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument( - "Wrong format set for Input output_grad tensor")); - PADDLE_ENFORCE_EQ( ctx.Attr("is_test"), false, platform::errors::InvalidArgument( @@ -187,10 +156,7 @@ class PoolingMKLDNNHandler const auto dt = framework::ToMKLDNNDataType( framework::TransToProtoVarType(in_x->dtype())); - auto src_md = dnnl::memory::desc(src_tz, dt, in_x->format()); auto dst_md = dnnl::memory::desc(diff_dst_tz, dt, MKLDNNMemoryFormat::any); - auto diff_dst_md = dnnl::memory::desc( - diff_dst_tz, platform::MKLDNNGetDataType(), out_grad->format()); auto diff_src_md = dnnl::memory::desc( diff_src_tz, platform::MKLDNNGetDataType(), MKLDNNMemoryFormat::any); @@ -211,14 +177,15 @@ class PoolingMKLDNNHandler ? dnnl::algorithm::pooling_max : (exclude_padding ? dnnl::algorithm::pooling_avg_exclude_padding : dnnl::algorithm::pooling_avg_include_padding), - src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]); + in_x->mem_desc(), dst_md, strides, ksize, mkldnn_paddings[0], + mkldnn_paddings[1]); this->AcquireBackwardPrimitiveDescriptor( pooling_type == "max" ? dnnl::algorithm::pooling_max : (exclude_padding ? 
dnnl::algorithm::pooling_avg_exclude_padding : dnnl::algorithm::pooling_avg_include_padding), - diff_src_md, diff_dst_md, strides, ksize, mkldnn_paddings[0], + diff_src_md, out_grad->mem_desc(), strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]); } @@ -327,8 +294,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { } astream.wait(); - output->set_layout(DataLayout::kMKLDNN); - output->set_format(platform::GetMKLDNNFormat(*dst_memory)); + output->set_mem_desc(dst_memory->get_desc()); } }; @@ -369,8 +335,7 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { } astream.wait(); - in_x_grad->set_layout(DataLayout::kMKLDNN); - in_x_grad->set_format(platform::GetMKLDNNFormat(*diff_src_memory)); + in_x_grad->set_mem_desc(diff_src_memory->get_desc()); } // Compute() }; diff --git a/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc index 86ecb01c89..e459f8b8e1 100644 --- a/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc @@ -41,9 +41,6 @@ class PReluMKLDNNHandler platform::CreateKey(dev_ctx, phi::vectorize(x->dims()), uniq_name)) { if (unlikely(!this->isCached())) { - auto x_md = memory::desc(phi::vectorize(x->dims()), - MKLDNNGetDataType(), x->format()); - auto weights_dims = phi::vectorize(weights->dims()); // weights must have same size as X only for "element" case @@ -59,30 +56,28 @@ class PReluMKLDNNHandler memory::format_tag::any); this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training, - x_md, weights_md); + x->mem_desc(), weights_md); if (!is_test) - this->AcquireBackwardPrimitiveDescriptor(x_md, weights_md, x_md, - weights_md); + this->AcquireBackwardPrimitiveDescriptor(x->mem_desc(), weights_md, + x->mem_desc(), weights_md); } } std::shared_ptr AcquireWeightsMemoryPossiblyWithReorder( - const Tensor* input, const bool is_test) { - const T* input_data = input->data(); + const Tensor* weights, const bool is_test) { + const T* weights_data = weights->data(); // if weights are 1D, every format tag is correct, so we accept // format_tag::any's output and no reorder is needed - if (input->dims().size() == 1) { + if (weights->dims().size() == 1) { return this->AcquireMemoryFromPrimitive(this->fwd_pd_->weights_desc(), - to_void_cast(input_data), + to_void_cast(weights_data), "@alpha_mem_p"); } - auto user_weights_md = memory::desc( - phi::vectorize(input->dims()), MKLDNNGetDataType(), input->format()); return this->AcquireMemoryWithReorder( - user_weights_md, this->fwd_pd_->weights_desc(), - to_void_cast(input_data), "@alpha_mem_p", is_test); + weights->mem_desc(), this->fwd_pd_->weights_desc(), + to_void_cast(weights_data), "@alpha_mem_p", is_test); } std::shared_ptr AcquireDiffWeightsMemory(Tensor* output) { @@ -128,8 +123,7 @@ class PReluMKLDNNKernel : public framework::OpKernel { {DNNL_ARG_DST, *dst_memory_p}}); astream.wait(); - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(GetMKLDNNFormat(*dst_memory_p)); + out->set_mem_desc(dst_memory_p->get_desc()); } }; @@ -174,8 +168,7 @@ class PReluGradMKLDNNKernel : public framework::OpKernel { {DNNL_ARG_DIFF_WEIGHTS, *diff_weights_memory_p}}); astream.wait(); - dx->set_layout(framework::DataLayout::kMKLDNN); - dx->set_format(GetMKLDNNFormat(*diff_src_memory_p)); + dx->set_mem_desc(diff_src_memory_p->get_desc()); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc index 
d7b4574fb0..6139b3c9be 100644 --- a/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc @@ -54,8 +54,7 @@ class ScaleMKLDNNKernel : public framework::OpKernel { {DNNL_ARG_TO, *dst_memory_p}}); astream.wait(); - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(platform::GetMKLDNNFormat(*dst_memory_p)); + out->set_mem_desc(dst_memory_p->get_desc()); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/shape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/shape_mkldnn_op.cc index a3b764b0e1..f04c73ec0b 100644 --- a/paddle/fluid/operators/mkldnn/shape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/shape_mkldnn_op.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/mkldnn_reuse.h" namespace paddle { namespace operators { @@ -40,9 +40,13 @@ class ShapeMKLDNNKernel : public framework::OpKernel { out_data[i] = in_dims[i]; } - auto* out = ctx.Output("Out"); - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(platform::GetPlainMKLDNNFormat(out->dims().size())); + dnnl::memory::desc out_mem_desc( + phi::vectorize(out_t->dims()), + framework::ToMKLDNNDataType( + framework::TransToProtoVarType(out_t->dtype())), + platform::GetPlainMKLDNNFormat(out_t->dims().size())); + + out_t->set_mem_desc(out_mem_desc); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc index 408de57bf9..79b0692748 100644 --- a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc @@ -29,11 +29,8 @@ class ShuffleChannelMKLDNNHandler : platform::MKLDNNHandlerNoCachingT(engine, cpu_place) { static constexpr int channel_axis = 1; - const auto md = dnnl::memory::desc(phi::vectorize(x->dims()), - MKLDNNGetDataType(), x->format()); - this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training, - md, channel_axis, group); + x->mem_desc(), channel_axis, group); } }; @@ -64,8 +61,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel { {DNNL_ARG_DST, *dst_memory_p}}); astream.wait(); - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(x->format()); + out->set_mem_desc(dst_memory_p->get_desc()); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index a0e50aa297..ef5d95dca3 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -47,12 +47,8 @@ class SoftmaxMKLDNNHandler platform::errors::InvalidArgument( "The shape of input and output tensor must be identical.")); - auto softmax_tz = phi::vectorize(input->dims()); - auto md = memory::desc(softmax_tz, platform::MKLDNNGetDataType(), - input->format()); - - this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md, - axis); + this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, + input->mem_desc(), axis); } SoftmaxMKLDNNHandler(const framework::ExecutionContext& ctx, @@ -73,17 +69,11 @@ class SoftmaxMKLDNNHandler auto dims = out_grad->dims(); // input and output share the same shape const int axis = phi::funcs::CanonicalAxis(ctx.Attr("axis"), dims.size()); - auto softmax_tz = 
phi::vectorize(dims); - - auto data_softmax_md = MKLDNNMemDesc( - softmax_tz, platform::MKLDNNGetDataType(), out->format()); - auto diff_softmax_md = MKLDNNMemDesc( - softmax_tz, platform::MKLDNNGetDataType(), out_grad->format()); this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, - data_softmax_md, axis); - this->AcquireBackwardPrimitiveDescriptor(diff_softmax_md, data_softmax_md, - axis); + out->mem_desc(), axis); + this->AcquireBackwardPrimitiveDescriptor(out_grad->mem_desc(), + out->mem_desc(), axis); } }; @@ -128,9 +118,7 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { }); } - output->set_layout(framework::DataLayout::kMKLDNN); - // Softmax output format is the same as input one - output->set_format(input->format()); + output->set_mem_desc(softmax_dst_memory_p->get_desc()); } }; @@ -162,8 +150,7 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel { {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}}); astream.wait(); - in_x_grad->set_layout(framework::DataLayout::kMKLDNN); - in_x_grad->set_format(platform::GetMKLDNNFormat(*diff_src_memory_p)); + in_x_grad->set_mem_desc(diff_src_memory_p->get_desc()); } }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h index 143038e738..b6111e99b6 100644 --- a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h +++ b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h @@ -29,12 +29,11 @@ class SoftplusMKLDNNHandler : platform::MKLDNNHandlerNoCachingT(engine, ctx.GetPlace()) { auto x_tz = phi::vectorize(x->dims()); - auto x_md = - dnnl::memory::desc(x_tz, platform::MKLDNNGetDataType(), x->format()); auto beta_tz = std::vector(x_tz.size(), 1); - auto beta_md = dnnl::memory::desc(beta_tz, platform::MKLDNNGetDataType(), - x->format()); + auto beta_md = + dnnl::memory::desc(beta_tz, platform::MKLDNNGetDataType(), + platform::GetPlainMKLDNNFormat(x_tz.size())); dnnl::post_ops post_ops; post_ops.append_eltwise(1.0f, dnnl::algorithm::eltwise_soft_relu, 0.0f, @@ -50,7 +49,8 @@ class SoftplusMKLDNNHandler attrs.set_post_ops(post_ops); this->AcquireForwardPrimitiveDescriptor(attrs, dnnl::algorithm::binary_mul, - x_md, beta_md, x_md); + x->mem_desc(), beta_md, + x->mem_desc()); } std::shared_ptr AcquireBetaMemory(const float* beta) { @@ -129,8 +129,7 @@ void custom_softplus_eltwise_forward(const framework::ExecutionContext& ctx) { binary_p->execute(astream, args); astream.wait(); - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(platform::GetMKLDNNFormat(*dst_memory_p)); + out->set_mem_desc(dst_memory_p->get_desc()); } } // namespace operators } // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py index b814eaed62..6229b7f559 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py @@ -28,18 +28,22 @@ class TestExpandV2OneDNNOp(OpTest): self.op_type = "expand_v2" self.init_data() self.x = np.random.random(self.ori_shape).astype("float32") - self.set_inputs() self.attrs = {'shape': self.shape, 'use_mkldnn': True} + self.set_inputs() + self.set_additional_inputs() output = np.tile(self.x, self.expand_times) self.outputs = {'Out': output} def set_inputs(self): self.inputs = {'X': self.x} + def set_additional_inputs(self): + pass + def init_data(self): - self.ori_shape = [1, 140] - 
self.shape = [12, 140] - self.expand_times = [12, 1] + self.ori_shape = [1, 1, 1, 140] + self.shape = [2, 3, 4, 140] + self.expand_times = [2, 3, 4, 1] def test_check_output(self): self.check_output_with_place(core.CPUPlace()) @@ -74,7 +78,7 @@ class TestExpandV2ExpandShapesTensor1OneDNNOp(TestExpandV2OneDNNOp): self.ori_shape = [100, 1] self.expand_times = [1, 2] self.expand_shape = [100, 2] - self.shape = [-1, -1] + self.shape = [100, 2] def calc_expand_shapes_tensor(self): self.expand_shapes_tensor = [] @@ -82,12 +86,9 @@ class TestExpandV2ExpandShapesTensor1OneDNNOp(TestExpandV2OneDNNOp): self.expand_shapes_tensor.append(("x" + str(index), np.ones( (1)).astype('int32') * ele)) - def set_inputs(self): + def set_additional_inputs(self): self.calc_expand_shapes_tensor() - self.inputs = { - 'X': self.x, - 'expand_shapes_tensor': self.expand_shapes_tensor - } + self.inputs['expand_shapes_tensor'] = self.expand_shapes_tensor class TestExpandV2ExpandShapesTensor2OneDNNOp( @@ -104,13 +105,10 @@ class TestExpandV2ShapesTensorOneDNNOp(TestExpandV2OneDNNOp): self.ori_shape = [100] self.expand_times = [2, 1] self.expand_shape = [2, 100] - self.shape = [-1, -1] + self.shape = [2, 100] - def set_inputs(self): - self.inputs = { - 'X': self.x, - 'Shape': np.array(self.expand_shape).astype("int32") - } + def set_additional_inputs(self): + self.inputs['Shape'] = np.array(self.expand_shape).astype("int32") # BF16 TESTS @@ -118,6 +116,7 @@ def create_expand_v2_bf16_test_class(parent): @OpTestTool.skip_if_not_cpu_bf16() class TestExpandV2BF16OneDNNOp(parent): def set_inputs(self): + self.attrs['mkldnn_data_type'] = 'bfloat16' self.inputs = {"X": convert_float_to_uint16(self.x)} def calculate_grads(self): -- GitLab
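A note on the pattern this series applies everywhere: the old pair, out->set_layout(DataLayout::kMKLDNN) followed by out->set_format(GetMKLDNNFormat(*mem)), becomes the single call out->set_mem_desc(mem->get_desc()). A dnnl::memory::desc already encodes dimensions, data type and strides, so once the tensor carries the whole descriptor there is nothing left for a separate format tag to say, and the layout/format PADDLE_ENFORCE checks deleted above become redundant too. A minimal sketch of that self-describing property, assuming only oneDNN 2.x (dnnl.hpp) and no Paddle types:

#include <iostream>
#include "dnnl.hpp"

int main() {
  // One object captures shape, data type and layout (strides).
  dnnl::memory::desc md({8, 3, 32, 32}, dnnl::memory::data_type::f32,
                        dnnl::memory::format_tag::nchw);

  std::cout << md.dims().size() << " dims, " << md.get_size()
            << " bytes\n";  // 4 dims, 8*3*32*32*4 bytes

  // reshape() is what the expand_v2 grad path above relies on: same
  // buffer, new logical shape, still one self-describing descriptor.
  auto flat = md.reshape({8, 3, 32 * 32});
  std::cout << flat.dims().size() << " dims after reshape\n";  // 3
  return 0;
}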
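gaussian_random and shape are the two kernels in this patch that produce an output without running any oneDNN primitive, so there is no primitive descriptor to take a memory descriptor from; they assemble a plain row-major one from dims, the data type and platform::GetPlainMKLDNNFormat instead. A hedged sketch of what such a helper presumably reduces to in raw oneDNN terms (PlainFormatTag and PlainMemDesc are illustrative names, not Paddle's, and the real mapping is assumed rather than quoted):

#include <cstddef>
#include <stdexcept>
#include "dnnl.hpp"

// Hypothetical helper: map tensor rank to the fully contiguous,
// row-major format tag, as platform::GetPlainMKLDNNFormat is assumed to.
dnnl::memory::format_tag PlainFormatTag(size_t ndims) {
  switch (ndims) {
    case 1: return dnnl::memory::format_tag::a;
    case 2: return dnnl::memory::format_tag::ab;
    case 3: return dnnl::memory::format_tag::abc;
    case 4: return dnnl::memory::format_tag::abcd;
    case 5: return dnnl::memory::format_tag::abcde;
    default: throw std::invalid_argument("unsupported rank");
  }
}

dnnl::memory::desc PlainMemDesc(const dnnl::memory::dims& dims,
                                dnnl::memory::data_type dt) {
  return dnnl::memory::desc(dims, dt, PlainFormatTag(dims.size()));
}

For example, PlainMemDesc({2, 3, 4}, dnnl::memory::data_type::f32) yields a fully contiguous abc descriptor, which matches what a freshly filled CPU buffer actually is.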
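In the concat and expand_v2 grad kernels the reorder destination also changes from dout->format() to GetPlainMKLDNNFormat(dx_vec_dims.size()), presumably because dx can differ from dout in rank, in which case the source's format tag would not even be well formed for the destination. The slicing itself stays a submemory view followed by a reorder; a self-contained sketch of that mechanism with a CPU engine, f32 data and illustrative sizes only:

#include <vector>
#include "dnnl.hpp"

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  std::vector<float> src_buf(4 * 6, 1.0f), dst_buf(4 * 3, 0.0f);
  dnnl::memory::desc src_md({4, 6}, dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::ab);

  // View columns [3, 6) of the 4x6 source: dims {4, 3}, offsets {0, 3}.
  // The offset is carried inside the descriptor, so the memory object
  // below is still created over the base pointer.
  auto slice_md = src_md.submemory_desc({4, 3}, {0, 3});
  dnnl::memory::desc dst_md({4, 3}, dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::ab);

  dnnl::memory src_mem(slice_md, eng, src_buf.data());
  dnnl::memory dst_mem(dst_md, eng, dst_buf.data());

  // Reorder copies the strided view into a contiguous destination,
  // which is what each dx[i] receives in the concat grad loop.
  dnnl::reorder(src_mem, dst_mem).execute(strm, src_mem, dst_mem);
  strm.wait();
  return 0;
}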
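The softplus handler keeps modelling x * beta as a dnnl::binary multiply whose second input has all-ones dimensions, which oneDNN broadcasts implicitly; the patch only swaps the rebuilt x_md for x->mem_desc() and gives beta a plain format tag. The broadcast mechanic in isolation, again as a hedged standalone sketch against oneDNN 2.x:

#include <unordered_map>
#include <vector>
#include "dnnl.hpp"

int main() {
  using tag = dnnl::memory::format_tag;
  using dt = dnnl::memory::data_type;

  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  std::vector<float> x(8, 2.0f), beta(1, 3.0f), out(8, 0.0f);

  dnnl::memory::desc x_md({2, 4}, dt::f32, tag::ab);
  // All-ones dims on src1 trigger oneDNN's implicit broadcast.
  dnnl::memory::desc beta_md({1, 1}, dt::f32, tag::ab);

  dnnl::binary::desc bd(dnnl::algorithm::binary_mul, x_md, beta_md, x_md);
  dnnl::binary::primitive_desc pd(bd, eng);

  dnnl::memory x_mem(x_md, eng, x.data());
  dnnl::memory beta_mem(beta_md, eng, beta.data());
  dnnl::memory out_mem(x_md, eng, out.data());

  dnnl::binary(pd).execute(strm, {{DNNL_ARG_SRC_0, x_mem},
                                  {DNNL_ARG_SRC_1, beta_mem},
                                  {DNNL_ARG_DST, out_mem}});
  strm.wait();  // out is now 6.0f everywhere
  return 0;
}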