From 7db7a0ec0928d354c75dab4418f4eb602db3851c Mon Sep 17 00:00:00 2001 From: piotrekobiIntel Date: Wed, 24 Nov 2021 14:58:43 +0100 Subject: [PATCH] Changed second batch of deprecated mkldnn header and function names to new oneDNN names (#37351) * Add second batch of deprecated mkldnn namespace and macro changes * Unlock CI * Fix temporary namespace alias placing --- paddle/fluid/framework/tensor.h | 6 +- .../operators/mkldnn/activation_mkldnn_op.cc | 70 ++++--- .../operators/mkldnn/batch_norm_mkldnn_op.cc | 89 ++++----- .../fluid/operators/mkldnn/clip_mkldnn_op.cc | 16 +- .../operators/mkldnn/concat_mkldnn_op.cc | 18 +- .../fluid/operators/mkldnn/conv_mkldnn_op.cc | 184 +++++++++--------- .../mkldnn/conv_transpose_mkldnn_op.cc | 83 ++++---- paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 116 ++++++----- .../fluid/operators/mkldnn/lrn_mkldnn_op.cc | 72 +++---- .../operators/mkldnn/matmul_mkldnn_op.cc | 14 +- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 37 ++-- .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 98 +++++----- .../fluid/operators/mkldnn/scale_mkldnn_op.cc | 7 +- .../operators/mkldnn/softmax_mkldnn_op.cc | 27 ++- .../fluid/operators/mkldnn/stack_mkldnn_op.cc | 16 +- .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 28 +-- paddle/fluid/platform/device_context.cc | 6 +- paddle/fluid/platform/device_context.h | 17 +- paddle/fluid/platform/mkldnn_helper.h | 20 +- 19 files changed, 455 insertions(+), 469 deletions(-) diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index ec990213433..7f8d7bffa98 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -90,9 +90,9 @@ class Tensor { #ifdef PADDLE_WITH_MKLDNN public: - inline mkldnn::memory::format_tag format() const { return format_; } + inline dnnl::memory::format_tag format() const { return format_; } - inline void set_format(const mkldnn::memory::format_tag format) { + inline void set_format(const dnnl::memory::format_tag format) { format_ = format; } @@ -106,7 +106,7 @@ class Tensor { * this field. 
*/ - mkldnn::memory::format_tag format_ = mkldnn::memory::format_tag::undef; + dnnl::memory::format_tag format_ = dnnl::memory::format_tag::undef; #endif public: diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 4bde641d2c1..9c5d03c17af 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -30,9 +30,9 @@ namespace operators { using framework::DataLayout; using framework::Tensor; -using mkldnn::memory; -using mkldnn::primitive; -using mkldnn::stream; +using dnnl::memory; +using dnnl::primitive; +using dnnl::stream; using platform::GetMKLDNNFormat; using platform::MKLDNNDeviceContext; using platform::to_void_cast; @@ -75,7 +75,7 @@ class MKLDNNActivationGradKernel template void eltwise_forward(const framework::ExecutionContext &ctx, - mkldnn::algorithm algorithm) { + dnnl::algorithm algorithm) { PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true, paddle::platform::errors::PreconditionNotMet( "Operator DNNL eletwise_forward must use CPUPlace")); @@ -101,8 +101,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx, auto activation_p = handler.AcquireForwardPrimitive(); auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); - activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p}, - {MKLDNN_ARG_TO, *dst_memory_p}}); + activation_p->execute( + astream, {{DNNL_ARG_FROM, *src_memory_p}, {DNNL_ARG_TO, *dst_memory_p}}); astream.wait(); y->set_layout(DataLayout::kMKLDNN); @@ -111,7 +111,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx, template void eltwise_grad(const framework::ExecutionContext &ctx, - mkldnn::algorithm algorithm) { + dnnl::algorithm algorithm) { auto &dev_ctx = ctx.template device_context(); const auto &mkldnn_engine = dev_ctx.GetEngine(); @@ -129,23 +129,23 @@ void eltwise_grad(const framework::ExecutionContext &ctx, auto &astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); activation_backward_p->execute(astream, - {{MKLDNN_ARG_SRC, *src_memory_p}, - {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p}, - {MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}}); + {{DNNL_ARG_SRC, *src_memory_p}, + {DNNL_ARG_DIFF_DST, *diff_dst_memory_p}, + {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}}); astream.wait(); diff_x->set_layout(DataLayout::kMKLDNN); diff_x->set_format(GetMKLDNNFormat(*diff_src_memory_p)); } -template +template struct MKLDNNActivationFunc : public BaseActivationFunctor { void operator()(const framework::ExecutionContext &ctx) const { eltwise_forward(ctx, algorithm); } }; -template +template struct MKLDNNActivationGradFunc : public BaseActivationFunctor { void operator()(const framework::ExecutionContext &ctx) const { eltwise_grad(ctx, algorithm); @@ -157,9 +157,9 @@ struct GeluMKLDNNFunctor : public BaseActivationFunctor { void operator()(const framework::ExecutionContext &ctx) const { const bool approximate = ctx.Attr("approximate"); if (approximate) { - eltwise_forward(ctx, mkldnn::algorithm::eltwise_gelu_tanh); + eltwise_forward(ctx, dnnl::algorithm::eltwise_gelu_tanh); } else { - eltwise_forward(ctx, mkldnn::algorithm::eltwise_gelu_erf); + eltwise_forward(ctx, dnnl::algorithm::eltwise_gelu_erf); } } }; @@ -169,9 +169,9 @@ struct GeluMKLDNNGradFunctor : public BaseActivationFunctor { void operator()(const framework::ExecutionContext &ctx) const { const bool approximate = ctx.Attr("approximate"); if (approximate) { - eltwise_grad(ctx, 
mkldnn::algorithm::eltwise_gelu_tanh); + eltwise_grad(ctx, dnnl::algorithm::eltwise_gelu_tanh); } else { - eltwise_grad(ctx, mkldnn::algorithm::eltwise_gelu_erf); + eltwise_grad(ctx, dnnl::algorithm::eltwise_gelu_erf); } } }; @@ -185,75 +185,73 @@ struct SoftplusMKLDNNFunctor : public BaseActivationFunctor { template using ReluMKLDNNFunctor = - MKLDNNActivationFunc; + MKLDNNActivationFunc; template using Relu6MKLDNNFunctor = - MKLDNNActivationFunc; + MKLDNNActivationFunc; template using SwishMKLDNNFunctor = - MKLDNNActivationFunc; + MKLDNNActivationFunc; template using HardSwishMKLDNNFunctor = - MKLDNNActivationFunc; + MKLDNNActivationFunc; template using SigmoidMKLDNNFunctor = - MKLDNNActivationFunc; + MKLDNNActivationFunc; template using TanhMKLDNNFunctor = - MKLDNNActivationFunc; + MKLDNNActivationFunc; template using SqrtMKLDNNFunctor = - MKLDNNActivationFunc; + MKLDNNActivationFunc; template -using AbsMKLDNNFunctor = - MKLDNNActivationFunc; +using AbsMKLDNNFunctor = MKLDNNActivationFunc; template -using EluMKLDNNFunctor = - MKLDNNActivationFunc; +using EluMKLDNNFunctor = MKLDNNActivationFunc; template using ReluMKLDNNGradFunctor = - MKLDNNActivationGradFunc; + MKLDNNActivationGradFunc; template using Relu6MKLDNNGradFunctor = - MKLDNNActivationGradFunc; + MKLDNNActivationGradFunc; template using SwishMKLDNNGradFunctor = - MKLDNNActivationGradFunc; + MKLDNNActivationGradFunc; template using HardSwishMKLDNNGradFunctor = - MKLDNNActivationGradFunc; + MKLDNNActivationGradFunc; template using SigmoidMKLDNNGradFunctor = - MKLDNNActivationGradFunc; + MKLDNNActivationGradFunc; template using TanhMKLDNNGradFunctor = - MKLDNNActivationGradFunc; + MKLDNNActivationGradFunc; template using SqrtMKLDNNGradFunctor = - MKLDNNActivationGradFunc; + MKLDNNActivationGradFunc; template using AbsMKLDNNGradFunctor = - MKLDNNActivationGradFunc; + MKLDNNActivationGradFunc; template using EluMKLDNNGradFunctor = - MKLDNNActivationGradFunc; + MKLDNNActivationGradFunc; } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc index bd910e30672..bf95ffdc11e 100644 --- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc @@ -27,24 +27,23 @@ class MKLDNNDeviceContext; namespace paddle { namespace operators { -using mkldnn::memory; -using mkldnn::primitive; -using mkldnn::reorder; -using mkldnn::stream; +using dnnl::memory; +using dnnl::primitive; +using dnnl::reorder; +using dnnl::stream; using paddle::platform::MKLDNNDeviceContext; using platform::to_void_cast; template class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< - T, mkldnn::batch_normalization_forward, - mkldnn::batch_normalization_backward> { + T, dnnl::batch_normalization_forward, + dnnl::batch_normalization_backward> { public: BatchNormMKLDNNHandler(const paddle::framework::ExecutionContext &ctx, - const mkldnn::engine mkldnn_engine, const Tensor *x, + const dnnl::engine mkldnn_engine, const Tensor *x, const bool global_stats, const bool test_mode) - : platform::MKLDNNHandlerNoCachingT( + : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, ctx.GetPlace()) { const float epsilon = ctx.Attr("epsilon"); const bool fuse_with_relu = ctx.HasAttr("fuse_with_relu") @@ -66,28 +65,27 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< auto src_tz = paddle::framework::vectorize(x->dims()); // Flags are added by bitwise OR operation - auto flags = 
mkldnn::normalization_flags::use_scale_shift; // 001 + auto flags = dnnl::normalization_flags::use_scale_shift; // 001 if (global_stats) - flags |= mkldnn::normalization_flags::use_global_stats; // 010 + flags |= dnnl::normalization_flags::use_global_stats; // 010 if (fuse_with_relu && test_mode) - flags |= mkldnn::normalization_flags::fuse_norm_relu; // 100 + flags |= dnnl::normalization_flags::fuse_norm_relu; // 100 - auto md = mkldnn::memory::desc( + auto md = dnnl::memory::desc( src_tz, platform::MKLDNNGetDataType(), platform::MKLDNNFormatForSize(src_tz.size(), x->format())); this->AcquireForwardPrimitiveDescriptor( - global_stats == true ? mkldnn::prop_kind::forward_scoring - : mkldnn::prop_kind::forward_training, + global_stats == true ? dnnl::prop_kind::forward_scoring + : dnnl::prop_kind::forward_training, md, epsilon, flags); } BatchNormMKLDNNHandler(const paddle::framework::ExecutionContext &ctx, - const mkldnn::engine mkldnn_engine, const Tensor *in_x, + const dnnl::engine mkldnn_engine, const Tensor *in_x, const Tensor *scale, const Tensor *out_grad) - : platform::MKLDNNHandlerNoCachingT( + : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, ctx.GetPlace()) { PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN, platform::errors::InvalidArgument( @@ -112,22 +110,22 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< auto dims = framework::vectorize(in_x->dims()); auto diff_dst_md = - mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), diff_fmt); + dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), diff_fmt); auto src_md = - mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), src_fmt); + dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), src_fmt); const float epsilon = ctx.Attr("epsilon"); this->AcquireForwardPrimitiveDescriptor( - mkldnn::prop_kind::forward_training, src_md, epsilon, - mkldnn::normalization_flags::use_scale_shift); + dnnl::prop_kind::forward_training, src_md, epsilon, + dnnl::normalization_flags::use_scale_shift); this->AcquireBackwardPrimitiveDescriptor( - mkldnn::prop_kind::backward, diff_dst_md, src_md, epsilon, - mkldnn::normalization_flags::use_scale_shift); + dnnl::prop_kind::backward, diff_dst_md, src_md, epsilon, + dnnl::normalization_flags::use_scale_shift); } - std::shared_ptr AcquireScaleShiftMemory(const Tensor *scale, - const Tensor *shift) { + std::shared_ptr AcquireScaleShiftMemory(const Tensor *scale, + const Tensor *shift) { auto scale_tz = paddle::framework::vectorize(scale->dims()); const unsigned int C = scale_tz[0]; PADDLE_ENFORCE_EQ( @@ -147,34 +145,34 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< return scaleshift_memory; } - std::shared_ptr AcquireDiffScaleShiftMemory( + std::shared_ptr AcquireDiffScaleShiftMemory( T *diff_scaleshift_data) { return this->AcquireMemoryFromPrimitive(this->bwd_pd_->diff_weights_desc(), diff_scaleshift_data); } - std::shared_ptr AcquireMeanMemory( + std::shared_ptr AcquireMeanMemory( const framework::Tensor *mean) { const T *mean_data = mean->data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(), to_void_cast(mean_data)); } - std::shared_ptr AcquireMeanMemory(framework::Tensor *mean) { + std::shared_ptr AcquireMeanMemory(framework::Tensor *mean) { T *mean_data = mean->mutable_data(this->place_, this->fwd_pd_->mean_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(), mean_data); } - std::shared_ptr AcquireVarianceMemory( + std::shared_ptr AcquireVarianceMemory( const 
framework::Tensor *variance) { const T *variance_data = variance->data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(), to_void_cast(variance_data)); } - std::shared_ptr AcquireVarianceMemory( + std::shared_ptr AcquireVarianceMemory( framework::Tensor *variance) { T *variance_data = variance->mutable_data( this->place_, this->fwd_pd_->variance_desc().get_size()); @@ -233,12 +231,11 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { y->set_format(platform::GetMKLDNNFormat(*dst_memory)); auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); - batch_norm_p->execute(astream, - {{MKLDNN_ARG_SRC, *src_memory}, - {MKLDNN_ARG_SCALE_SHIFT, *scaleshift_memory}, - {MKLDNN_ARG_MEAN, *mean_memory}, - {MKLDNN_ARG_VARIANCE, *variance_memory}, - {MKLDNN_ARG_DST, *dst_memory}}); + batch_norm_p->execute(astream, {{DNNL_ARG_SRC, *src_memory}, + {DNNL_ARG_SCALE_SHIFT, *scaleshift_memory}, + {DNNL_ARG_MEAN, *mean_memory}, + {DNNL_ARG_VARIANCE, *variance_memory}, + {DNNL_ARG_DST, *dst_memory}}); astream.wait(); if (!global_stats) { @@ -307,13 +304,13 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); batch_norm_bwd_p->execute( - astream, {{MKLDNN_ARG_SRC, *src_memory}, - {MKLDNN_ARG_MEAN, *mean_memory}, - {MKLDNN_ARG_VARIANCE, *variance_memory}, - {MKLDNN_ARG_DIFF_DST, *diff_dst_memory}, - {MKLDNN_ARG_SCALE_SHIFT, *scaleshift_memory}, - {MKLDNN_ARG_DIFF_SRC, *diff_src_memory}, - {MKLDNN_ARG_DIFF_SCALE_SHIFT, *diff_scaleshift_memory}}); + astream, {{DNNL_ARG_SRC, *src_memory}, + {DNNL_ARG_MEAN, *mean_memory}, + {DNNL_ARG_VARIANCE, *variance_memory}, + {DNNL_ARG_DIFF_DST, *diff_dst_memory}, + {DNNL_ARG_SCALE_SHIFT, *scaleshift_memory}, + {DNNL_ARG_DIFF_SRC, *diff_src_memory}, + {DNNL_ARG_DIFF_SCALE_SHIFT, *diff_scaleshift_memory}}); astream.wait(); T *diff_scale_data = diff_scale->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/mkldnn/clip_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/clip_mkldnn_op.cc index 94c3700da8c..83ccd80e171 100644 --- a/paddle/fluid/operators/mkldnn/clip_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/clip_mkldnn_op.cc @@ -34,7 +34,7 @@ class ClipMKLDNNKernel : public paddle::framework::OpKernel { auto* out = ctx.Output("Out"); paddle::platform::ActivationMKLDNNHandler handler( - mkldnn::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(), + dnnl::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(), x); auto src_memory_p = handler.AcquireSrcMemory(x); @@ -42,8 +42,8 @@ class ClipMKLDNNKernel : public paddle::framework::OpKernel { auto activation_p = handler.AcquireForwardPrimitive(); auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); - activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p}, - {MKLDNN_ARG_TO, *dst_memory_p}}); + activation_p->execute(astream, {{DNNL_ARG_FROM, *src_memory_p}, + {DNNL_ARG_TO, *dst_memory_p}}); astream.wait(); out->set_layout(paddle::framework::DataLayout::kMKLDNN); @@ -68,8 +68,8 @@ class ClipGradMKLDNNKernel : public paddle::framework::OpKernel { auto* dout = ctx.Input(paddle::framework::GradVarName("Out")); paddle::platform::ActivationMKLDNNHandler handler( - mkldnn::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(), - x, dout); + dnnl::algorithm::eltwise_clip_v2, ctx, mkldnn_engine, ctx.GetPlace(), x, + dout); auto src_memory_p = handler.AcquireBackwardSrcMemory(x); auto diff_dst_memory_p = 
handler.AcquireDiffDstMemory(dout); @@ -78,9 +78,9 @@ class ClipGradMKLDNNKernel : public paddle::framework::OpKernel { auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); activation_backward_p->execute(astream, - {{MKLDNN_ARG_SRC, *src_memory_p}, - {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p}, - {MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}}); + {{DNNL_ARG_SRC, *src_memory_p}, + {DNNL_ARG_DIFF_DST, *diff_dst_memory_p}, + {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}}); astream.wait(); dx->set_layout(paddle::framework::DataLayout::kMKLDNN); diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 4cc96a48bd2..cd92aaf0a10 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -24,10 +24,10 @@ namespace operators { using framework::DataLayout; using framework::Tensor; using framework::LoDTensor; -using mkldnn::memory; -using mkldnn::primitive; -using mkldnn::concat; -using mkldnn::stream; +using dnnl::memory; +using dnnl::primitive; +using dnnl::concat; +using dnnl::stream; using platform::to_void_cast; template @@ -35,7 +35,7 @@ class ConcatMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: ConcatMKLDNNHandler(const framework::ExecutionContext& ctx, - const mkldnn::engine mkldnn_engine, + const dnnl::engine mkldnn_engine, const std::vector& inputs, Tensor* output) : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, ctx.GetPlace()) { @@ -86,7 +86,7 @@ class ConcatMKLDNNHandler dst_md, concat_axis, srcs_md, this->engine_)); } - std::shared_ptr AcquireSrcMemory(const Tensor& input, int i) { + std::shared_ptr AcquireSrcMemory(const Tensor& input, int i) { const T* input_data = input.data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), to_void_cast(input_data)); @@ -139,9 +139,9 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { std::unordered_map args; for (size_t i = 0; i < multi_input.size(); ++i) { srcs.push_back(handler.AcquireSrcMemory(*(multi_input[i]), i)); - args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs.at(i))}); + args.insert({DNNL_ARG_MULTIPLE_SRC + i, *(srcs.at(i))}); } - args.insert({MKLDNN_ARG_DST, *dst_mem}); + args.insert({DNNL_ARG_DST, *dst_mem}); concat_p->execute(astream, args); astream.wait(); @@ -185,7 +185,7 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel { std::vector offset(dout_vec_dims.size(), 0); - mkldnn::memory::data_type dout_type = + dnnl::memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type()); platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(), dout_type, onednn_engine); diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index fa2428458e5..d499b273885 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -33,18 +33,18 @@ inline MKLDNNMemoryFormat GetWeightsFormat(const MKLDNNMemoryFormat format, } } -static mkldnn::memory::data_type GetDstType(bool is_int8, bool is_bfloat16, - bool force_fp32_output, - std::string fuse_activation, - bool fuse_residual_conn, - const Tensor* residual_param) { - auto dst_dt = mkldnn::memory::data_type::f32; +static dnnl::memory::data_type GetDstType(bool is_int8, bool is_bfloat16, + bool force_fp32_output, + std::string fuse_activation, + bool fuse_residual_conn, + const Tensor* residual_param) { + auto dst_dt = dnnl::memory::data_type::f32; if 
(is_int8) { dst_dt = (fuse_activation == "relu" || fuse_activation == "relu6") - ? mkldnn::memory::data_type::u8 - : mkldnn::memory::data_type::s8; + ? dnnl::memory::data_type::u8 + : dnnl::memory::data_type::s8; if (force_fp32_output) { - dst_dt = mkldnn::memory::data_type::f32; + dst_dt = dnnl::memory::data_type::f32; } if (fuse_residual_conn && residual_param) { auto residual_dt = framework::ToMKLDNNDataType(residual_param->type()); @@ -52,7 +52,7 @@ static mkldnn::memory::data_type GetDstType(bool is_int8, bool is_bfloat16, } } else { if (!force_fp32_output && is_bfloat16) { - dst_dt = mkldnn::memory::data_type::bf16; + dst_dt = dnnl::memory::data_type::bf16; if (fuse_residual_conn && residual_param) { dst_dt = framework::ToMKLDNNDataType(residual_param->type()); } @@ -63,19 +63,19 @@ static mkldnn::memory::data_type GetDstType(bool is_int8, bool is_bfloat16, template class ConvMKLDNNHandlerT - : public platform::MKLDNNHandlerT { + : public platform::MKLDNNHandlerT { public: ConvMKLDNNHandlerT(const framework::ExecutionContext& ctx, const platform::MKLDNNDeviceContext& dev_ctx, - const mkldnn::engine mkldnn_engine, + const dnnl::engine mkldnn_engine, platform::Place cpu_place, const Tensor* input, const Tensor* filter, const Tensor* bias, Tensor* output, const std::string& unique_name) - : platform::MKLDNNHandlerT( + : platform::MKLDNNHandlerT( dev_ctx, mkldnn_engine, cpu_place, platform::CreateKey(dev_ctx, framework::vectorize(input->dims()), unique_name)) { @@ -184,27 +184,27 @@ class ConvMKLDNNHandlerT const auto dst_tz = framework::vectorize(output->dims()); - const mkldnn::memory::dims stride_dims = strides; + const dnnl::memory::dims stride_dims = strides; const auto mkldnn_paddings = platform::ToMkldnnPadding(paddings); - const mkldnn::memory::dims dilations_dims = dilations; + const dnnl::memory::dims dilations_dims = dilations; /* create memory descriptor for convolution without specified format * ('any') which lets a primitive (convolution in this case) choose * the memory format preferred for best performance */ auto chosen_memory_format = MKLDNNMemoryFormat::any; - auto data_type = mkldnn::memory::data_type::f32; + auto data_type = dnnl::memory::data_type::f32; if (ctx.Attr("mkldnn_data_type") == "bfloat16" || std::is_same::value) - data_type = mkldnn::memory::data_type::bf16; + data_type = dnnl::memory::data_type::bf16; - mkldnn::memory::desc src_md, weights_md; + dnnl::memory::desc src_md, weights_md; if (platform::is_int8()) { src_md = platform::MKLDNNMemDesc( src_tz, framework::ToMKLDNNDataType(input->type()), chosen_memory_format); weights_md = platform::MKLDNNMemDesc( - weights_tz, mkldnn::memory::data_type::s8, chosen_memory_format); + weights_tz, dnnl::memory::data_type::s8, chosen_memory_format); } else { src_md = platform::MKLDNNMemDesc(src_tz, data_type, chosen_memory_format); @@ -214,24 +214,24 @@ class ConvMKLDNNHandlerT const auto dst_md = platform::MKLDNNMemDesc( dst_tz, platform::MKLDNNGetDataType(), chosen_memory_format); - const auto fwd_prop_kind = is_test ? mkldnn::prop_kind::forward_inference - : mkldnn::prop_kind::forward_training; + const auto fwd_prop_kind = is_test ? 
dnnl::prop_kind::forward_inference + : dnnl::prop_kind::forward_training; float sum_scale = 1.0f; std::vector output_shift_scale; if (platform::is_int8()) std::tie(sum_scale, output_shift_scale) = get_int8_scales(ctx); - const mkldnn::primitive_attr conv_attr = CreatePostOps( + const dnnl::primitive_attr conv_attr = CreatePostOps( fuse_activation, fuse_alpha, fuse_beta, fuse_residual_conn, output_shift_scale, sum_scale); // for INT8 only! if (bias) { auto bias_tz = framework::vectorize(bias->dims()); - mkldnn::memory::desc bias_md; + dnnl::memory::desc bias_md; if (platform::is_int8()) { bias_md = platform::MKLDNNMemDesc( - bias_tz, mkldnn::memory::data_type::s32, MKLDNNMemoryFormat::x); + bias_tz, dnnl::memory::data_type::s32, MKLDNNMemoryFormat::x); } else { bias_md = platform::MKLDNNMemDesc(bias_tz, data_type, MKLDNNMemoryFormat::x); @@ -256,9 +256,9 @@ class ConvMKLDNNHandlerT const Tensor* filter, const Tensor* bias, const Tensor* out_grad, Tensor* filter_grad, Tensor* in_x_grad, const std::string& unique_name) - : platform::MKLDNNHandlerT( + : platform::MKLDNNHandlerT( dev_ctx, dev_ctx.GetEngine(), cpu_place, platform::CreateKey(dev_ctx, framework::vectorize(in->dims()), unique_name)) { @@ -348,42 +348,42 @@ class ConvMKLDNNHandlerT auto mkldnn_paddings = platform::ToMkldnnPadding(paddings); std::transform(dilations.begin(), dilations.end(), dilations.begin(), [](int64_t i) { return i - 1; }); - const mkldnn::memory::dims dilations_dims = dilations; + const dnnl::memory::dims dilations_dims = dilations; - const mkldnn::memory::dims stride_dims = strides; + const dnnl::memory::dims stride_dims = strides; // Recreating FWD PD. For training there are no post ops in convolution - mkldnn::primitive_attr conv_attr; + dnnl::primitive_attr conv_attr; if (bias) { auto bias_tz = framework::vectorize(bias->dims()); - mkldnn::memory::desc bias_md; + dnnl::memory::desc bias_md; if (platform::is_int8()) { bias_md = platform::MKLDNNMemDesc( - bias_tz, mkldnn::memory::data_type::s32, MKLDNNMemoryFormat::x); + bias_tz, dnnl::memory::data_type::s32, MKLDNNMemoryFormat::x); } else { bias_md = platform::MKLDNNMemDesc( - bias_tz, mkldnn::memory::data_type::f32, MKLDNNMemoryFormat::x); + bias_tz, dnnl::memory::data_type::f32, MKLDNNMemoryFormat::x); } this->AcquireForwardPrimitiveDescriptor( - conv_attr, mkldnn::prop_kind::forward_training, + conv_attr, dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_direct, src_md, weights_md, bias_md, dst_md, stride_dims, dilations_dims, mkldnn_paddings[0], mkldnn_paddings[1]); } else { this->AcquireForwardPrimitiveDescriptor( - conv_attr, mkldnn::prop_kind::forward_training, + conv_attr, dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_direct, src_md, weights_md, dst_md, stride_dims, dilations_dims, mkldnn_paddings[0], mkldnn_paddings[1]); } this->AcquireBackwardPrimitiveDescriptor( - mkldnn::algorithm::convolution_direct, diff_src_md, weights_md, + dnnl::algorithm::convolution_direct, diff_src_md, weights_md, diff_dst_md, strides, dilations_dims, mkldnn_paddings[0], mkldnn_paddings[1]); this->AcquireBackwardWeightsPrimitiveDescriptor( - mkldnn::algorithm::convolution_direct, src_md, diff_weights_md, + dnnl::algorithm::convolution_direct, src_md, diff_weights_md, diff_dst_md, strides, dilations_dims, mkldnn_paddings[0], mkldnn_paddings[1]); } @@ -471,12 +471,12 @@ class ConvMKLDNNHandlerT return std::make_tuple(sum_scale, output_shift_scale); } - mkldnn::primitive_attr CreatePostOps( + dnnl::primitive_attr CreatePostOps( std::string 
fuse_activation, float fuse_alpha, float fuse_beta, bool fuse_residual_conn, const std::vector output_shift_scale = {}, float sum_scale = 1.0f) { - mkldnn::primitive_attr conv_attr; - mkldnn::post_ops post_operations; + dnnl::primitive_attr conv_attr; + dnnl::post_ops post_operations; if (output_shift_scale.size() > 0) { int mask = output_shift_scale.size() > 1 ? 1 << 1 : 0; conv_attr.set_output_scales(mask, output_shift_scale); @@ -494,29 +494,28 @@ class ConvMKLDNNHandlerT // PostOps object and configure it to execute an eltwise relu operation. constexpr float scale = 1.0f; if (fuse_activation == "relu" || fuse_activation == "leaky_relu") { - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_relu, fuse_alpha, fuse_beta); } else if (fuse_activation == "relu6") { - post_operations.append_eltwise(scale, - mkldnn::algorithm::eltwise_bounded_relu, - fuse_alpha, fuse_beta); + post_operations.append_eltwise( + scale, dnnl::algorithm::eltwise_bounded_relu, fuse_alpha, fuse_beta); } else if (fuse_activation == "swish") { - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_swish, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_swish, fuse_alpha, fuse_beta); } else if (fuse_activation == "hard_swish") { - post_operations.append_eltwise( - scale, mkldnn::algorithm::eltwise_hardswish, fuse_alpha, fuse_beta); + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_hardswish, + fuse_alpha, fuse_beta); } else if (fuse_activation == "hard_sigmoid") { - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_linear, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_linear, fuse_alpha, fuse_beta); - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_clip, - 0.0f, 1.0f); + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_clip, 0.0f, + 1.0f); } conv_attr.set_post_ops(post_operations); return conv_attr; } - std::shared_ptr + std::shared_ptr AcquireWeightsMemoryWithReorderFromDataPrimitive( const framework::Tensor* filter, const int groups, const bool is_conv3d) { const K* filter_data = filter->data(); @@ -532,22 +531,21 @@ class ConvMKLDNNHandlerT platform::to_void_cast(filter_data), "@weights_mem_d_p", false); } - std::shared_ptr AcquireSrcMemoryWithReorder( + std::shared_ptr AcquireSrcMemoryWithReorder( const framework::Tensor* input) { return this->AcquireMemoryWithReorderPrimitive( input, "@src_mem_p_user", "@src_mem_p_target", "@src_mem_p", this->fwd_pd_->src_desc()); } - std::shared_ptr - AcquireSrcMemoryWithReorderFromWeightsPrimitive( + std::shared_ptr AcquireSrcMemoryWithReorderFromWeightsPrimitive( const framework::Tensor* input) { return this->AcquireMemoryWithReorderPrimitive( input, "@src_mem_w_p_user", "@src_mem_w_p_target", "@src_mem_w_p", this->bwd_w_pd_->src_desc()); } - std::shared_ptr + std::shared_ptr AcquireDiffDstMemoryWithReorderFromWeightsPrimitive( const framework::Tensor* out_grad) { return this->AcquireMemoryWithReorderPrimitive( @@ -555,7 +553,7 @@ class ConvMKLDNNHandlerT "@diff_dst_mem_w_p", this->bwd_w_pd_->diff_dst_desc()); } - std::shared_ptr + std::shared_ptr AcquireDiffDstMemoryWithReorderMemoryFromDataPrimitive( const framework::Tensor* out_grad) { return this->AcquireMemoryWithReorderPrimitive( @@ -563,10 +561,10 @@ class ConvMKLDNNHandlerT "@diff_dst_mem_p", this->bwd_pd_->diff_dst_desc()); } - std::shared_ptr AcquireMemoryWithReorderPrimitive( + std::shared_ptr AcquireMemoryWithReorderPrimitive( 
const framework::Tensor* in_mem, const char* key_mem_user, const char* key_mem_target, const char* key_mem, - const mkldnn::memory::desc& mem_md) { + const dnnl::memory::desc& mem_md) { const T* in_mem_data = in_mem->data(); const std::string user_key_suffix{key_mem_user}; auto user_mem_p = this->AcquireMemory(user_key_suffix); @@ -588,7 +586,7 @@ class ConvMKLDNNHandlerT } } - std::shared_ptr AcquireWeightsMemoryWithReorder( + std::shared_ptr AcquireWeightsMemoryWithReorder( const framework::Tensor* filter, const int groups, const bool is_conv3d, const bool is_test, const std::vector& scale_data = {1.0f}, int mask = 0) { @@ -613,7 +611,7 @@ class ConvMKLDNNHandlerT } } - std::shared_ptr AcquireBiasMemoryWithReorder( + std::shared_ptr AcquireBiasMemoryWithReorder( const framework::Tensor* bias, const bool is_test, const std::vector& scale_data = {1.0f}, int mask = 0) { auto bias_mem_p = this->AcquireMemory("@bias_mem_p_target"); @@ -632,7 +630,7 @@ class ConvMKLDNNHandlerT } } - std::shared_ptr AcquireResidualMemory( + std::shared_ptr AcquireResidualMemory( const framework::Tensor* residual_param) { void* residual_data = residual_param->type() == framework::DataTypeTrait::DataType() @@ -653,7 +651,7 @@ class ConvMKLDNNHandlerT } } - std::shared_ptr AcquireDstMemoryWithResidual( + std::shared_ptr AcquireDstMemoryWithResidual( framework::Tensor* output, const framework::Tensor* residual_param) { std::shared_ptr dst_memory_p; if (residual_param->format() != @@ -692,17 +690,17 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { GetDstType(is_INT8, is_BFLOAT16, force_fp32_output, fuse_activation, fuse_residual_conn, residual_param); if (!is_INT8) { - if (dst_dt == mkldnn::memory::data_type::f32) { + if (dst_dt == dnnl::memory::data_type::f32) { ComputeFP32(ctx); - } else if (dst_dt == mkldnn::memory::data_type::bf16) { + } else if (dst_dt == dnnl::memory::data_type::bf16) { ComputeFP32(ctx); } } else { - if (dst_dt == mkldnn::memory::data_type::f32) { + if (dst_dt == dnnl::memory::data_type::f32) { ComputeINT8(ctx); - } else if (dst_dt == mkldnn::memory::data_type::u8) { + } else if (dst_dt == dnnl::memory::data_type::u8) { ComputeINT8(ctx); - } else if (dst_dt == mkldnn::memory::data_type::s8) { + } else if (dst_dt == dnnl::memory::data_type::s8) { ComputeINT8(ctx); } } @@ -745,13 +743,13 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { auto conv_p = handler.AcquireForwardPrimitive(); std::unordered_map args = { - {MKLDNN_ARG_SRC, *src_memory_p}, - {MKLDNN_ARG_WEIGHTS, *weights_memory_p}, - {MKLDNN_ARG_DST, *dst_memory_p}}; + {DNNL_ARG_SRC, *src_memory_p}, + {DNNL_ARG_WEIGHTS, *weights_memory_p}, + {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(bias, is_test); - args.insert({MKLDNN_ARG_BIAS, *bias_memory_p}); + args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); @@ -821,7 +819,7 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); need_s8_to_u8 = (platform::MKLDNNGetDataType() == - mkldnn::memory::data_type::s8) && + dnnl::memory::data_type::s8) && unsigned_output; } else { dst_memory_p = handler.template AcquireDstMemory(output); @@ -830,9 +828,9 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { auto conv_p = handler.AcquireForwardPrimitive(); std::unordered_map args = { - {MKLDNN_ARG_SRC, *src_memory_p}, - {MKLDNN_ARG_WEIGHTS, *weights_memory_p}, - {MKLDNN_ARG_DST, 
*dst_memory_p}}; + {DNNL_ARG_SRC, *src_memory_p}, + {DNNL_ARG_WEIGHTS, *weights_memory_p}, + {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { auto p_scales_tuple = handler.get_int8_bias_scales(ctx); @@ -840,7 +838,7 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { auto bias_memory_p = handler.AcquireBiasMemoryWithReorder( bias, is_test, std::get<1>(*p_scales_tuple), std::get<0>(*p_scales_tuple)); - args.insert({MKLDNN_ARG_BIAS, *bias_memory_p}); + args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); @@ -905,9 +903,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel { // TODO(grygielski) why no bias_diff? conv_bwd_weights_p->execute( - astream, {{MKLDNN_ARG_SRC, *src_memory_p}, - {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p}, - {MKLDNN_ARG_DIFF_WEIGHTS, *diff_weights_memory_p}}); + astream, {{DNNL_ARG_SRC, *src_memory_p}, + {DNNL_ARG_DIFF_DST, *diff_dst_memory_p}, + {DNNL_ARG_DIFF_WEIGHTS, *diff_weights_memory_p}}); astream.wait(); filter_grad->set_layout(framework::DataLayout::kMKLDNN); @@ -918,16 +916,16 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel { // For convolution with groups convert from blocked to NCHW // otherwise there will be problems in next operators working on this data if (g > 1) { - mkldnn::memory::data_type in_type = + dnnl::memory::data_type in_type = framework::ToMKLDNNDataType(filter->type()); // for 3d conv with groups (six dimensional data reorder to goidhw) // for 2d conv with groups (five dimensional data reorder to goihw) // auto weights_tz = framework::vectorize(filter->dims()); auto weights_tz = diff_weights_memory_p->get_desc().dims(); - mkldnn::memory::format_tag out_format = - weights_tz.size() == 6 ? mkldnn::memory::format_tag::goidhw - : mkldnn::memory::format_tag::goihw; + dnnl::memory::format_tag out_format = + weights_tz.size() == 6 ? dnnl::memory::format_tag::goidhw + : dnnl::memory::format_tag::goihw; platform::ReorderMKLDNNHandler handler(weights_tz, filter->type(), in_type, mkldnn_engine); auto reorder_dst_memory_p = @@ -947,9 +945,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel { // So here we have a data in goihw , which can be interpreted as OIHW // (OIDHW for conv3d) // because filter_grad shape is set for OIHW (OIDHW for conv3d) - mkldnn::memory::format_tag target_format = - weights_tz.size() == 6 ? mkldnn::memory::format_tag::oidhw - : mkldnn::memory::format_tag::oihw; + dnnl::memory::format_tag target_format = + weights_tz.size() == 6 ? 
dnnl::memory::format_tag::oidhw + : dnnl::memory::format_tag::oihw; filter_grad->set_format(target_format); } else { filter_grad->set_format(filter_fmt); @@ -969,9 +967,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel { auto conv_bwd_data_p = handler.AcquireBackwardPrimitive(); conv_bwd_data_p->execute(astream, - {{MKLDNN_ARG_WEIGHTS, *weights_memory_p}, - {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p}, - {MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}}); + {{DNNL_ARG_WEIGHTS, *weights_memory_p}, + {DNNL_ARG_DIFF_DST, *diff_dst_memory_p}, + {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}}); astream.wait(); input_grad->set_layout(framework::DataLayout::kMKLDNN); diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc index 04ff37222e1..35e35eb4bcb 100644 --- a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc @@ -25,8 +25,7 @@ namespace operators { using Tensor = framework::Tensor; using framework::DataLayout; -inline mkldnn::memory::dims GetWeightsTz(const Tensor* filter, - const int groups) { +inline dnnl::memory::dims GetWeightsTz(const Tensor* filter, const int groups) { auto iohw_weights_tz = framework::vectorize(filter->dims()); auto weights_tz = iohw_weights_tz; @@ -40,14 +39,13 @@ inline mkldnn::memory::dims GetWeightsTz(const Tensor* filter, template class ConvTransposeMKLDNNHandlerT - : public platform::MKLDNNHandlerNoCachingT { + : public platform::MKLDNNHandlerNoCachingT { public: ConvTransposeMKLDNNHandlerT(const framework::ExecutionContext& ctx, - const mkldnn::engine mkldnn_engine, + const dnnl::engine mkldnn_engine, const Tensor* input, const Tensor* filter, const Tensor* bias, Tensor* output) - : platform::MKLDNNHandlerNoCachingT( + : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, ctx.GetPlace()), is_test_(ctx.Attr("is_test")) { PADDLE_ENFORCE_EQ(is_test_, true, @@ -103,13 +101,13 @@ class ConvTransposeMKLDNNHandlerT } std::vector strides_temp = ctx.Attr>("strides"); - mkldnn::memory::dims strides(begin(strides_temp), end(strides_temp)); + dnnl::memory::dims strides(begin(strides_temp), end(strides_temp)); std::vector paddings_temp = ctx.Attr>("paddings"); - mkldnn::memory::dims paddings(begin(paddings_temp), end(paddings_temp)); + dnnl::memory::dims paddings(begin(paddings_temp), end(paddings_temp)); std::vector dilations_temp = ctx.Attr>("dilations"); - mkldnn::memory::dims dilations(begin(dilations_temp), end(dilations_temp)); + dnnl::memory::dims dilations(begin(dilations_temp), end(dilations_temp)); int groups = ctx.Attr("groups"); std::string padding_algorithm = ctx.Attr("padding_algorithm"); @@ -149,10 +147,10 @@ class ConvTransposeMKLDNNHandlerT const float fuse_alpha = ctx.Attr("fuse_alpha"); const float fuse_beta = ctx.Attr("fuse_beta"); - auto data_type = mkldnn::memory::data_type::f32; + auto data_type = dnnl::memory::data_type::f32; if (ctx.Attr("mkldnn_data_type") == "bfloat16" || std::is_same::value) - data_type = mkldnn::memory::data_type::bf16; + data_type = dnnl::memory::data_type::bf16; const auto src_md = platform::MKLDNNMemDesc(src_tz, data_type, chosen_memory_format); @@ -161,10 +159,10 @@ class ConvTransposeMKLDNNHandlerT const auto dst_md = platform::MKLDNNMemDesc( dst_tz, platform::MKLDNNGetDataType(), chosen_memory_format); - const mkldnn::primitive_attr conv_trans_attr = + const dnnl::primitive_attr conv_trans_attr = CreatePostOps(fuse_activation, fuse_alpha, fuse_beta); - auto fwd_prop_kind = is_test_ ? 
mkldnn::prop_kind::forward_inference - : mkldnn::prop_kind::forward_training; + auto fwd_prop_kind = is_test_ ? dnnl::prop_kind::forward_inference + : dnnl::prop_kind::forward_training; if (bias) { std::vector bias_tz = framework::vectorize(bias->dims()); const auto bias_md = @@ -181,44 +179,43 @@ class ConvTransposeMKLDNNHandlerT } } - mkldnn::primitive_attr CreatePostOps(const std::string& fuse_activation, - const float& fuse_alpha, - const float& fuse_beta) { - mkldnn::primitive_attr conv_attr; - mkldnn::post_ops post_operations; + dnnl::primitive_attr CreatePostOps(const std::string& fuse_activation, + const float& fuse_alpha, + const float& fuse_beta) { + dnnl::primitive_attr conv_attr; + dnnl::post_ops post_operations; // Fusion with ReLU layer is executed through the PostOps feature. Create a // PostOps object and configure it to execute an eltwise relu operation. if (fuse_activation == "relu" || fuse_activation == "leaky_relu") { constexpr float scale = 1.0f; - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_relu, fuse_alpha, fuse_beta); } else if (fuse_activation == "relu6") { constexpr float scale = 1.0f; - post_operations.append_eltwise(scale, - mkldnn::algorithm::eltwise_bounded_relu, - fuse_alpha, fuse_beta); + post_operations.append_eltwise( + scale, dnnl::algorithm::eltwise_bounded_relu, fuse_alpha, fuse_beta); } else if (fuse_activation == "swish") { constexpr float scale = 1.0f; - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_swish, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_swish, fuse_alpha, fuse_beta); } conv_attr.set_post_ops(post_operations); return conv_attr; } - std::shared_ptr AcquireSrcMemoryWithReorder( + std::shared_ptr AcquireSrcMemoryWithReorder( const framework::Tensor* input) { const T* input_data = input->data(); auto user_src_md = platform::MKLDNNMemDesc( framework::vectorize(input->dims()), platform::MKLDNNGetDataType(), input->format()); - return platform::MKLDNNHandlerNoCachingT:: + return platform::MKLDNNHandlerNoCachingT:: AcquireMemoryWithReorder(user_src_md, this->fwd_pd_->src_desc(), platform::to_void_cast(input_data)); } - std::shared_ptr AcquireWeightsMemoryWithReorder( + std::shared_ptr AcquireWeightsMemoryWithReorder( const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key, const framework::Tensor* filter, const int& groups) { const K* filter_data = filter->data(); @@ -236,12 +233,12 @@ class ConvTransposeMKLDNNHandlerT } template - std::shared_ptr AcquireMemoryWithReorder( + std::shared_ptr AcquireMemoryWithReorder( const platform::MKLDNNDeviceContext& dev_ctx, - const mkldnn::memory::desc& user_md, - const mkldnn::memory::desc& target_md, void* ptr, const std::string& key, - const std::string& suffix, bool is_persistent = false, - const std::vector& scale_data = {1.0f}, int mask = 0) { + const dnnl::memory::desc& user_md, const dnnl::memory::desc& target_md, + void* ptr, const std::string& key, const std::string& suffix, + bool is_persistent = false, const std::vector& scale_data = {1.0f}, + int mask = 0) { const auto target_key = key + suffix + "_target"; const auto key_reorder_p = key + suffix + "reorder_p"; const auto user_key = key + suffix + "_user"; @@ -254,7 +251,7 @@ class ConvTransposeMKLDNNHandlerT std::make_shared(user_md, this->engine_, ptr); if (user_md != target_md) { target_memory_p = - std::make_shared(target_md, this->engine_); + std::make_shared(target_md, this->engine_); 
dnnl::reorder::primitive_desc reorder_pdesc; if (platform::is_int8()) { dnnl::primitive_attr attr; @@ -271,8 +268,8 @@ class ConvTransposeMKLDNNHandlerT auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); platform::RecordEvent record_reorder("int_reorder", platform::EventRole::kUniqueOp); - reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, - {MKLDNN_ARG_TO, *target_memory_p}}); + reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, + {DNNL_ARG_TO, *target_memory_p}}); astream.wait(); } else { target_memory_p = user_memory_p; @@ -288,20 +285,20 @@ class ConvTransposeMKLDNNHandlerT // TODO(jczaja): Here we detect if reorder is cached it means it is needed // need to change this to get rid of keys - auto reorder_p = std::static_pointer_cast( + auto reorder_p = std::static_pointer_cast( dev_ctx.GetBlob(key_reorder_p)); if (reorder_p != nullptr) { platform::RecordEvent record_reorder("int_reorder", platform::EventRole::kUniqueOp); - reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, - {MKLDNN_ARG_TO, *target_memory_p}}); + reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, + {DNNL_ARG_TO, *target_memory_p}}); astream.wait(); } } return target_memory_p; } - std::shared_ptr AcquireBiasMemoryWithReorder( + std::shared_ptr AcquireBiasMemoryWithReorder( const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key, const framework::Tensor* bias) { const K* bias_data = bias->data(); @@ -364,14 +361,14 @@ class ConvTransposeMKLDNNOpKernel : public framework::OpKernel { auto conv_p = handler.AcquireForwardPrimitive(); std::unordered_map args = { - {MKLDNN_ARG_SRC, *src_memory_p}, - {MKLDNN_ARG_WEIGHTS, *weights_memory_p}, - {MKLDNN_ARG_DST, *dst_memory_p}}; + {DNNL_ARG_SRC, *src_memory_p}, + {DNNL_ARG_WEIGHTS, *weights_memory_p}, + {DNNL_ARG_DST, *dst_memory_p}}; if (bias) { auto bias_memory_p = handler.AcquireBiasMemoryWithReorder(dev_ctx, key, bias); - args.insert({MKLDNN_ARG_BIAS, *bias_memory_p}); + args.insert({DNNL_ARG_BIAS, *bias_memory_p}); } auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); conv_p->execute(astream, args); diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index 08fdd3b74c7..4230d180385 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -38,16 +38,16 @@ using framework::ExecutionContext; using platform::MKLDNNDeviceContext; using platform::to_void_cast; using platform::GetMKLDNNFormat; -using mkldnn::memory; -using mkldnn::inner_product_forward; -using mkldnn::primitive; -using mkldnn::stream; -using mkldnn::prop_kind; +using dnnl::memory; +using dnnl::inner_product_forward; +using dnnl::primitive; +using dnnl::stream; +using dnnl::prop_kind; template class FCPrimitiveFactory { public: - explicit FCPrimitiveFactory(const mkldnn::engine& engine) : engine_(engine) {} + explicit FCPrimitiveFactory(const dnnl::engine& engine) : engine_(engine) {} void ExecuteFcPrimitive(const LoDTensor* input, const Tensor* weights, const Tensor* bias, LoDTensor* output, @@ -89,8 +89,7 @@ class FCPrimitiveFactory { // descriptor has been divided into separate cases, based on the number // of input dimensions. 
size_t input_dim_num = input->dims().size(); - paddle::optional - fc_prim_desc; + paddle::optional fc_prim_desc; memory::desc usr_weights_desc = {}; switch (input_dim_num) { case 2: @@ -140,14 +139,14 @@ class FCPrimitiveFactory { void Execute() { auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); if (bias_) { - fc_->execute(astream, {{MKLDNN_ARG_SRC, *input_}, - {MKLDNN_ARG_WEIGHTS, *weights_}, - {MKLDNN_ARG_BIAS, *bias_}, - {MKLDNN_ARG_DST, *output_}}); + fc_->execute(astream, {{DNNL_ARG_SRC, *input_}, + {DNNL_ARG_WEIGHTS, *weights_}, + {DNNL_ARG_BIAS, *bias_}, + {DNNL_ARG_DST, *output_}}); } else { - fc_->execute(astream, {{MKLDNN_ARG_SRC, *input_}, - {MKLDNN_ARG_WEIGHTS, *weights_}, - {MKLDNN_ARG_DST, *output_}}); + fc_->execute(astream, {{DNNL_ARG_SRC, *input_}, + {DNNL_ARG_WEIGHTS, *weights_}, + {DNNL_ARG_DST, *output_}}); } astream.wait(); } @@ -192,7 +191,7 @@ class FCPrimitiveFactory { } } - mkldnn::inner_product_forward::primitive_desc Create2DFcPrimDescriptor( + dnnl::inner_product_forward::primitive_desc Create2DFcPrimDescriptor( const LoDTensor* input, const Tensor* weights, const Tensor* bias, LoDTensor* output, const ExecutionContext& ctx) { auto src_desc = CreateMemDescriptor(input, input->format()); @@ -213,7 +212,7 @@ class FCPrimitiveFactory { memory::desc Create2DUserWeightsDesc() { return weights_->get_desc(); } - mkldnn::inner_product_forward::primitive_desc Create3DFcPrimDescriptor( + dnnl::inner_product_forward::primitive_desc Create3DFcPrimDescriptor( const LoDTensor* input, const Tensor* weights, const Tensor* bias, LoDTensor* output, const ExecutionContext& ctx) { auto input_dims = framework::vectorize(input->dims()); @@ -244,7 +243,7 @@ class FCPrimitiveFactory { return CreateMemDescriptor(dims, MKLDNNMemoryFormat::oiw); } - mkldnn::inner_product_forward::primitive_desc Create4DFcPrimDescriptor( + dnnl::inner_product_forward::primitive_desc Create4DFcPrimDescriptor( const LoDTensor* input, const Tensor* weights, const Tensor* bias, LoDTensor* output, const ExecutionContext& ctx) { auto src_desc = CreateMemDescriptor(input, input->format()); @@ -274,13 +273,13 @@ class FCPrimitiveFactory { } // Convert data from one data format to another - std::shared_ptr Reorder(const memory::desc& src_desc, - const memory::desc& dst_desc, - void* src_data) { + std::shared_ptr Reorder(const memory::desc& src_desc, + const memory::desc& dst_desc, + void* src_data) { auto src_mem = memory(src_desc, engine_, src_data); auto dst_mem = std::make_shared(dst_desc, engine_); - auto reorder = mkldnn::reorder(src_mem, *dst_mem); + auto reorder = dnnl::reorder(src_mem, *dst_mem); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); { @@ -295,11 +294,11 @@ class FCPrimitiveFactory { // Convert data from one data format to another and rescale it. // If the desired data type is (un)signed int8, quantization occurs here. - std::shared_ptr ReorderWithScale( + std::shared_ptr ReorderWithScale( const std::shared_ptr src_mem, const memory::desc& dst_md, const std::vector& scale_data) { - auto dst_mem = std::make_shared(dst_md, engine_); - mkldnn::primitive_attr attributes; + auto dst_mem = std::make_shared(dst_md, engine_); + dnnl::primitive_attr attributes; // According to MKL-DNN's documentation mask determines along which // dimensions should the scale be applied. 
// 0 - Single scale applied to whole tensor @@ -308,14 +307,14 @@ class FCPrimitiveFactory { // becuase we perform per-output-channel quantization int mask = CreateMask(0, scale_data.size() > 1); attributes.set_output_scales(mask, scale_data); - auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes); + auto reorder = dnnl::reorder(*src_mem, *dst_mem, attributes); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); { platform::RecordEvent record_reorder("int_reorder", platform::EventRole::kUniqueOp); reorder.execute(astream, - {{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}}); + {{DNNL_ARG_FROM, *src_mem}, {DNNL_ARG_TO, *dst_mem}}); astream.wait(); } @@ -323,43 +322,43 @@ class FCPrimitiveFactory { } template - static mkldnn::memory::desc CreateMemDescriptor( + static dnnl::memory::desc CreateMemDescriptor( const std::vector& dims, MKLDNNMemoryFormat format) { return platform::MKLDNNMemDesc(dims, platform::MKLDNNGetDataType(), format); } template - static mkldnn::memory::desc CreateMemDescriptor(const Tensor* tensor, - MKLDNNMemoryFormat format) { + static dnnl::memory::desc CreateMemDescriptor(const Tensor* tensor, + MKLDNNMemoryFormat format) { auto dims = framework::vectorize(tensor->dims()); return CreateMemDescriptor(dims, format); } template - mkldnn::memory CreateMemory(const mkldnn::memory::desc& desc, - const Tensor* tensor) { + dnnl::memory CreateMemory(const dnnl::memory::desc& desc, + const Tensor* tensor) { return CreateMemory(desc, platform::to_void_cast(tensor->data())); } - mkldnn::memory CreateMemory(const mkldnn::memory::desc& desc, void* data) { + dnnl::memory CreateMemory(const dnnl::memory::desc& desc, void* data) { return memory(desc, engine_, data); } template - std::shared_ptr CreateMemoryToBeCached( - const mkldnn::memory::desc& desc, const Tensor* tensor) { + std::shared_ptr CreateMemoryToBeCached( + const dnnl::memory::desc& desc, const Tensor* tensor) { return CreateMemoryToBeCached(desc, platform::to_void_cast(tensor->data())); } - std::shared_ptr CreateMemoryToBeCached( - const mkldnn::memory::desc& desc, void* data) { + std::shared_ptr CreateMemoryToBeCached( + const dnnl::memory::desc& desc, void* data) { return std::make_shared(desc, engine_, data); } // Create weights memory and transform to default MKL-DNN format - std::shared_ptr CreateWeightsMemory(const Tensor* weights) { + std::shared_ptr CreateWeightsMemory(const Tensor* weights) { auto dims = framework::vectorize(weights->dims()); std::swap(dims[0], dims[1]); // Correct output dimensions auto src_desc = CreateMemDescriptor(dims, MKLDNNMemoryFormat::io); @@ -446,9 +445,9 @@ class FCPrimitiveFactory { } // Fuse relu into FC with activation type attribute has been set to 'relu' - mkldnn::primitive_attr CreatePostOps(const ExecutionContext& ctx) { - mkldnn::primitive_attr attributes; - mkldnn::post_ops post_operations; + dnnl::primitive_attr CreatePostOps(const ExecutionContext& ctx) { + dnnl::primitive_attr attributes; + dnnl::post_ops post_operations; auto output_shift_scale = ComputeOutputShiftScale(ctx); int mask = CreateMask(1, output_shift_scale.size() > 1); @@ -458,56 +457,55 @@ class FCPrimitiveFactory { constexpr float scale = 1.0f; constexpr float negative_slope = 0.0f; constexpr float placeholder = 1.0f; // beta - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_relu, negative_slope, placeholder); } else if (ctx.Attr("activation_type") == "gelu") { constexpr float scale = 1.0f; 
constexpr float alpha = 0.0f; constexpr float beta = 0.0f; - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_gelu, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_gelu, alpha, beta); } else if (ctx.Attr("activation_type") == "gelu_tanh") { constexpr float scale = 1.0f; constexpr float alpha = 0.0f; constexpr float beta = 0.0f; - post_operations.append_eltwise( - scale, mkldnn::algorithm::eltwise_gelu_tanh, alpha, beta); + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_gelu_tanh, + alpha, beta); } else if (ctx.Attr("activation_type") == "gelu_erf") { constexpr float scale = 1.0f; constexpr float alpha = 0.0f; constexpr float beta = 0.0f; - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_gelu_erf, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_gelu_erf, alpha, beta); } else if (ctx.Attr("activation_type") == "tanh") { constexpr float scale = 1.0f; constexpr float alpha = 0.0f; constexpr float beta = 0.0f; - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_tanh, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_tanh, alpha, beta); } else if (ctx.Attr("activation_type") == "sigmoid") { constexpr float scale = 1.0f; constexpr float alpha = 0.0f; constexpr float beta = 0.0f; - post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_logistic, + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_logistic, alpha, beta); } else if (ctx.Attr("activation_type") == "hard_swish") { constexpr float scale = 1.0f; constexpr float alpha = 0.0f; constexpr float beta = 0.0f; - post_operations.append_eltwise( - scale, mkldnn::algorithm::eltwise_hardswish, alpha, beta); + post_operations.append_eltwise(scale, dnnl::algorithm::eltwise_hardswish, + alpha, beta); } attributes.set_post_ops(post_operations); return attributes; } - mkldnn::inner_product_forward::primitive_desc CreateFcPrimDesc( - const mkldnn::memory::desc& input_desc, - const mkldnn::memory::desc& weights_desc, - const mkldnn::memory::desc& bias_desc, - const mkldnn::memory::desc& dst_desc, - const mkldnn::primitive_attr& attrs) { + dnnl::inner_product_forward::primitive_desc CreateFcPrimDesc( + const dnnl::memory::desc& input_desc, + const dnnl::memory::desc& weights_desc, + const dnnl::memory::desc& bias_desc, const dnnl::memory::desc& dst_desc, + const dnnl::primitive_attr& attrs) { auto fc_desc = inner_product_forward::desc(prop_kind::forward_scoring, input_desc, weights_desc, bias_desc, dst_desc); @@ -517,8 +515,8 @@ class FCPrimitiveFactory { // Create output memory based on output tensor and inner_product // primitive descriptor format chosen for output - mkldnn::memory CreateDstMemory( - const mkldnn::inner_product_forward::primitive_desc& fc_prim_desc, + dnnl::memory CreateDstMemory( + const dnnl::inner_product_forward::primitive_desc& fc_prim_desc, const ExecutionContext& ctx, Tensor* output) { auto dst_desc = fc_prim_desc.dst_desc(); auto buffer_size = dst_desc.get_size(); @@ -545,7 +543,7 @@ class FCPrimitiveFactory { } private: - const mkldnn::engine& engine_; + const dnnl::engine& engine_; paddle::optional input_; paddle::optional output_; std::shared_ptr bias_; diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index 8a89499e4b5..4e77ef72cdb 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -22,16 +22,16 @@ using paddle::platform::MKLDNNDeviceContext; template class 
LRNMKLDNNHandler - : public platform::MKLDNNHandlerNoCachingT { + : public platform::MKLDNNHandlerNoCachingT { public: LRNMKLDNNHandler(const framework::ExecutionContext& ctx, - const mkldnn::engine mkldnn_engine, - platform::Place cpu_place, const Tensor* input) + const dnnl::engine mkldnn_engine, platform::Place cpu_place, + const Tensor* input) - : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, - cpu_place) { + : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, + cpu_place) { const int n = ctx.Attr("n"); // MKL-DNN implements LRN in a caffe way: // http://caffe.berkeleyvision.org/tutorial/layers/lrn.html @@ -46,22 +46,22 @@ class LRNMKLDNNHandler auto dims = framework::vectorize(input->dims()); - auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), - input->format()); + auto src_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), + input->format()); this->AcquireForwardPrimitiveDescriptor( - is_test ? mkldnn::prop_kind::forward_inference - : mkldnn::prop_kind::forward_training, - mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); + is_test ? dnnl::prop_kind::forward_inference + : dnnl::prop_kind::forward_training, + dnnl::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); } LRNMKLDNNHandler(const framework::ExecutionContext& ctx, - const mkldnn::engine mkldnn_engine, - platform::Place cpu_place, const Tensor* in_x, - const Tensor* out_grad, Tensor* in_x_grad) - : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, - cpu_place) { + const dnnl::engine mkldnn_engine, platform::Place cpu_place, + const Tensor* in_x, const Tensor* out_grad, + Tensor* in_x_grad) + : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, + cpu_place) { PADDLE_ENFORCE_EQ( ctx.Attr("is_test"), false, platform::errors::PreconditionNotMet( @@ -74,28 +74,28 @@ class LRNMKLDNNHandler auto dims = framework::vectorize(in_x->dims()); - auto src_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), - in_x->format()); - auto diff_md = mkldnn::memory::desc(dims, platform::MKLDNNGetDataType(), - out_grad->format()); + auto src_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), + in_x->format()); + auto diff_md = dnnl::memory::desc(dims, platform::MKLDNNGetDataType(), + out_grad->format()); this->AcquireForwardPrimitiveDescriptor( - mkldnn::prop_kind::forward_training, - mkldnn::algorithm::lrn_across_channels, src_md, n, alpha, beta, k); + dnnl::prop_kind::forward_training, dnnl::algorithm::lrn_across_channels, + src_md, n, alpha, beta, k); this->AcquireBackwardPrimitiveDescriptor( - mkldnn::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, beta, + dnnl::algorithm::lrn_across_channels, src_md, diff_md, n, alpha, beta, k); } - std::shared_ptr AcquireWorkspaceMemory(Tensor* workspace) { + std::shared_ptr AcquireWorkspaceMemory(Tensor* workspace) { T* ptr = workspace->mutable_data( this->place_, this->fwd_pd_->workspace_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->workspace_desc(), ptr); } - std::shared_ptr AcquireBackwardWorkspaceMemory( + std::shared_ptr AcquireBackwardWorkspaceMemory( const Tensor* workspace) { const T* workspace_data = workspace->data(); return this->AcquireMemoryFromPrimitive( @@ -136,12 +136,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); if (!workspace_memory->get_desc().is_zero()) { mid->set_format(platform::GetMKLDNNFormat(*workspace_memory)); - lrn_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, - 
{MKLDNN_ARG_DST, *dst_memory}, - {MKLDNN_ARG_WORKSPACE, *workspace_memory}}); + lrn_p->execute(astream, {{DNNL_ARG_SRC, *src_memory}, + {DNNL_ARG_DST, *dst_memory}, + {DNNL_ARG_WORKSPACE, *workspace_memory}}); } else { - lrn_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, - {MKLDNN_ARG_DST, *dst_memory}}); + lrn_p->execute( + astream, {{DNNL_ARG_SRC, *src_memory}, {DNNL_ARG_DST, *dst_memory}}); } astream.wait(); @@ -182,10 +182,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto lrn_bwd = handler.AcquireBackwardPrimitive(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - lrn_bwd->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, - {MKLDNN_ARG_DIFF_DST, *diff_dst_memory}, - {MKLDNN_ARG_DIFF_SRC, *diff_src_memory}, - {MKLDNN_ARG_WORKSPACE, *workspace}}); + lrn_bwd->execute(astream, {{DNNL_ARG_SRC, *src_memory}, + {DNNL_ARG_DIFF_DST, *diff_dst_memory}, + {DNNL_ARG_DIFF_SRC, *diff_src_memory}, + {DNNL_ARG_WORKSPACE, *workspace}}); astream.wait(); in_x_grad->set_layout(framework::DataLayout::kMKLDNN); diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc index b7eb5a3ab4b..ac35c6f5a77 100644 --- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc @@ -108,7 +108,7 @@ template class MatMulMKLDNNHandler : public paddle::platform::MKLDNNHandlerNoCachingT { public: - MatMulMKLDNNHandler(const mkldnn::engine engine, + MatMulMKLDNNHandler(const dnnl::engine engine, paddle::platform::Place cpu_place, Tensor* x, bool trans_x, Tensor* y, bool trans_y, Tensor* out, float scale) @@ -148,7 +148,7 @@ class MatMulMKLDNNHandler this->AcquireForwardPrimitiveDescriptor(attrs, x_md, y_md, out_md); } // Constructor for FWD MatMul - MatMulMKLDNNHandler(const mkldnn::engine engine, const ExecutionContext& ctx, + MatMulMKLDNNHandler(const dnnl::engine engine, const ExecutionContext& ctx, float scale) : paddle::platform::MKLDNNHandlerNoCachingT( engine, ctx.GetPlace()), @@ -202,9 +202,9 @@ class MatMulMKLDNNHandler weights_memory_p->set_data_handle(y_ptr); dst_memory_p->set_data_handle(out_ptr); matmul_p->execute(astream, { - {MKLDNN_ARG_SRC, *src_memory_p}, - {MKLDNN_ARG_WEIGHTS, *weights_memory_p}, - {MKLDNN_ARG_DST, *dst_memory_p}, + {DNNL_ARG_SRC, *src_memory_p}, + {DNNL_ARG_WEIGHTS, *weights_memory_p}, + {DNNL_ARG_DST, *dst_memory_p}, }); x_ptr = static_cast(x_ptr) + std::get<0>(offsets); y_ptr = static_cast(y_ptr) + std::get<1>(offsets); @@ -218,7 +218,7 @@ class MatMulMKLDNNHandler out->set_layout(DataLayout::kMKLDNN); } - std::shared_ptr AcquireDstMemory( + std::shared_ptr AcquireDstMemory( paddle::framework::Tensor* output) { // We cannot use base AcquireDstMemory as it makes an allocation request // base on DST memory primitive size. 
This is fine in general, but in MatMul @@ -548,7 +548,7 @@ void MatMulGradMKLDNNKernel::Compute(const ExecutionContext& ctx) const { template void MatMulGradMKLDNNKernel::ExecuteMatMulGrad( const ExecutionContext& ctx, const MKLDNNDeviceContext& dev_ctx, - const mkldnn::engine& engine, Tensor* x, bool trans_x, + const dnnl::engine& engine, Tensor* x, bool trans_x, bool is_fold_init_dims_x, Tensor* y, bool trans_y, bool is_fold_init_dims_y, Tensor* out) const { // gradient is calculated in a different way when broadcasting is used diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index 422944107fb..09380240522 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -33,18 +33,17 @@ using framework::DataLayout; using framework::DDim; using framework::ExecutionContext; using framework::Tensor; -using mkldnn::inner_product_forward; -using mkldnn::memory; -using mkldnn::prop_kind; -using mkldnn::stream; +using dnnl::inner_product_forward; +using dnnl::memory; +using dnnl::prop_kind; +using dnnl::stream; using platform::MKLDNNDeviceContext; using platform::to_void_cast; template class MulPrimitiveFactory { public: - explicit MulPrimitiveFactory(const mkldnn::engine &engine) - : engine_(engine) {} + explicit MulPrimitiveFactory(const dnnl::engine &engine) : engine_(engine) {} inner_product_forward CreateMulPrimitive(const Tensor *x_input, const Tensor *y_input, @@ -99,15 +98,15 @@ class MulPrimitiveFactory { const memory::desc &dst_desc, void *src_data, const std::vector &scale) { auto mask = scale.size() > 1 ? 1 : 0; - mkldnn::primitive_attr attr; + dnnl::primitive_attr attr; attr.set_output_scales(mask, scale); auto src_mem = memory(src_desc, engine_, src_data); auto dst_mem = memory(dst_desc, engine_); - auto reorder_pd = mkldnn::reorder::primitive_desc(src_mem, dst_mem, attr); + auto reorder_pd = dnnl::reorder::primitive_desc(src_mem, dst_mem, attr); - auto reorder = mkldnn::reorder(reorder_pd); + auto reorder = dnnl::reorder(reorder_pd); auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); { @@ -132,9 +131,9 @@ class MulPrimitiveFactory { scale_y); } - mkldnn::primitive_attr CreateMulAttr(const ExecutionContext &ctx, - bool force_fp32_output) { - mkldnn::primitive_attr mul_attr; + dnnl::primitive_attr CreateMulAttr(const ExecutionContext &ctx, + bool force_fp32_output) { + dnnl::primitive_attr mul_attr; auto scale_y_data = ctx.Attr>("scale_y"); auto scale_x_data = ctx.Attr("scale_x"); @@ -185,9 +184,9 @@ class MulPrimitiveFactory { void Execute() { auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); - (*mul_).execute(astream, {{MKLDNN_ARG_SRC, *x_input_}, - {MKLDNN_ARG_WEIGHTS, *y_input_}, - {MKLDNN_ARG_DST, *output_}}); + (*mul_).execute(astream, {{DNNL_ARG_SRC, *x_input_}, + {DNNL_ARG_WEIGHTS, *y_input_}, + {DNNL_ARG_DST, *output_}}); astream.wait(); } @@ -268,7 +267,7 @@ class MulPrimitiveFactory { auto dst_mem = dst_data ? 
memory(dst_desc, engine_, dst_data) : memory(dst_desc, engine_); - auto reorder = mkldnn::reorder(src_mem, dst_mem); + auto reorder = dnnl::reorder(src_mem, dst_mem); auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); { @@ -289,7 +288,7 @@ class MulPrimitiveFactory { return Reorder(src_desc, dst_desc, to_void_cast(input_y->data())); } - const mkldnn::engine &engine_; + const dnnl::engine &engine_; paddle::optional x_input_; paddle::optional y_input_; paddle::optional output_; @@ -303,7 +302,7 @@ template std::shared_ptr> GetPrimitiveFactory( const MKLDNNDeviceContext &dev_ctx, const ExecutionContext &ctx, const Tensor *input_x, const Tensor *input_y, - const mkldnn::engine &mkldnn_engine) { + const dnnl::engine &mkldnn_engine) { std::string key = platform::CreateKey( dev_ctx, input_x->type(), framework::vectorize(input_x->dims()), input_y->type(), framework::vectorize(input_y->dims()), @@ -327,7 +326,7 @@ inner_product_forward GetMulPrimitive(const MKLDNNDeviceContext &dev_ctx, const ExecutionContext &ctx, const Tensor *input_x, const Tensor *input_y, Tensor *output, - const mkldnn::engine &mkldnn_engine) { + const dnnl::engine &mkldnn_engine) { constexpr bool is_int8 = std::is_same::value || std::is_same::value; bool force_fp32_output = ctx.Attr("force_fp32_output"); diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index d86bab9d3a4..eb0240e3e3d 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -20,24 +20,24 @@ namespace paddle { namespace operators { using framework::DataLayout; -using mkldnn::memory; -using mkldnn::pooling_backward; -using mkldnn::pooling_forward; -using mkldnn::primitive; -using mkldnn::reorder; -using mkldnn::stream; +using dnnl::memory; +using dnnl::pooling_backward; +using dnnl::pooling_forward; +using dnnl::primitive; +using dnnl::reorder; +using dnnl::stream; using platform::to_void_cast; template class PoolingMKLDNNHandler - : public platform::MKLDNNHandlerNoCachingT { + : public platform::MKLDNNHandlerNoCachingT { public: PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, - const mkldnn::engine mkldnn_engine, const Tensor* input, + const dnnl::engine mkldnn_engine, const Tensor* input, Tensor* output) - : platform::MKLDNNHandlerNoCachingT( + : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, ctx.GetPlace()) { PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN, platform::errors::InvalidArgument( @@ -98,7 +98,7 @@ class PoolingMKLDNNHandler const auto exclude_padding = ctx.Attr("exclusive"); - const auto src_md = mkldnn::memory::desc(src_tz, dt, input->format()); + const auto src_md = dnnl::memory::desc(src_tz, dt, input->format()); /* create memory descriptor for pooling without specified format * ('any') which lets a primitive (pooling in this case) choose * the memory format preferred for best performance @@ -119,22 +119,21 @@ class PoolingMKLDNNHandler ComputeAdaptivePoolParameters(ctx, src_tz, &ksize, &strides); this->AcquireForwardPrimitiveDescriptor( - is_test ? mkldnn::prop_kind::forward_inference - : mkldnn::prop_kind::forward_training, + is_test ? dnnl::prop_kind::forward_inference + : dnnl::prop_kind::forward_training, pooling_type == "max" - ? mkldnn::algorithm::pooling_max - : (exclude_padding - ? mkldnn::algorithm::pooling_avg_exclude_padding - : mkldnn::algorithm::pooling_avg_include_padding), + ? dnnl::algorithm::pooling_max + : (exclude_padding ? 
dnnl::algorithm::pooling_avg_exclude_padding + : dnnl::algorithm::pooling_avg_include_padding), src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]); } PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, - const mkldnn::engine mkldnn_engine, const Tensor* in_x, + const dnnl::engine mkldnn_engine, const Tensor* in_x, const Tensor* out_grad, Tensor* in_x_grad) - : platform::MKLDNNHandlerNoCachingT( + : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, ctx.GetPlace()) { PADDLE_ENFORCE_EQ( in_x->layout(), DataLayout::kMKLDNN, @@ -185,12 +184,11 @@ class PoolingMKLDNNHandler auto diff_dst_tz = paddle::framework::vectorize(out_grad->dims()); const auto dt = framework::ToMKLDNNDataType(in_x->type()); - auto src_md = mkldnn::memory::desc(src_tz, dt, in_x->format()); - auto dst_md = - mkldnn::memory::desc(diff_dst_tz, dt, MKLDNNMemoryFormat::any); - auto diff_dst_md = mkldnn::memory::desc( + auto src_md = dnnl::memory::desc(src_tz, dt, in_x->format()); + auto dst_md = dnnl::memory::desc(diff_dst_tz, dt, MKLDNNMemoryFormat::any); + auto diff_dst_md = dnnl::memory::desc( diff_dst_tz, platform::MKLDNNGetDataType(), out_grad->format()); - auto diff_src_md = mkldnn::memory::desc( + auto diff_src_md = dnnl::memory::desc( diff_src_tz, platform::MKLDNNGetDataType(), MKLDNNMemoryFormat::any); auto mkldnn_paddings = platform::ToMkldnnPadding(paddings); @@ -205,44 +203,42 @@ class PoolingMKLDNNHandler const auto exclude_padding = ctx.Attr("exclusive"); this->AcquireForwardPrimitiveDescriptor( - mkldnn::prop_kind::forward_training, + dnnl::prop_kind::forward_training, pooling_type == "max" - ? mkldnn::algorithm::pooling_max - : (exclude_padding - ? mkldnn::algorithm::pooling_avg_exclude_padding - : mkldnn::algorithm::pooling_avg_include_padding), + ? dnnl::algorithm::pooling_max + : (exclude_padding ? dnnl::algorithm::pooling_avg_exclude_padding + : dnnl::algorithm::pooling_avg_include_padding), src_md, dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]); this->AcquireBackwardPrimitiveDescriptor( pooling_type == "max" - ? mkldnn::algorithm::pooling_max - : (exclude_padding - ? mkldnn::algorithm::pooling_avg_exclude_padding - : mkldnn::algorithm::pooling_avg_include_padding), + ? dnnl::algorithm::pooling_max + : (exclude_padding ? 
dnnl::algorithm::pooling_avg_exclude_padding + : dnnl::algorithm::pooling_avg_include_padding), diff_src_md, diff_dst_md, strides, ksize, mkldnn_paddings[0], mkldnn_paddings[1]); } - std::shared_ptr AcquireWorkspaceMemory( + std::shared_ptr AcquireWorkspaceMemory( const platform::MKLDNNDeviceContext& dev_ctx, const std::string& unique_name) { - mkldnn::memory::desc workspace_md = this->fwd_pd_->workspace_desc(); + dnnl::memory::desc workspace_md = this->fwd_pd_->workspace_desc(); // Pooling Workspace has to be passed to Grad op that // may be executed by diffrent thread, hence // for that one we use key that does not contain TID std::string workspace_key = platform::CreateKey(dev_ctx, workspace_md.dims(), workspace_md.data_type(), unique_name, "@wrk"); - auto mem_p = std::static_pointer_cast( - dev_ctx.GetBlob(workspace_key)); + auto mem_p = + std::static_pointer_cast(dev_ctx.GetBlob(workspace_key)); if (mem_p == nullptr) { static std::mutex acquire_barrier; std::lock_guard block_threads_until_finish_this_job( acquire_barrier); - mem_p = std::static_pointer_cast( + mem_p = std::static_pointer_cast( dev_ctx.GetBlob(workspace_key)); if (mem_p == nullptr) { - mem_p = std::make_shared(workspace_md, this->engine_); + mem_p = std::make_shared(workspace_md, this->engine_); dev_ctx.SetBlob(workspace_key, mem_p); } } @@ -318,13 +314,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { // Training auto workspace_memory = handler.AcquireWorkspaceMemory(dev_ctx, ctx.OutputName("Out")); - pool_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, - {MKLDNN_ARG_DST, *dst_memory}, - {MKLDNN_ARG_WORKSPACE, *workspace_memory}}); + pool_p->execute(astream, {{DNNL_ARG_SRC, *src_memory}, + {DNNL_ARG_DST, *dst_memory}, + {DNNL_ARG_WORKSPACE, *workspace_memory}}); } else { // Inference - pool_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory}, - {MKLDNN_ARG_DST, *dst_memory}}); + pool_p->execute( + astream, {{DNNL_ARG_SRC, *src_memory}, {DNNL_ARG_DST, *dst_memory}}); } astream.wait(); @@ -360,13 +356,13 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { // Max - pooling needs Workspace auto workspace_memory = handler.AcquireWorkspaceMemory(dev_ctx, ctx.InputName("Out")); - pool_bwd_p->execute(astream, {{MKLDNN_ARG_DIFF_SRC, *diff_src_memory}, - {MKLDNN_ARG_DIFF_DST, *diff_dst_memory}, - {MKLDNN_ARG_WORKSPACE, *workspace_memory}}); + pool_bwd_p->execute(astream, {{DNNL_ARG_DIFF_SRC, *diff_src_memory}, + {DNNL_ARG_DIFF_DST, *diff_dst_memory}, + {DNNL_ARG_WORKSPACE, *workspace_memory}}); } else { // Average Pooling - pool_bwd_p->execute(astream, {{MKLDNN_ARG_DIFF_SRC, *diff_src_memory}, - {MKLDNN_ARG_DIFF_DST, *diff_dst_memory}}); + pool_bwd_p->execute(astream, {{DNNL_ARG_DIFF_SRC, *diff_src_memory}, + {DNNL_ARG_DIFF_DST, *diff_dst_memory}}); } astream.wait(); diff --git a/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc index b8b735e96d2..d7b4574fb0d 100644 --- a/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc @@ -37,8 +37,7 @@ class ScaleMKLDNNKernel : public framework::OpKernel { bool is_inplaced = x->IsSharedBufferWith(*out); platform::ActivationMKLDNNHandler handler( - mkldnn::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(), - x); + dnnl::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(), x); auto src_memory_p = handler.AcquireSrcMemory(x); std::shared_ptr dst_memory_p = nullptr; @@ -51,8 +50,8 @@ class ScaleMKLDNNKernel : public 
framework::OpKernel { auto activation_p = handler.AcquireForwardPrimitive(); auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream(); - activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p}, - {MKLDNN_ARG_TO, *dst_memory_p}}); + activation_p->execute(astream, {{DNNL_ARG_FROM, *src_memory_p}, + {DNNL_ARG_TO, *dst_memory_p}}); astream.wait(); out->set_layout(framework::DataLayout::kMKLDNN); diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index c26c017596d..a46d262f599 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -32,15 +32,15 @@ using platform::to_void_cast; template class SoftmaxMKLDNNHandler - : public platform::MKLDNNHandlerNoCachingT { + : public platform::MKLDNNHandlerNoCachingT { public: - SoftmaxMKLDNNHandler(const mkldnn::engine mkldnn_engine, + SoftmaxMKLDNNHandler(const dnnl::engine mkldnn_engine, platform::Place cpu_place, const Tensor* input, Tensor* output, const int axis) - : platform::MKLDNNHandlerNoCachingT( - mkldnn_engine, cpu_place) { + : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, + cpu_place) { PADDLE_ENFORCE_EQ( input->dims(), output->dims(), platform::errors::InvalidArgument( @@ -55,13 +55,13 @@ class SoftmaxMKLDNNHandler } SoftmaxMKLDNNHandler(const framework::ExecutionContext& ctx, - const mkldnn::engine mkldnn_engine, + const dnnl::engine mkldnn_engine, platform::Place cpu_place, const Tensor* out, const Tensor* out_grad, Tensor* in_x_grad, const std::string& unique_name) - : platform::MKLDNNHandlerNoCachingT( - mkldnn_engine, cpu_place) { + : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, + cpu_place) { PADDLE_ENFORCE_EQ(out_grad->dims(), in_x_grad->dims(), platform::errors::InvalidArgument( "The shape of softmax_grad's input " @@ -154,10 +154,9 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel { auto softmax_bwd_p = handler.AcquireBackwardPrimitive(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - softmax_bwd_p->execute(astream, - {{MKLDNN_ARG_DST, *dst_memory_p}, - {MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p}, - {MKLDNN_ARG_DIFF_SRC, *diff_src_memory_p}}); + softmax_bwd_p->execute(astream, {{DNNL_ARG_DST, *dst_memory_p}, + {DNNL_ARG_DIFF_DST, *diff_dst_memory_p}, + {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}}); astream.wait(); in_x_grad->set_layout(framework::DataLayout::kMKLDNN); diff --git a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc index 94cf3747581..ca3aab9afc0 100644 --- a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc @@ -20,10 +20,10 @@ namespace operators { using framework::DataLayout; using framework::Tensor; using framework::LoDTensor; -using mkldnn::memory; -using mkldnn::primitive; -using mkldnn::concat; -using mkldnn::stream; +using dnnl::memory; +using dnnl::primitive; +using dnnl::concat; +using dnnl::stream; using platform::to_void_cast; template @@ -31,7 +31,7 @@ class StackMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: StackMKLDNNHandler(const framework::ExecutionContext& ctx, - const mkldnn::engine mkldnn_engine, + const dnnl::engine mkldnn_engine, const std::vector& inputs, Tensor* output) : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, ctx.GetPlace()) { @@ -91,7 +91,7 @@ class StackMKLDNNHandler dst_md, stack_axis, srcs_md, this->engine_)); } - std::shared_ptr AcquireSrcMemory(const Tensor& 
input, int i) { + std::shared_ptr AcquireSrcMemory(const Tensor& input, int i) { const T* input_data = input.data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), to_void_cast(input_data)); @@ -122,9 +122,9 @@ class StackMKLDNNOpKernel : public paddle::framework::OpKernel { std::unordered_map args; for (size_t i = 0; i < multi_input.size(); ++i) { srcs.push_back(handler.AcquireSrcMemory(*(multi_input[i]), i)); - args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs.at(i))}); + args.insert({DNNL_ARG_MULTIPLE_SRC + i, *(srcs.at(i))}); } - args.insert({MKLDNN_ARG_DST, *dst_mem}); + args.insert({DNNL_ARG_DST, *dst_mem}); concat_p->execute(astream, args); astream.wait(); diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index 2760bcecd5b..5a19584ae38 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -48,7 +48,7 @@ template class SumMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: - SumMKLDNNHandler(mkldnn::engine engine, platform::Place cpu_place, + SumMKLDNNHandler(dnnl::engine engine, platform::Place cpu_place, const std::vector& in_vars, framework::LoDTensor* z) @@ -57,21 +57,21 @@ class SumMKLDNNHandler auto dst_tz = framework::vectorize(z->dims()); auto src_tz = dst_tz; - std::vector srcs_md; + std::vector srcs_md; for (size_t i = 0; i < in_vars.size(); i++) { auto& input_it = in_vars[i]->Get(); if (input_it.numel() == 0) { continue; } MKLDNNMemoryFormat input_format = input_it.format(); - srcs_md.push_back(mkldnn::memory::desc( + srcs_md.push_back(dnnl::memory::desc( src_tz, platform::MKLDNNGetDataType(), input_format)); ++num_inputs_; } std::vector scales(num_inputs_, 1.0); - auto dst_md = mkldnn::memory::desc(dst_tz, platform::MKLDNNGetDataType(), - MKLDNNMemoryFormat::any); + auto dst_md = dnnl::memory::desc(dst_tz, platform::MKLDNNGetDataType(), + MKLDNNMemoryFormat::any); this->AcquireForwardPrimitiveDescriptor(dst_md, scales, srcs_md); } @@ -79,14 +79,14 @@ class SumMKLDNNHandler // (jczaja) sum oneDNN prim is not having .desc attribute so // we cannot use base AcquireForwardPrimitiveDescriptor void AcquireForwardPrimitiveDescriptor( - const mkldnn::memory::desc& dst_md, const std::vector& scales, - const std::vector& srcs_md) { + const dnnl::memory::desc& dst_md, const std::vector& scales, + const std::vector& srcs_md) { this->fwd_pd_.reset( new dnnl::sum::primitive_desc(dst_md, scales, srcs_md, this->engine_)); } - std::shared_ptr AcquireSrcMemory( - const framework::Tensor& input, int i) { + std::shared_ptr AcquireSrcMemory(const framework::Tensor& input, + int i) { const T* input_data = input.data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), to_void_cast(input_data)); @@ -94,7 +94,7 @@ class SumMKLDNNHandler using platform::MKLDNNHandlerNoCachingT::AcquireDstMemory; - std::shared_ptr AcquireDstMemory(void) { + std::shared_ptr AcquireDstMemory(void) { return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc()); } @@ -125,7 +125,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { SumMKLDNNHandler handler(mkldnn_engine, ctx.GetPlace(), in_vars, output); // Create list of SRC MEMs - std::vector> srcs_mem; + std::vector> srcs_mem; srcs_mem.reserve(handler.GetNumInputs()); int input_index = 0; for (size_t i = 0; i < in_vars.size(); i++) { @@ -147,11 +147,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { auto sum_p = handler.AcquireForwardPrimitive(); - 
std::unordered_map args; + std::unordered_map args; for (size_t i = 0; i < srcs_mem.size(); ++i) { - args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *(srcs_mem[i])}); + args.insert({DNNL_ARG_MULTIPLE_SRC + i, *(srcs_mem[i])}); } - args.insert({MKLDNN_ARG_DST, *dst_mem}); + args.insert({DNNL_ARG_DST, *dst_mem}); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); sum_p->execute(astream, args); diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index cc3aab3ecdb..6276388c848 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -589,7 +589,7 @@ MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place) } MKLDNNDeviceContextThreadLocals::Body::Body() - : cur_engine(mkldnn::engine::kind::cpu, 0), cur_stream(cur_engine) { + : cur_engine(dnnl::engine::kind::cpu, 0), cur_stream(cur_engine) { cur_mkldnn_session_id = kMKLDNNSessionID_Default; cur_input_shape_str = ""; cur_input_shape_cache_capacity = 1; @@ -647,11 +647,11 @@ void MKLDNNDeviceContextThreadLocals::Body::log_lib_version(void) { } } -const mkldnn::engine& MKLDNNDeviceContextThreadLocals::Body::get_engine(void) { +const dnnl::engine& MKLDNNDeviceContextThreadLocals::Body::get_engine(void) { return cur_engine; } -mkldnn::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) { +dnnl::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) { return cur_stream; } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 6ffc3bef743..d2f9ddbd783 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -46,8 +46,9 @@ limitations under the License. */ #endif #ifdef PADDLE_WITH_MKLDNN -#include "mkldnn.hpp" +#include "dnnl.hpp" #include "paddle/fluid/framework/data_layout.h" +namespace mkldnn = dnnl; #endif #include @@ -63,6 +64,10 @@ limitations under the License. */ #endif #include "unsupported/Eigen/CXX11/Tensor" +// This aias is required for now so that namespace name changes can be made to +// less than 20 files at a time. After all the names are changed it will be +// removed. 
+ namespace Eigen { struct DefaultDevice; struct GpuDevice; @@ -706,8 +711,8 @@ class MKLDNNDeviceContextThreadLocals { // know for converting MKL-DNN Tensor to non MKL-DNN paddle::framework::DataLayout cur_paddle_data_layout; // MKL-DNN stream used for execution of primitives (per-thread) - mkldnn::engine cur_engine; - mkldnn::stream cur_stream; + dnnl::engine cur_engine; + dnnl::stream cur_stream; std::string key_suffix; // Key identifying current Executor bool key_attach_thread_id = true; void* exec_ptr_ = nullptr; @@ -721,8 +726,8 @@ class MKLDNNDeviceContextThreadLocals { void set_cur_paddle_data_layout(framework::DataLayout dl); framework::DataLayout get_cur_paddle_data_layout(void); void log_lib_version(void); - const mkldnn::engine& get_engine(void); - mkldnn::stream& get_stream(void); + const dnnl::engine& get_engine(void); + dnnl::stream& get_stream(void); void set_key_suffix(const std::string& suffix) { key_suffix = suffix; } const std::string& get_key_suffix(void) const { return key_suffix; } void disable_tid_in_key(void) { key_attach_thread_id = false; } @@ -776,7 +781,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext { explicit MKLDNNDeviceContext(CPUPlace place); /* \brief Get the active engine */ - const mkldnn::engine& GetEngine() const { return tls().get_engine(); } + const dnnl::engine& GetEngine() const { return tls().get_engine(); } // Register object to currently used executor's map void LinkEntryWithExecutor(BlobPtr_t, KeyBlob::iterator) const; diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 123ea5f1ef3..b98ca33285a 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -334,34 +334,34 @@ inline dnnl::memory::format_tag GetMKLDNNFormat(const dnnl::memory memory) { return GetMKLDNNFormat(mem_desc); } -inline mkldnn::memory::format_tag GetPlainMKLDNNFormat(int tensor_rank) { +inline dnnl::memory::format_tag GetPlainMKLDNNFormat(int tensor_rank) { switch (tensor_rank) { case 1: - return mkldnn::memory::format_tag::a; + return dnnl::memory::format_tag::a; break; case 2: - return mkldnn::memory::format_tag::ab; + return dnnl::memory::format_tag::ab; break; case 3: - return mkldnn::memory::format_tag::abc; + return dnnl::memory::format_tag::abc; break; case 4: - return mkldnn::memory::format_tag::abcd; + return dnnl::memory::format_tag::abcd; break; case 5: - return mkldnn::memory::format_tag::abcde; + return dnnl::memory::format_tag::abcde; break; case 6: - return mkldnn::memory::format_tag::abcdef; + return dnnl::memory::format_tag::abcdef; break; case 7: - return mkldnn::memory::format_tag::abcdefg; + return dnnl::memory::format_tag::abcdefg; break; case 8: - return mkldnn::memory::format_tag::abcdefgh; + return dnnl::memory::format_tag::abcdefgh; break; case 9: - return mkldnn::memory::format_tag::abcdefghi; + return dnnl::memory::format_tag::abcdefghi; break; default: PADDLE_THROW(platform::errors::Unimplemented( -- GitLab
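
For readers following the migration, the temporary "namespace mkldnn = dnnl;" alias added in device_context.h above is what lets the rename proceed in batches of fewer than 20 files: any file that still spells the old namespace keeps compiling because the alias redirects it to oneDNN. The sketch below is a standalone illustration of that mechanism, not part of the patch; the helper function name is hypothetical.

    // Minimal sketch: legacy mkldnn:: spellings resolve to the oneDNN
    // dnnl namespace through a namespace alias, so not-yet-migrated
    // files continue to compile during the incremental rename.
    #include "dnnl.hpp"

    namespace mkldnn = dnnl;  // same alias as the one added in device_context.h

    dnnl::memory::format_tag PlainTagForMatrix() {  // hypothetical helper
      // Under the alias, mkldnn::memory::format_tag and
      // dnnl::memory::format_tag name the same type, so old and new
      // spellings can be mixed freely until the last batch lands.
      return mkldnn::memory::format_tag::ab;
    }

Once every operator and platform file uses the dnnl spelling directly, the alias (and this compatibility window) can be removed, as the comment in the patch notes.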