未验证 提交 ce3ee9bb 编写于 作者: P piotrekobiIntel 提交者: GitHub

Changed first batch of deprecated mkldnn headers and function names to new oneDNN names (#37040)

* Change first batch of mkldnn headers and namespace names to dnnl

* Revert changes to tensor.h, which require approval

* Format changes with pre-commit

* Add int32 tests

* Fix int32 tests and call GetDataFromTensor for int32

* Fix test
上级 9c5d5665
......@@ -100,21 +100,21 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
}
#ifdef PADDLE_WITH_MKLDNN
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using dnnl::memory;
using dnnl::primitive;
using dnnl::reorder;
void* GetDataFromTensor(const Tensor& tensor, mkldnn::memory::data_type type) {
void* GetDataFromTensor(const Tensor& tensor, dnnl::memory::data_type type) {
switch (type) {
case mkldnn::memory::data_type::f32:
case dnnl::memory::data_type::f32:
return platform::to_void_cast(tensor.data<float>());
case mkldnn::memory::data_type::s8:
case dnnl::memory::data_type::s8:
return platform::to_void_cast(tensor.data<int8_t>());
case mkldnn::memory::data_type::u8:
case dnnl::memory::data_type::u8:
return platform::to_void_cast(tensor.data<unsigned char>());
case mkldnn::memory::data_type::s32:
case dnnl::memory::data_type::s32:
return platform::to_void_cast(tensor.data<int32_t>());
case mkldnn::memory::data_type::bf16:
case dnnl::memory::data_type::bf16:
return platform::to_void_cast(tensor.data<paddle::platform::bfloat16>());
default:
PADDLE_THROW(
......
......@@ -37,7 +37,7 @@ namespace paddle {
namespace framework {
#ifdef PADDLE_WITH_MKLDNN
using MKLDNNDataType = mkldnn::memory::data_type;
using MKLDNNDataType = dnnl::memory::data_type;
inline MKLDNNMemoryFormat ToMKLDNNFormat(const DataLayout& layout) {
switch (layout) {
......
......@@ -44,7 +44,7 @@ TEST(DataTransform, DataLayoutFunction) {
}
#ifdef PADDLE_WITH_MKLDNN
TEST(DataTransform, GetDataFromTensorDNNL) {
TEST(DataTransformBf16, GetDataFromTensorDNNL) {
auto place = paddle::platform::CPUPlace();
paddle::framework::Tensor in = paddle::framework::Tensor();
in.mutable_data<paddle::platform::bfloat16>(
......@@ -55,4 +55,14 @@ TEST(DataTransform, GetDataFromTensorDNNL) {
EXPECT_EQ(in_data, paddle::platform::to_void_cast(
in.data<paddle::platform::bfloat16>()));
}
// Checks that GetDataFromTensor, when asked for oneDNN s32 data, hands back
// the exact raw buffer owned by an int32 Tensor (no copy, no conversion).
TEST(DataTransformInt32, GetDataFromTensorDNNL) {
  paddle::framework::Tensor tensor;
  tensor.mutable_data<int32_t>(paddle::framework::make_ddim({2, 3, 1, 2}),
                               paddle::platform::CPUPlace());
  // Fetch the type-erased pointer through the oneDNN-aware accessor.
  void* raw_data =
      paddle::framework::GetDataFromTensor(tensor, dnnl::memory::data_type::s32);
  // The returned pointer must alias the tensor's own int32 storage.
  EXPECT_EQ(raw_data,
            paddle::platform::to_void_cast(tensor.data<int32_t>()));
}
#endif
......@@ -310,4 +310,117 @@ TEST(DataTypeTransform, CPUTransform) {
static_cast<paddle::platform::bfloat16>(in_data_bool[i]).x);
}
}
// data type transform from/to int32
{
paddle::framework::Tensor in;
paddle::framework::Tensor out;
int32_t* ptr =
in.mutable_data<int32_t>(paddle::framework::make_ddim({2, 3}), place);
int data_number = 2 * 3;
for (int i = 0; i < data_number; ++i) {
ptr[i] = i;
}
// transform from int32 to other data types
paddle::framework::TransDataType(kernel_int32, kernel_fp32, in, &out);
float* out_data_float = out.data<float>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(out_data_float[i], static_cast<float>(ptr[i]));
}
paddle::framework::TransDataType(kernel_int32, kernel_fp64, in, &out);
double* out_data_double = out.data<double>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(out_data_double[i], static_cast<double>(ptr[i]));
}
paddle::framework::TransDataType(kernel_int32, kernel_bf16, in, &out);
paddle::platform::bfloat16* out_data_bf16 =
out.data<paddle::platform::bfloat16>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(out_data_bf16[i],
static_cast<paddle::platform::bfloat16>(ptr[i]));
}
paddle::framework::TransDataType(kernel_int32, kernel_int64, in, &out);
int64_t* out_data_int64 = out.data<int64_t>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i]));
}
paddle::framework::TransDataType(kernel_int32, kernel_bool, in, &out);
bool* out_data_bool = out.data<bool>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(out_data_bool[i], static_cast<bool>(ptr[i]));
}
// transform float to int32
float* in_data_float =
in.mutable_data<float>(paddle::framework::make_ddim({2, 3}), place);
for (int i = 0; i < data_number; ++i) {
in_data_float[i] = i;
}
paddle::framework::TransDataType(kernel_fp32, kernel_int32, in, &out);
ptr = out.data<int32_t>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(ptr[i], static_cast<int32_t>(in_data_float[i]));
}
// transform double to int32
double* in_data_double =
in.mutable_data<double>(paddle::framework::make_ddim({2, 3}), place);
for (int i = 0; i < data_number; ++i) {
in_data_double[i] = i;
}
paddle::framework::TransDataType(kernel_fp64, kernel_int32, in, &out);
ptr = out.data<int32_t>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(ptr[i], static_cast<int32_t>(in_data_double[i]));
}
// transform bfloat16 to int32
paddle::platform::bfloat16* in_data_bf16 =
in.mutable_data<paddle::platform::bfloat16>(
paddle::framework::make_ddim({2, 3}), place);
for (int i = 0; i < data_number; ++i) {
in_data_bf16[i] = i;
}
paddle::framework::TransDataType(kernel_bf16, kernel_int32, in, &out);
ptr = out.data<int32_t>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(ptr[i], static_cast<int32_t>(in_data_bf16[i]));
}
// transform int64 to int32
int64_t* in_data_int64 =
in.mutable_data<int64_t>(paddle::framework::make_ddim({2, 3}), place);
for (int i = 0; i < data_number; ++i) {
in_data_int64[i] = i;
}
paddle::framework::TransDataType(kernel_int64, kernel_int32, in, &out);
ptr = out.data<int32_t>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(ptr[i], static_cast<int32_t>(in_data_int64[i]));
}
// transform bool to int32
bool* in_data_bool =
in.mutable_data<bool>(paddle::framework::make_ddim({2, 3}), place);
for (int i = 0; i < data_number; ++i) {
in_data_bool[i] = i;
}
paddle::framework::TransDataType(kernel_bool, kernel_int32, in, &out);
ptr = out.data<int32_t>();
for (int i = 0; i < data_number; ++i) {
EXPECT_EQ(ptr[i], static_cast<int32_t>(in_data_bool[i]));
}
}
}
......@@ -26,9 +26,9 @@ namespace operators {
using framework::DataLayout;
using framework::Tensor;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using dnnl::memory;
using dnnl::primitive;
using dnnl::stream;
template <typename T, dnnl::algorithm BINARY_OP>
class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
......
......@@ -31,12 +31,11 @@ class LSTMMKLDNNHandler
public:
LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const platform::MKLDNNDeviceContext& dev_ctx,
const mkldnn::engine mkldnn_engine,
platform::Place cpu_place, const LoDTensor* input,
const Tensor* weight_h, const Tensor* h0, const Tensor* c0,
const bool is_reverse, const int64_t N, const int64_t Ti,
const int64_t IC, const int64_t OC,
const std::string& unique_name)
const dnnl::engine mkldnn_engine, platform::Place cpu_place,
const LoDTensor* input, const Tensor* weight_h,
const Tensor* h0, const Tensor* c0, const bool is_reverse,
const int64_t N, const int64_t Ti, const int64_t IC,
const int64_t OC, const std::string& unique_name)
: RNNMKLDNNHandler<T, dnnl::lstm_forward, T_out>(
ctx, dev_ctx, mkldnn_engine, ctx.GetPlace(), input, weight_h, h0,
is_reverse, N, Ti, IC, OC, 4,
......
......@@ -30,12 +30,11 @@ class RNNMKLDNNHandler : public platform::MKLDNNHandlerT<T, T_alg> {
public:
RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const platform::MKLDNNDeviceContext& dev_ctx,
const mkldnn::engine mkldnn_engine,
platform::Place cpu_place, const LoDTensor* input,
const Tensor* weight_h, const Tensor* h0,
const bool is_reverse, const int64_t N, const int64_t Ti,
const int64_t IC, const int64_t OC, const int64_t G,
const std::string& unique_name)
const dnnl::engine mkldnn_engine, platform::Place cpu_place,
const LoDTensor* input, const Tensor* weight_h,
const Tensor* h0, const bool is_reverse, const int64_t N,
const int64_t Ti, const int64_t IC, const int64_t OC,
const int64_t G, const std::string& unique_name)
: platform::MKLDNNHandlerT<T, T_alg>(
dev_ctx, dev_ctx.GetEngine(), cpu_place,
CreateKey(dev_ctx, unique_name, MKLDNNGetDataType<T>(), Ti)),
......
......@@ -17,7 +17,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "mkldnn.hpp"
#include "dnnl.hpp"
#include "paddle/fluid/operators/mkldnn/axpy_handler.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device_context.h"
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "dnnl.hpp"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/dequantize_op.h"
......@@ -23,13 +23,13 @@ limitations under the License. */
namespace paddle {
namespace operators {
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using dnnl::memory;
using dnnl::primitive;
using dnnl::reorder;
using platform::to_void_cast;
using Tensor = framework::Tensor;
using framework::DataLayout;
using mkldnn::stream;
using dnnl::stream;
using platform::GetMKLDNNFormat;
template <typename T>
......@@ -64,7 +64,7 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
auto src_tz = paddle::framework::vectorize<int64_t>(input->dims());
auto dst_tz = paddle::framework::vectorize<int64_t>(output->dims());
mkldnn::memory::data_type src_dt =
dnnl::memory::data_type src_dt =
paddle::framework::ToMKLDNNDataType(input->type());
MKLDNNMemoryFormat src_fmt = input->format();
......@@ -76,34 +76,34 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
const std::string key_src_mem = key + "@s";
const std::string key_dst_mem = key + "@d";
std::shared_ptr<mkldnn::memory> src_memory;
std::shared_ptr<mkldnn::memory> dst_memory;
std::shared_ptr<dnnl::memory> src_memory;
std::shared_ptr<dnnl::memory> dst_memory;
std::shared_ptr<reorder> reorder_p;
reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
if (reorder_p == nullptr) {
mkldnn::primitive_attr attri;
dnnl::primitive_attr attri;
int mask = 0;
float reorder_scale = 1. / scale_data;
attri.set_output_scales(mask, {reorder_scale});
if (with_shift) {
mkldnn::post_ops post_operations;
dnnl::post_ops post_operations;
post_operations.append_sum();
attri.set_post_ops(post_operations);
std::fill(output_data, output_data + output->numel(), reorder_shift);
}
auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
src_memory = std::make_shared<mkldnn::memory>(
src_md, engine, to_void_cast<T>(input_data));
src_memory = std::make_shared<dnnl::memory>(src_md, engine,
to_void_cast<T>(input_data));
auto dst_md =
platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
platform::MKLDNNFormatForSize(
dst_tz.size(), MKLDNNMemoryFormat::nchw));
dst_memory = std::make_shared<mkldnn::memory>(
dst_memory = std::make_shared<dnnl::memory>(
dst_md, engine, to_void_cast<float>(output_data));
auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
......@@ -113,12 +113,12 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
dev_ctx.SetBlob(key_src_mem, src_memory);
dev_ctx.SetBlob(key_dst_mem, dst_memory);
} else {
src_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_src_mem));
src_memory =
std::static_pointer_cast<dnnl::memory>(dev_ctx.GetBlob(key_src_mem));
src_memory->set_data_handle(to_void_cast<T>(input_data));
dst_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_dst_mem));
dst_memory =
std::static_pointer_cast<dnnl::memory>(dev_ctx.GetBlob(key_dst_mem));
if (with_shift)
std::fill(output_data, output_data + output->numel(), reorder_shift);
dst_memory->set_data_handle(output->mutable_data<float>(ctx.GetPlace()));
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "dnnl.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/quantize_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
......@@ -21,13 +21,13 @@ limitations under the License. */
namespace paddle {
namespace operators {
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using dnnl::memory;
using dnnl::primitive;
using dnnl::reorder;
using platform::to_void_cast;
using Tensor = framework::Tensor;
using framework::DataLayout;
using mkldnn::stream;
using dnnl::stream;
using platform::GetMKLDNNFormat;
template <typename T>
......@@ -65,19 +65,19 @@ class QuantOpKernel : public framework::OpKernel<T> {
bool bfloat16 = ctx.Attr<bool>("bfloat16");
// TODO(jczaja): Refactor with Acquire API
std::shared_ptr<mkldnn::memory> src_memory;
std::shared_ptr<mkldnn::memory> dst_memory;
std::shared_ptr<dnnl::memory> src_memory;
std::shared_ptr<dnnl::memory> dst_memory;
std::shared_ptr<reorder> reorder_p;
std::string out_layout = ctx.Attr<std::string>("output_format");
MKLDNNMemoryFormat out_format =
platform::data_format_to_memory_format(out_layout);
mkldnn::primitive_attr attri;
dnnl::primitive_attr attri;
int mask = 0;
attri.set_output_scales(mask, {scale_data});
if (with_shift) {
mkldnn::post_ops post_operations;
dnnl::post_ops post_operations;
post_operations.append_sum();
attri.set_post_ops(post_operations);
uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
......@@ -87,10 +87,10 @@ class QuantOpKernel : public framework::OpKernel<T> {
auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
input->format());
src_memory = std::make_shared<mkldnn::memory>(src_md, engine,
to_void_cast<T>(input_data));
src_memory = std::make_shared<dnnl::memory>(src_md, engine,
to_void_cast<T>(input_data));
std::shared_ptr<mkldnn::memory::desc> dst_md;
std::shared_ptr<dnnl::memory::desc> dst_md;
if (bfloat16) {
platform::SetDstMemoryQuantized<paddle::platform::bfloat16>(
ctx, output, dst_tz, engine, dst_md, dst_memory, out_format);
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "dnnl.hpp"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/requantize_op.h"
......@@ -93,7 +93,7 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
int mask = 0;
attri.set_output_scales(mask, {reorder_scale});
if (with_shift) {
mkldnn::post_ops post_operations;
dnnl::post_ops post_operations;
post_operations.append_sum();
attri.set_post_ops(post_operations);
uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
......
......@@ -20,22 +20,22 @@ limitations under the License. */
#include <string>
#include <utility>
#include <vector>
#include "mkldnn.hpp"
#include "dnnl.hpp"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
#ifdef PADDLE_WITH_MKLDNN
using MKLDNNMemoryFormat = mkldnn::memory::format_tag;
using MKLDNNMemoryFormat = dnnl::memory::format_tag;
#endif
namespace platform {
using MKLDNNStream = mkldnn::stream;
using MKLDNNEngine = mkldnn::engine;
using MKLDNNMemory = mkldnn::memory;
using MKLDNNMemoryDescriptor = mkldnn::memory::desc;
using MKLDNNPrimitive = mkldnn::primitive;
using MKLDNNPrimitiveDesc = mkldnn::handle<mkldnn_primitive_desc_t>;
using MKLDNNStream = dnnl::stream;
using MKLDNNEngine = dnnl::engine;
using MKLDNNMemory = dnnl::memory;
using MKLDNNMemoryDescriptor = dnnl::memory::desc;
using MKLDNNPrimitive = dnnl::primitive;
using MKLDNNPrimitiveDesc = dnnl::handle<dnnl_primitive_desc_t>;
typedef std::unique_ptr<MKLDNNStream> MKLDNNStreamPtr;
typedef std::unique_ptr<MKLDNNEngine> MKLDNNEnginePtr;
......@@ -62,7 +62,7 @@ using tf_pd = typename Type::primitive_desc;
template <typename Type, typename Engine, typename... Args>
std::shared_ptr<tf_pd<Type>> MKLDNNFwdPrimitiveDesc(const Engine& e,
Args&&... args) {
auto desc = tf_desc<Type>(mkldnn::prop_kind::forward, (args)...);
auto desc = tf_desc<Type>(dnnl::prop_kind::forward, (args)...);
auto pd = new tf_pd<Type>(desc, e);
return std::shared_ptr<tf_pd<Type>>(pd);
}
......@@ -129,10 +129,10 @@ struct mkldnn_dummy_primitive {
struct desc {};
};
inline mkldnn::memory::desc MKLDNNMemDesc(const std::vector<int64_t>& dims,
mkldnn::memory::data_type data_type,
MKLDNNMemoryFormat format) {
return mkldnn::memory::desc({dims}, data_type, format);
inline dnnl::memory::desc MKLDNNMemDesc(const std::vector<int64_t>& dims,
dnnl::memory::data_type data_type,
MKLDNNMemoryFormat format) {
return dnnl::memory::desc({dims}, data_type, format);
}
inline void ClearMKLDNNCache(const platform::Place& place,
......@@ -159,36 +159,35 @@ inline void DontClearMKLDNNCache(const platform::Place& place) {
}
template <typename Type>
mkldnn::memory::data_type MKLDNNGetDataType() {
return mkldnn::memory::data_type::undef;
dnnl::memory::data_type MKLDNNGetDataType() {
return dnnl::memory::data_type::undef;
}
template <>
inline mkldnn::memory::data_type MKLDNNGetDataType<float>() {
return mkldnn::memory::data_type::f32;
inline dnnl::memory::data_type MKLDNNGetDataType<float>() {
return dnnl::memory::data_type::f32;
}
template <>
inline mkldnn::memory::data_type MKLDNNGetDataType<int32_t>() {
return mkldnn::memory::data_type::s32;
inline dnnl::memory::data_type MKLDNNGetDataType<int32_t>() {
return dnnl::memory::data_type::s32;
}
template <>
inline mkldnn::memory::data_type MKLDNNGetDataType<int8_t>() {
return mkldnn::memory::data_type::s8;
inline dnnl::memory::data_type MKLDNNGetDataType<int8_t>() {
return dnnl::memory::data_type::s8;
}
template <>
inline mkldnn::memory::data_type MKLDNNGetDataType<uint8_t>() {
return mkldnn::memory::data_type::u8;
inline dnnl::memory::data_type MKLDNNGetDataType<uint8_t>() {
return dnnl::memory::data_type::u8;
}
template <>
inline mkldnn::memory::data_type
MKLDNNGetDataType<paddle::platform::bfloat16>() {
return mkldnn::memory::data_type::bf16;
inline dnnl::memory::data_type MKLDNNGetDataType<paddle::platform::bfloat16>() {
return dnnl::memory::data_type::bf16;
}
inline void Reorder(mkldnn::memory src, mkldnn::memory dst,
const mkldnn::engine& engine) {
auto reorder_prim = mkldnn::reorder(src, dst);
inline void Reorder(dnnl::memory src, dnnl::memory dst,
const dnnl::engine& engine) {
auto reorder_prim = dnnl::reorder(src, dst);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
......@@ -196,8 +195,7 @@ inline void Reorder(mkldnn::memory src, mkldnn::memory dst,
astream.wait();
}
inline mkldnn::memory::format_tag GetMKLDNNFormat(
mkldnn::memory::desc mem_desc) {
inline dnnl::memory::format_tag GetMKLDNNFormat(dnnl::memory::desc mem_desc) {
auto ndims = mem_desc.data.ndims;
auto strides = mem_desc.data.format_desc.blocking.strides;
auto inner_nblks = mem_desc.data.format_desc.blocking.inner_nblks;
......@@ -205,62 +203,62 @@ inline mkldnn::memory::format_tag GetMKLDNNFormat(
auto inner_idxs = mem_desc.data.format_desc.blocking.inner_idxs;
if (ndims == 1) {
return mkldnn::memory::format_tag::x;
return dnnl::memory::format_tag::x;
} else if (ndims == 2) {
if (inner_nblks == 0) {
if (strides[0] >= strides[1]) {
return mkldnn::memory::format_tag::nc;
return dnnl::memory::format_tag::nc;
} else {
return mkldnn::memory::format_tag::cn;
return dnnl::memory::format_tag::cn;
}
}
} else if (ndims == 3) {
if (inner_nblks == 0) {
if (strides[0] >= strides[1] && strides[1] >= strides[2]) {
return mkldnn::memory::format_tag::ncw;
return dnnl::memory::format_tag::ncw;
} else if (strides[1] >= strides[0] && strides[0] >= strides[2]) {
return mkldnn::memory::format_tag::ntc;
return dnnl::memory::format_tag::ntc;
} else {
return mkldnn::memory::format_tag::nwc;
return dnnl::memory::format_tag::nwc;
}
}
} else if (ndims == 4) {
if (inner_nblks == 0) {
if (strides[0] >= strides[1] && strides[1] >= strides[2] &&
strides[2] >= strides[3]) {
return mkldnn::memory::format_tag::nchw;
return dnnl::memory::format_tag::nchw;
} else if (strides[2] >= strides[3] && strides[3] >= strides[1] &&
strides[1] >= strides[0]) {
return mkldnn::memory::format_tag::cdba;
return dnnl::memory::format_tag::cdba;
} else {
return mkldnn::memory::format_tag::nhwc;
return dnnl::memory::format_tag::nhwc;
}
} else if (inner_nblks == 1) {
if (inner_blks[0] == 16 && inner_idxs[0] == 1) {
return mkldnn::memory::format_tag::nChw16c;
return dnnl::memory::format_tag::nChw16c;
} else if (inner_blks[0] == 8 && inner_idxs[0] == 1) {
return mkldnn::memory::format_tag::nChw8c;
return dnnl::memory::format_tag::nChw8c;
} else if (inner_blks[0] == 8 && inner_idxs[0] == 0) {
if (strides[0] >= strides[2] && strides[2] >= strides[3] &&
strides[3] >= strides[1]) {
return mkldnn::memory::format_tag::Acdb8a;
return dnnl::memory::format_tag::Acdb8a;
}
} else if (inner_blks[0] == 4 && inner_idxs[0] == 1) {
return mkldnn::memory::format_tag::nChw4c;
return dnnl::memory::format_tag::nChw4c;
} else if (inner_blks[0] == 16 && inner_idxs[0] == 0) {
if (strides[0] >= strides[2] && strides[2] >= strides[3] &&
strides[3] >= strides[1]) {
return mkldnn::memory::format_tag::Acdb16a;
return dnnl::memory::format_tag::Acdb16a;
}
}
} else if (inner_nblks == 2) {
if (inner_blks[0] == 16 && inner_blks[1] == 16) {
if (inner_idxs[0] == 1 && inner_idxs[1] == 0) {
return mkldnn::memory::format_tag::OIhw16i16o;
return dnnl::memory::format_tag::OIhw16i16o;
}
} else if (inner_blks[0] == 8 && inner_blks[1] == 8) {
if (inner_idxs[0] == 1 && inner_idxs[1] == 0) {
return mkldnn::memory::format_tag::OIhw8i8o;
return dnnl::memory::format_tag::OIhw8i8o;
}
}
}
......@@ -268,38 +266,38 @@ inline mkldnn::memory::format_tag GetMKLDNNFormat(
if (inner_nblks == 0) {
if (strides[0] >= strides[1] && strides[1] >= strides[2] &&
strides[2] >= strides[3] && strides[3] >= strides[4]) {
return mkldnn::memory::format_tag::ncdhw;
return dnnl::memory::format_tag::ncdhw;
} else {
return mkldnn::memory::format_tag::ndhwc;
return dnnl::memory::format_tag::ndhwc;
}
} else if (inner_nblks == 1) {
if (inner_blks[0] == 8 && inner_idxs[0] == 0) {
if (strides[0] >= strides[2] && strides[2] >= strides[3] &&
strides[3] >= strides[4] && strides[4] >= strides[1]) {
return mkldnn::memory::format_tag::Acdeb8a;
return dnnl::memory::format_tag::Acdeb8a;
}
if (strides[0] >= strides[1] && strides[1] >= strides[2] &&
strides[2] >= strides[3] && strides[3] >= strides[4]) {
return mkldnn::memory::format_tag::Abcde8a;
return dnnl::memory::format_tag::Abcde8a;
}
} else if (inner_blks[0] == 8 && inner_idxs[0] == 1) {
if (strides[0] >= strides[1] && strides[1] >= strides[2] &&
strides[2] >= strides[3] && strides[3] >= strides[4]) {
return mkldnn::memory::format_tag::aBcde8b;
return dnnl::memory::format_tag::aBcde8b;
}
} else if (inner_blks[0] == 16 && inner_idxs[0] == 0) {
if (strides[0] >= strides[2] && strides[2] >= strides[3] &&
strides[3] >= strides[4] && strides[4] >= strides[1]) {
return mkldnn::memory::format_tag::Acdeb16a;
return dnnl::memory::format_tag::Acdeb16a;
}
if (strides[0] >= strides[1] && strides[1] >= strides[2] &&
strides[2] >= strides[3] && strides[3] >= strides[4]) {
return mkldnn::memory::format_tag::Abcde16a;
return dnnl::memory::format_tag::Abcde16a;
}
} else if (inner_blks[0] == 16 && inner_idxs[0] == 1) {
if (strides[0] >= strides[1] && strides[1] >= strides[2] &&
strides[2] >= strides[3] && strides[3] >= strides[4]) {
return mkldnn::memory::format_tag::aBcde16b;
return dnnl::memory::format_tag::aBcde16b;
}
}
}
......@@ -308,7 +306,7 @@ inline mkldnn::memory::format_tag GetMKLDNNFormat(
if (strides[0] >= strides[1] && strides[1] >= strides[2] &&
strides[2] >= strides[3] && strides[3] >= strides[4] &&
strides[4] >= strides[5]) {
return mkldnn::memory::format_tag::abcdef;
return dnnl::memory::format_tag::abcdef;
}
}
}
......@@ -325,10 +323,10 @@ inline mkldnn::memory::format_tag GetMKLDNNFormat(
// for (int i=0;i<inner_nblks;++i) {
// std::cout<<"INNER_IDXS["<<i<<"]: "<<inner_idxs[i]<<std::endl;
// }
return mkldnn::memory::format_tag::undef;
return dnnl::memory::format_tag::undef;
}
inline mkldnn::memory::format_tag GetMKLDNNFormat(const mkldnn::memory memory) {
inline dnnl::memory::format_tag GetMKLDNNFormat(const dnnl::memory memory) {
auto mem_desc = memory.get_desc();
return GetMKLDNNFormat(mem_desc);
}
......@@ -441,24 +439,24 @@ inline void AppendKey(std::string* key, const T& num) {
template <>
inline void AppendKey(std::string* key,
const mkldnn::memory::format_tag& format) {
const dnnl::memory::format_tag& format) {
key->append(std::to_string(static_cast<int>(format)));
}
template <>
inline void AppendKey(std::string* key,
const mkldnn::memory::data_type& data_type) {
const dnnl::memory::data_type& data_type) {
key->append(std::to_string(static_cast<int>(data_type)));
}
template <>
inline void AppendKey(std::string* key, const mkldnn::algorithm& algorithm) {
inline void AppendKey(std::string* key, const dnnl::algorithm& algorithm) {
key->append(std::to_string(static_cast<int>(algorithm)));
}
template <>
inline void AppendKey(std::string* key,
const mkldnn::normalization_flags& flags) {
const dnnl::normalization_flags& flags) {
key->append(std::to_string(static_cast<int>(flags)));
}
......
......@@ -33,14 +33,14 @@ namespace platform {
using framework::DataLayout;
using framework::Tensor;
using user_function = std::function<std::shared_ptr<float>(const float*)>;
using memory = mkldnn::memory;
using memory = dnnl::memory;
template <typename T, typename TForward,
typename TBackward = mkldnn_dummy_primitive,
typename TBackward_params = mkldnn_dummy_primitive>
class MKLDNNHandlerNoCachingT {
public:
MKLDNNHandlerNoCachingT(mkldnn::engine engine, platform::Place cpu_place)
MKLDNNHandlerNoCachingT(dnnl::engine engine, platform::Place cpu_place)
: engine_(engine), place_(cpu_place), fwd_pd_(nullptr), bwd_pd_(nullptr) {
platform::MKLDNNDeviceContext::tls().log_lib_version();
}
......@@ -60,7 +60,7 @@ class MKLDNNHandlerNoCachingT {
return std::make_shared<TBackward_params>(*bwd_w_pd_);
}
std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
std::shared_ptr<dnnl::memory> AcquireSrcMemory(
const framework::Tensor* input) {
const T* input_data = input->data<T>();
return this->AcquireMemoryFromPrimitive(fwd_pd_->src_desc(),
......@@ -68,33 +68,33 @@ class MKLDNNHandlerNoCachingT {
}
template <typename T_out = T>
std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output) {
std::shared_ptr<dnnl::memory> AcquireDstMemory(framework::Tensor* output) {
T_out* ptr =
output->mutable_data<T_out>(place_, fwd_pd_->dst_desc().get_size());
return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), ptr);
}
template <typename T_out = T>
std::shared_ptr<mkldnn::memory> AcquireDstMemory(void) {
std::shared_ptr<dnnl::memory> AcquireDstMemory(void) {
return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc());
}
template <typename T_out = T>
std::shared_ptr<mkldnn::memory> AcquireDstMemory(
std::shared_ptr<dnnl::memory> AcquireDstMemory(
const framework::Tensor* output) {
const T_out* output_data = output->data<T_out>();
return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_desc(),
to_void_cast<T_out>(output_data));
}
std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory(
std::shared_ptr<dnnl::memory> AcquireDiffDstMemory(
const framework::Tensor* diffdst) {
const T* ptr = diffdst->data<T>();
return this->AcquireMemoryFromPrimitive(bwd_pd_->diff_dst_desc(),
to_void_cast<T>(ptr));
}
std::shared_ptr<mkldnn::memory> AcquireDiffSrcMemory(
std::shared_ptr<dnnl::memory> AcquireDiffSrcMemory(
framework::Tensor* diffsrc) {
T* ptr =
diffsrc->mutable_data<T>(place_, bwd_pd_->diff_src_desc().get_size());
......@@ -102,7 +102,7 @@ class MKLDNNHandlerNoCachingT {
}
// Buffer of given Tensor is used for oneDNN computation
std::shared_ptr<mkldnn::memory> AcquireDiffWeightsMemory(
std::shared_ptr<dnnl::memory> AcquireDiffWeightsMemory(
framework::Tensor* diff_weights) {
PADDLE_ENFORCE_NOT_NULL(
bwd_w_pd_,
......@@ -115,7 +115,7 @@ class MKLDNNHandlerNoCachingT {
}
// Buffer is allocated by oneDNN to store computation results
std::shared_ptr<mkldnn::memory> AcquireDiffWeightsMemory(void) {
std::shared_ptr<dnnl::memory> AcquireDiffWeightsMemory(void) {
PADDLE_ENFORCE_NOT_NULL(
bwd_w_pd_,
platform::errors::Unavailable(
......@@ -179,37 +179,36 @@ class MKLDNNHandlerNoCachingT {
bwd_desc, engine_, *fwd_pd_);
}
std::shared_ptr<mkldnn::memory> AcquireMemoryFromPrimitive(
mkldnn::memory::desc md, void* ptr) {
return std::make_shared<mkldnn::memory>(md, engine_, ptr);
std::shared_ptr<dnnl::memory> AcquireMemoryFromPrimitive(
dnnl::memory::desc md, void* ptr) {
return std::make_shared<dnnl::memory>(md, engine_, ptr);
}
std::shared_ptr<mkldnn::memory> AcquireMemoryFromPrimitive(
mkldnn::memory::desc md) {
return std::make_shared<mkldnn::memory>(md, engine_);
std::shared_ptr<dnnl::memory> AcquireMemoryFromPrimitive(
dnnl::memory::desc md) {
return std::make_shared<dnnl::memory>(md, engine_);
}
void AcquireReorder(const std::shared_ptr<mkldnn::memory>& user_memory_p,
const std::shared_ptr<mkldnn::memory>& target_memory_p) {
void AcquireReorder(const std::shared_ptr<dnnl::memory>& user_memory_p,
const std::shared_ptr<dnnl::memory>& target_memory_p) {
auto reorder_p =
std::make_shared<mkldnn::reorder>(*user_memory_p, *target_memory_p);
std::make_shared<dnnl::reorder>(*user_memory_p, *target_memory_p);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
}
template <typename F = T>
std::shared_ptr<mkldnn::memory> AcquireMemoryWithReorder(
const mkldnn::memory::desc& user_md,
const mkldnn::memory::desc& target_md, void* ptr,
bool is_persistent = false,
std::shared_ptr<dnnl::memory> AcquireMemoryWithReorder(
const dnnl::memory::desc& user_md, const dnnl::memory::desc& target_md,
void* ptr, bool is_persistent = false,
std::function<std::shared_ptr<F>(const F*)> custom_reorder_func = {}) {
std::shared_ptr<mkldnn::memory> target_memory_p;
std::shared_ptr<dnnl::memory> target_memory_p;
if (custom_reorder_func) {
auto reordered_data =
custom_reorder_func(reinterpret_cast<const F*>(ptr));
......@@ -217,15 +216,15 @@ class MKLDNNHandlerNoCachingT {
}
auto user_memory_p = std::make_shared<dnnl::memory>(user_md, engine_, ptr);
if (user_md != target_md) {
target_memory_p = std::make_shared<mkldnn::memory>(target_md, engine_);
target_memory_p = std::make_shared<dnnl::memory>(target_md, engine_);
auto reorder_p =
std::make_shared<dnnl::reorder>(*user_memory_p, *target_memory_p);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
} else {
target_memory_p = user_memory_p;
......@@ -233,7 +232,7 @@ class MKLDNNHandlerNoCachingT {
return target_memory_p;
}
mkldnn::engine engine_;
dnnl::engine engine_;
platform::Place place_;
std::shared_ptr<typename TForward::primitive_desc> fwd_pd_;
std::shared_ptr<typename TBackward::primitive_desc> bwd_pd_;
......@@ -245,7 +244,7 @@ template <typename T, typename TForward,
typename TBackward_params = mkldnn_dummy_primitive>
class MKLDNNHandlerT {
public:
MKLDNNHandlerT(const MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine,
MKLDNNHandlerT(const MKLDNNDeviceContext& dev_ctx, dnnl::engine engine,
platform::Place cpu_place, const std::string& base_key)
: dev_ctx_(dev_ctx),
engine_(engine),
......@@ -294,7 +293,7 @@ class MKLDNNHandlerT {
return backward_p;
}
std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
std::shared_ptr<dnnl::memory> AcquireSrcMemory(
const framework::Tensor* input) {
const T* input_data = input->data<T>();
return this->AcquireMemoryFromPrimitive(
......@@ -302,7 +301,7 @@ class MKLDNNHandlerT {
}
template <typename T_out = T>
std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output) {
std::shared_ptr<dnnl::memory> AcquireDstMemory(framework::Tensor* output) {
T_out* ptr =
output->mutable_data<T_out>(place_, fwd_pd_->dst_desc().get_size());
return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), ptr,
......@@ -310,12 +309,12 @@ class MKLDNNHandlerT {
}
template <typename T_out = T>
std::shared_ptr<mkldnn::memory> AcquireDstMemory(void) {
std::shared_ptr<dnnl::memory> AcquireDstMemory(void) {
return this->AcquireMemoryFromPrimitive(fwd_pd_->dst_desc(), "@dstt_mem_p");
}
template <typename T_out = T>
std::shared_ptr<mkldnn::memory> AcquireDstMemory(
std::shared_ptr<dnnl::memory> AcquireDstMemory(
const framework::Tensor* output) {
const T_out* output_data = output->data<T_out>();
return this->AcquireMemoryFromPrimitive(bwd_pd_->dst_desc(),
......@@ -323,14 +322,14 @@ class MKLDNNHandlerT {
"@bwd-dst_mem_p");
}
std::shared_ptr<mkldnn::memory> AcquireDiffDstMemory(
std::shared_ptr<dnnl::memory> AcquireDiffDstMemory(
const framework::Tensor* diffdst) {
const T* ptr = diffdst->data<T>();
return this->AcquireMemoryFromPrimitive(
bwd_pd_->diff_dst_desc(), to_void_cast<T>(ptr), "@diff_dst_mem_p");
}
std::shared_ptr<mkldnn::memory> AcquireDiffSrcMemory(
std::shared_ptr<dnnl::memory> AcquireDiffSrcMemory(
framework::Tensor* diffsrc) {
T* ptr =
diffsrc->mutable_data<T>(place_, bwd_pd_->diff_src_desc().get_size());
......@@ -339,7 +338,7 @@ class MKLDNNHandlerT {
}
// Buffer of given Tensor is used for oneDNN computation
std::shared_ptr<mkldnn::memory> AcquireDiffWeightsMemory(
std::shared_ptr<dnnl::memory> AcquireDiffWeightsMemory(
framework::Tensor* diff_weights) {
PADDLE_ENFORCE_NOT_NULL(
bwd_w_pd_,
......@@ -352,7 +351,7 @@ class MKLDNNHandlerT {
}
// Buffer is allocated by oneDNN to store computation results
std::shared_ptr<mkldnn::memory> AcquireDiffWeightsMemory(void) {
std::shared_ptr<dnnl::memory> AcquireDiffWeightsMemory(void) {
PADDLE_ENFORCE_NOT_NULL(
bwd_w_pd_,
platform::errors::Unavailable(
......@@ -467,19 +466,19 @@ class MKLDNNHandlerT {
}
}
std::shared_ptr<mkldnn::memory> AcquireMemoryFromPrimitive(
std::shared_ptr<dnnl::memory> AcquireMemoryFromPrimitive(
const std::string& suffix) {
return std::static_pointer_cast<mkldnn::memory>(
return std::static_pointer_cast<dnnl::memory>(
dev_ctx_.GetBlob(key_ + suffix));
}
std::shared_ptr<mkldnn::memory> AcquireMemoryFromPrimitive(
mkldnn::memory::desc md, void* ptr, const std::string& suffix) {
std::shared_ptr<dnnl::memory> AcquireMemoryFromPrimitive(
dnnl::memory::desc md, void* ptr, const std::string& suffix) {
const auto local_key = key_ + suffix;
auto mem_p =
std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
std::static_pointer_cast<dnnl::memory>(dev_ctx_.GetBlob(local_key));
if (mem_p == nullptr) {
mem_p = std::make_shared<mkldnn::memory>(md, engine_, ptr);
mem_p = std::make_shared<dnnl::memory>(md, engine_, ptr);
dev_ctx_.SetBlob(local_key, mem_p);
} else {
mem_p->set_data_handle(ptr);
......@@ -487,37 +486,36 @@ class MKLDNNHandlerT {
return mem_p;
}
std::shared_ptr<mkldnn::memory> AcquireMemoryFromPrimitive(
mkldnn::memory::desc md, const std::string& suffix) {
std::shared_ptr<dnnl::memory> AcquireMemoryFromPrimitive(
dnnl::memory::desc md, const std::string& suffix) {
const auto local_key = key_ + suffix;
auto mem_p =
std::static_pointer_cast<mkldnn::memory>(dev_ctx_.GetBlob(local_key));
std::static_pointer_cast<dnnl::memory>(dev_ctx_.GetBlob(local_key));
if (mem_p == nullptr) {
mem_p = std::make_shared<mkldnn::memory>(md, engine_);
mem_p = std::make_shared<dnnl::memory>(md, engine_);
dev_ctx_.SetBlob(local_key, mem_p);
}
return mem_p;
}
void AcquireReorder(const std::shared_ptr<mkldnn::memory>& user_memory_p,
const std::shared_ptr<mkldnn::memory>& target_memory_p) {
void AcquireReorder(const std::shared_ptr<dnnl::memory>& user_memory_p,
const std::shared_ptr<dnnl::memory>& target_memory_p) {
auto reorder_p =
std::make_shared<mkldnn::reorder>(*user_memory_p, *target_memory_p);
std::make_shared<dnnl::reorder>(*user_memory_p, *target_memory_p);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
}
template <typename F = T>
std::shared_ptr<mkldnn::memory> AcquireMemoryWithReorder(
const mkldnn::memory::desc& user_md,
const mkldnn::memory::desc& target_md, void* ptr,
const std::string& suffix, bool is_persistent = false,
std::shared_ptr<dnnl::memory> AcquireMemoryWithReorder(
const dnnl::memory::desc& user_md, const dnnl::memory::desc& target_md,
void* ptr, const std::string& suffix, bool is_persistent = false,
std::function<std::shared_ptr<F>(const F*)> custom_reorder_func = {},
const std::vector<float>& scale_data = {1.0f}, int mask = 0) {
const auto target_key = key_ + suffix + "_target";
......@@ -537,7 +535,7 @@ class MKLDNNHandlerT {
auto user_memory_p =
std::make_shared<dnnl::memory>(user_md, engine_, ptr);
if (user_md != target_md) {
target_memory_p = std::make_shared<mkldnn::memory>(target_md, engine_);
target_memory_p = std::make_shared<dnnl::memory>(target_md, engine_);
dnnl::reorder::primitive_desc reorder_pdesc;
if (is_int8<T>()) {
dnnl::primitive_attr attr;
......@@ -554,8 +552,8 @@ class MKLDNNHandlerT {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
} else {
target_memory_p = user_memory_p;
......@@ -571,27 +569,26 @@ class MKLDNNHandlerT {
// TODO(jczaja): Here we detect if reorder is cached it means it is needed
// need to change this to get rid of keys
auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}});
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
}
}
return target_memory_p;
}
std::shared_ptr<mkldnn::memory> AcquireMemory(const std::string& suffix) {
std::shared_ptr<dnnl::memory> AcquireMemory(const std::string& suffix) {
const auto local_key = key_ + suffix;
return std::static_pointer_cast<mkldnn::memory>(
dev_ctx_.GetBlob(local_key));
return std::static_pointer_cast<dnnl::memory>(dev_ctx_.GetBlob(local_key));
}
const MKLDNNDeviceContext& dev_ctx_;
mkldnn::engine engine_;
dnnl::engine engine_;
platform::Place place_;
std::string key_common_;
std::string key_;
......@@ -605,7 +602,7 @@ class BinaryMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
public:
BinaryMKLDNNHandler(const dnnl::algorithm algo, const int axis,
const mkldnn::engine engine, platform::Place cpu_place,
const dnnl::engine engine, platform::Place cpu_place,
const Tensor* x, const Tensor* y, Tensor* z,
float scale_x, float scale_y, float scale_z,
const dnnl::post_ops& post_ops = dnnl::post_ops())
......@@ -662,7 +659,7 @@ class BinaryMKLDNNHandler
this->AcquireForwardPrimitiveDescriptor(attributes, algo, src0_md, src1_md,
dst_md);
}
std::shared_ptr<mkldnn::memory> AcquireSecondSrcMemory(
std::shared_ptr<dnnl::memory> AcquireSecondSrcMemory(
const framework::Tensor* input) {
const T* input_data = input->data<T>();
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src1_desc(),
......@@ -707,7 +704,7 @@ class BroadcastDataMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
public:
BroadcastDataMKLDNNHandler(const dnnl::algorithm algo,
const mkldnn::engine engine,
const dnnl::engine engine,
platform::Place cpu_place, const Tensor* out,
const Tensor* x, float scale_x, float scale_y,
const std::vector<int64_t>& input_dims)
......@@ -735,7 +732,7 @@ class BroadcastDataMKLDNNHandler
}
template <typename T_out = T>
std::shared_ptr<mkldnn::memory> AcquireDstMemory(framework::Tensor* output) {
std::shared_ptr<dnnl::memory> AcquireDstMemory(framework::Tensor* output) {
T_out* ptr = output->mutable_data<T_out>(
this->place_, this->fwd_pd_->dst_desc().get_size());
memset(ptr, 0, this->fwd_pd_->dst_desc().get_size());
......@@ -748,7 +745,7 @@ class ReductionMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, dnnl::reduction> {
public:
ReductionMKLDNNHandler(const dnnl::algorithm algo, const float p,
const float eps, const mkldnn::engine engine,
const float eps, const dnnl::engine engine,
platform::Place cpu_place, const Tensor* x,
const Tensor* y, std::vector<int64_t> y_tz,
const dnnl::primitive_attr& attr = NULL)
......@@ -777,16 +774,16 @@ class ReductionMKLDNNHandler
template <typename T>
class ActivationMKLDNNHandler
: public MKLDNNHandlerNoCachingT<T, mkldnn::eltwise_forward,
mkldnn::eltwise_backward> {
: public MKLDNNHandlerNoCachingT<T, dnnl::eltwise_forward,
dnnl::eltwise_backward> {
public:
ActivationMKLDNNHandler(mkldnn::algorithm algorithm,
ActivationMKLDNNHandler(dnnl::algorithm algorithm,
const framework::ExecutionContext& ctx,
const mkldnn::engine engine, Place cpu_place,
const dnnl::engine engine, Place cpu_place,
const framework::Tensor* in_x)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::eltwise_forward,
mkldnn::eltwise_backward>(engine,
cpu_place) {
: platform::MKLDNNHandlerNoCachingT<T, dnnl::eltwise_forward,
dnnl::eltwise_backward>(engine,
cpu_place) {
float alpha = ctx.HasAttr("alpha") ? ctx.Attr<float>("alpha") : 0;
float beta = ctx.HasAttr("beta") ? ctx.Attr<float>("beta") : 0;
......@@ -811,7 +808,7 @@ class ActivationMKLDNNHandler
: ctx.Attr<float>("max");
} else {
// paddle uses beta but mkldnn uses alpha for swish
if (algorithm == mkldnn::algorithm::eltwise_swish) {
if (algorithm == dnnl::algorithm::eltwise_swish) {
std::swap(alpha, beta);
} else if (algorithm == dnnl::algorithm::eltwise_bounded_relu) {
alpha = ctx.Attr<float>("threshold");
......@@ -827,24 +824,24 @@ class ActivationMKLDNNHandler
auto src_tz = framework::vectorize<int64_t>(in_x->dims());
auto src_fmt = src_tz.size() == 2 ? MKLDNNMemoryFormat::nc : in_x->format();
auto md =
mkldnn::memory::desc(src_tz, platform::MKLDNNGetDataType<T>(), src_fmt);
dnnl::memory::desc(src_tz, platform::MKLDNNGetDataType<T>(), src_fmt);
this->AcquireForwardPrimitiveDescriptor(mkldnn::prop_kind::forward_training,
this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training,
algorithm, md, alpha, beta);
}
ActivationMKLDNNHandler(mkldnn::algorithm algorithm,
ActivationMKLDNNHandler(dnnl::algorithm algorithm,
const framework::ExecutionContext& ctx,
const mkldnn::engine engine, Place cpu_place,
const dnnl::engine engine, Place cpu_place,
const framework::Tensor* in_x, const Tensor* out_grad)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::eltwise_forward,
mkldnn::eltwise_backward>(engine,
cpu_place) {
: platform::MKLDNNHandlerNoCachingT<T, dnnl::eltwise_forward,
dnnl::eltwise_backward>(engine,
cpu_place) {
float alpha = ctx.HasAttr("alpha") ? ctx.Attr<float>("alpha") : 0;
float beta = ctx.HasAttr("beta") ? ctx.Attr<float>("beta") : 0;
// paddle uses beta but mkldnn uses alpha for swish
if (algorithm == mkldnn::algorithm::eltwise_swish) {
if (algorithm == dnnl::algorithm::eltwise_swish) {
std::swap(alpha, beta);
} else if (algorithm == dnnl::algorithm::eltwise_bounded_relu) {
alpha = ctx.Attr<float>("threshold");
......@@ -870,13 +867,13 @@ class ActivationMKLDNNHandler
auto src_md = platform::MKLDNNMemDesc(
dims, platform::MKLDNNGetDataType<T>(), src_fmt);
this->AcquireForwardPrimitiveDescriptor(mkldnn::prop_kind::forward_training,
this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training,
algorithm, src_md, alpha, beta);
this->AcquireBackwardPrimitiveDescriptor(algorithm, diff_dst_md, src_md,
alpha, beta);
}
std::shared_ptr<mkldnn::memory> AcquireBackwardSrcMemory(
std::shared_ptr<dnnl::memory> AcquireBackwardSrcMemory(
const framework::Tensor* input) {
const T* input_data = input->data<T>();
return this->AcquireMemoryFromPrimitive(this->bwd_pd_->src_desc(),
......@@ -888,7 +885,7 @@ class ReorderMKLDNNHandler {
public:
ReorderMKLDNNHandler(std::vector<int64_t>& dims, // NOLINT
framework::proto::VarType::Type vtype,
mkldnn::memory::data_type dtype, mkldnn::engine engine)
dnnl::memory::data_type dtype, dnnl::engine engine)
: dims_(dims),
vtype_(vtype),
vtype_dst_(vtype),
......@@ -898,10 +895,9 @@ class ReorderMKLDNNHandler {
ReorderMKLDNNHandler(std::vector<int64_t>& dims, // NOLINT
framework::proto::VarType::Type vtype,
mkldnn::memory::data_type dtype,
dnnl::memory::data_type dtype,
framework::proto::VarType::Type vtype_dst,
mkldnn::memory::data_type dtype_dst,
mkldnn::engine engine)
dnnl::memory::data_type dtype_dst, dnnl::engine engine)
: dims_(dims),
vtype_(vtype),
vtype_dst_(vtype_dst),
......@@ -909,56 +905,56 @@ class ReorderMKLDNNHandler {
dtype_dst_(dtype_dst),
engine_(engine) {}
std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
const MKLDNNMemoryFormat& fmt, void* ptr) {
auto md = mkldnn::memory::desc(dims_, dtype_, fmt);
return std::make_shared<mkldnn::memory>(md, engine_, ptr);
std::shared_ptr<dnnl::memory> AcquireSrcMemory(const MKLDNNMemoryFormat& fmt,
void* ptr) {
auto md = dnnl::memory::desc(dims_, dtype_, fmt);
return std::make_shared<dnnl::memory>(md, engine_, ptr);
}
std::shared_ptr<mkldnn::memory> AcquireSubmemory(
std::shared_ptr<dnnl::memory> AcquireSubmemory(
const std::vector<int64_t>& dims, const std::vector<int64_t>& offset,
const std::shared_ptr<mkldnn::memory>& mem_p) {
const std::shared_ptr<dnnl::memory>& mem_p) {
auto sub_md = mem_p->get_desc().submemory_desc(dims, {offset});
auto sub_mem_p = std::make_shared<mkldnn::memory>(sub_md, engine_,
mem_p->get_data_handle());
auto sub_mem_p = std::make_shared<dnnl::memory>(sub_md, engine_,
mem_p->get_data_handle());
return sub_mem_p;
}
std::shared_ptr<mkldnn::memory> AcquireDstMemory(
framework::Tensor* output, const MKLDNNMemoryFormat& fmt,
platform::Place place) {
std::shared_ptr<dnnl::memory> AcquireDstMemory(framework::Tensor* output,
const MKLDNNMemoryFormat& fmt,
platform::Place place) {
auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt);
auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
return std::make_shared<dnnl::memory>(dst_md, engine_, dst_data);
}
std::shared_ptr<mkldnn::memory> AcquireDstMemory(
std::shared_ptr<dnnl::memory> AcquireDstMemory(
framework::Tensor* output, const std::vector<int64_t>& dims,
const MKLDNNMemoryFormat& fmt, platform::Place place) {
auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt);
auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size());
return std::make_shared<mkldnn::memory>(dst_md, engine_, dst_data);
return std::make_shared<dnnl::memory>(dst_md, engine_, dst_data);
}
std::shared_ptr<mkldnn::reorder> AcquireReorder(
std::shared_ptr<mkldnn::memory> dst_memory_p,
std::shared_ptr<mkldnn::memory> src_memory_p) {
return std::make_shared<mkldnn::reorder>(*(src_memory_p), *(dst_memory_p));
std::shared_ptr<dnnl::reorder> AcquireReorder(
std::shared_ptr<dnnl::memory> dst_memory_p,
std::shared_ptr<dnnl::memory> src_memory_p) {
return std::make_shared<dnnl::reorder>(*(src_memory_p), *(dst_memory_p));
}
private:
std::vector<int64_t> dims_;
framework::proto::VarType::Type vtype_, vtype_dst_;
mkldnn::memory::data_type dtype_, dtype_dst_;
mkldnn::engine engine_;
dnnl::memory::data_type dtype_, dtype_dst_;
dnnl::engine engine_;
};
template <typename T>
static void SetDstMemoryQuantized(
const framework::ExecutionContext& ctx, framework::Tensor* output,
std::vector<int64_t> dst_tz, const mkldnn::engine& engine,
std::shared_ptr<mkldnn::memory::desc>& dst_md, // NOLINT
std::shared_ptr<mkldnn::memory>& dst_memory, // NOLINT
std::vector<int64_t> dst_tz, const dnnl::engine& engine,
std::shared_ptr<dnnl::memory::desc>& dst_md, // NOLINT
std::shared_ptr<dnnl::memory>& dst_memory, // NOLINT
MKLDNNMemoryFormat output_format) {
T* output_data = output->mutable_data<T>(ctx.GetPlace());
const size_t dst_dims = dst_tz.size();
......@@ -974,9 +970,9 @@ static void SetDstMemoryQuantized(
{dst_tz}, paddle::framework::ToMKLDNNDataType(
framework::DataTypeTrait<T>::DataType()),
dst_fmt);
dst_md.reset(new mkldnn::memory::desc(tmp_dst_md));
dst_md.reset(new dnnl::memory::desc(tmp_dst_md));
dst_memory.reset(
new mkldnn::memory(*dst_md, engine, to_void_cast<T>(output_data)));
new dnnl::memory(*dst_md, engine, to_void_cast<T>(output_data)));
}
} // namespace platform
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册