Unverified  Commit 4e3d222d  authored by Sławomir Siwek, committed by GitHub

[PHI] Migrate gaussian_random kernel (#45481)

* gaussian random

* mkldnn to onednn renaming

* fix merge conflicts

* remove fluid code

* onednn renaming

* change header path

* change fluid import to phi
Parent e1a5fb8f
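In short: the oneDNN gaussian_random kernel moves out of the fluid operator framework and into PHI. The before/after registration below is copied verbatim from the diff that follows.

// Before (fluid): registered through the operator framework.
REGISTER_OP_KERNEL(gaussian_random,
                   MKLDNN,
                   ::paddle::platform::CPUPlace,
                   ops::GaussianMKLDNNKernel<float>);

// After (PHI): a plain function registered with the PHI kernel registry.
PD_REGISTER_KERNEL(
    gaussian_random, OneDNN, ALL_LAYOUT, phi::GaussianRandomKernel, float) {}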
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace paddle {
namespace operators {
using framework::DataLayout;
template <typename T>
class GaussianMKLDNNKernel : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    float mean = context.Attr<float>("mean");
    float std = context.Attr<float>("std");
    auto* tensor = context.Output<framework::Tensor>("Out");

    auto shape = GetShape(context);
    tensor->Resize(shape);
    T* data = tensor->mutable_data<T>(context.GetPlace());
    int64_t size = tensor->numel();
    std::normal_distribution<T> dist(mean, std);
    unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
    auto engine = framework::GetCPURandomEngine(seed);

    for (int64_t i = 0; i < size; ++i) {
      data[i] = dist(*engine);
    }

    dnnl::memory::desc out_mem_desc(
        phi::vectorize(tensor->dims()),
        framework::ToMKLDNNDataType(
            framework::TransToProtoVarType(tensor->dtype())),
        platform::GetPlainMKLDNNFormat(tensor->dims().size()));
    tensor->set_mem_desc(out_mem_desc);
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(gaussian_random,
MKLDNN,
::paddle::platform::CPUPlace,
ops::GaussianMKLDNNKernel<float>);
@@ -25,47 +25,75 @@
namespace phi {
namespace funcs {
-using MKLDNNMemoryFormat = dnnl::memory::format_tag;
-using MKLDNNDataType = dnnl::memory::data_type;
+using OneDNNMemoryFormat = dnnl::memory::format_tag;
+using OneDNNDataType = dnnl::memory::data_type;
template <typename Type>
void* to_void_cast(const Type* t) {
return static_cast<void*>(const_cast<Type*>(t));
}
-inline MKLDNNMemoryFormat MKLDNNFormatForSize(size_t dims_size,
-                                              MKLDNNMemoryFormat data_format) {
+inline OneDNNMemoryFormat OneDNNFormatForSize(size_t dims_size,
+                                              OneDNNMemoryFormat data_format) {
  if (dims_size == 1) {
-    return MKLDNNMemoryFormat::x;
+    return OneDNNMemoryFormat::x;
  } else if (dims_size == 2) {
-    return MKLDNNMemoryFormat::nc;
+    return OneDNNMemoryFormat::nc;
  } else if (dims_size == 3) {
-    if (data_format == MKLDNNMemoryFormat::nchw) {
-      return MKLDNNMemoryFormat::ncw;
-    } else if (data_format == MKLDNNMemoryFormat::nhwc) {
-      return MKLDNNMemoryFormat::nwc;
+    if (data_format == OneDNNMemoryFormat::nchw) {
+      return OneDNNMemoryFormat::ncw;
+    } else if (data_format == OneDNNMemoryFormat::nhwc) {
+      return OneDNNMemoryFormat::nwc;
    }
  } else if (dims_size == 4) {
-    if (data_format == MKLDNNMemoryFormat::goihw) {
-      return MKLDNNMemoryFormat::oihw;
+    if (data_format == OneDNNMemoryFormat::goihw) {
+      return OneDNNMemoryFormat::oihw;
    }
  } else if (dims_size == 5) {
-    if (data_format == MKLDNNMemoryFormat::goidhw) {
-      return MKLDNNMemoryFormat::oidhw;
+    if (data_format == OneDNNMemoryFormat::goidhw) {
+      return OneDNNMemoryFormat::oidhw;
    }
-    if (data_format == MKLDNNMemoryFormat::nchw) {
-      return MKLDNNMemoryFormat::ncdhw;
-    } else if (data_format == MKLDNNMemoryFormat::nhwc) {
-      return MKLDNNMemoryFormat::ndhwc;
+    if (data_format == OneDNNMemoryFormat::nchw) {
+      return OneDNNMemoryFormat::ncdhw;
+    } else if (data_format == OneDNNMemoryFormat::nhwc) {
+      return OneDNNMemoryFormat::ndhwc;
    }
  } else if (dims_size == 6) {
-    if (data_format == MKLDNNMemoryFormat::nchw) {
-      return MKLDNNMemoryFormat::abcdef;
+    if (data_format == OneDNNMemoryFormat::nchw) {
+      return OneDNNMemoryFormat::abcdef;
    }
  }
  return data_format;
}
inline dnnl::memory::format_tag GetPlainOneDNNFormat(int tensor_rank) {
  switch (tensor_rank) {
    case 1:
      return dnnl::memory::format_tag::a;
    case 2:
      return dnnl::memory::format_tag::ab;
    case 3:
      return dnnl::memory::format_tag::abc;
    case 4:
      return dnnl::memory::format_tag::abcd;
    case 5:
      return dnnl::memory::format_tag::abcde;
    case 6:
      return dnnl::memory::format_tag::abcdef;
    case 7:
      return dnnl::memory::format_tag::abcdefg;
    case 8:
      return dnnl::memory::format_tag::abcdefgh;
    case 9:
      return dnnl::memory::format_tag::abcdefghi;
    default:
      PADDLE_THROW(phi::errors::Unimplemented(
          "Paddle supports tensors with rank in range <1, 9>, but received "
          "tensor with rank: %d",
          tensor_rank));
  }
}
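For illustration, a minimal sketch of how the plain format tag is typically combined with dims and dtype to build a memory descriptor (assuming code inside phi::funcs; the dims are made-up example values):

// Hypothetical example: rank 4 maps to format_tag::abcd (plain row-major).
std::vector<int64_t> dims = {8, 3, 224, 224};  // example values only
auto tag = GetPlainOneDNNFormat(static_cast<int>(dims.size()));
dnnl::memory::desc md(dims, dnnl::memory::data_type::f32, tag);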
inline void MatchShapeToLayout(DenseTensor* tensor_in,
DataLayout from,
DataLayout to) {
@@ -119,14 +147,14 @@ inline void MatchShapeToLayout(DenseTensor* tensor_in,
}
}
-struct mkldnn_dummy_primitive {
+struct onednn_dummy_primitive {
struct primitive_desc {};
struct desc {};
};
-inline dnnl::memory::desc MKLDNNMemDesc(const std::vector<int64_t>& dims,
+inline dnnl::memory::desc OneDNNMemDesc(const std::vector<int64_t>& dims,
                                        dnnl::memory::data_type data_type,
-                                        MKLDNNMemoryFormat format) {
+                                        OneDNNMemoryFormat format) {
return dnnl::memory::desc({dims}, data_type, format);
}
......
@@ -26,6 +26,7 @@ limitations under the License. */
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/data_layout_transform.h"
namespace phi {
namespace funcs {
@@ -38,8 +39,8 @@ using MKLDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T,
          typename TForward,
-          typename TBackward = mkldnn_dummy_primitive,
-          typename TBackward_params = mkldnn_dummy_primitive>
+          typename TBackward = onednn_dummy_primitive,
+          typename TBackward_params = onednn_dummy_primitive>
class MKLDNNHandlerNoCachingT {
public:
MKLDNNHandlerNoCachingT(dnnl::engine engine, Place cpu_place)
@@ -250,12 +251,12 @@ class MKLDNNHandlerNoCachingT {
};

template <typename T>
-class ActivationMKLDNNHandler
+class ActivationOneDNNHandler
: public MKLDNNHandlerNoCachingT<T,
dnnl::eltwise_forward,
dnnl::eltwise_backward> {
public:
-  ActivationMKLDNNHandler(dnnl::algorithm algorithm,
+  ActivationOneDNNHandler(dnnl::algorithm algorithm,
float alpha,
float beta,
const dnnl::engine engine,
@@ -271,7 +272,7 @@ class ActivationMKLDNNHandler
                 beta);
  }

-  ActivationMKLDNNHandler(dnnl::algorithm algorithm,
+  ActivationOneDNNHandler(dnnl::algorithm algorithm,
float alpha,
float beta,
const dnnl::engine engine,
@@ -298,9 +299,9 @@ class ActivationMKLDNNHandler
  }
};

-class ReorderMKLDNNHandler {
+class ReorderOneDNNHandler {
 public:
-  ReorderMKLDNNHandler(std::vector<int64_t>& dims,  // NOLINT
+  ReorderOneDNNHandler(std::vector<int64_t>& dims,  // NOLINT
DataType ptype,
dnnl::memory::data_type dtype,
dnnl::engine engine)
@@ -311,7 +312,7 @@ class ReorderMKLDNNHandler {
        dtype_dst_(dtype),
        engine_(engine) {}

-  ReorderMKLDNNHandler(std::vector<int64_t>& dims,  // NOLINT
+  ReorderOneDNNHandler(std::vector<int64_t>& dims,  // NOLINT
DataType ptype,
dnnl::memory::data_type dtype,
DataType ptype_dst,
@@ -348,7 +349,7 @@ class ReorderMKLDNNHandler {
  std::shared_ptr<dnnl::memory> AcquireDstMemory(DenseTensor* output,
                                                 const MKLDNNMemoryFormat& fmt,
                                                 Place place) {
-    auto dst_md = MKLDNNMemDesc(dims_, dtype_dst_, fmt);
+    auto dst_md = OneDNNMemDesc(dims_, dtype_dst_, fmt);
auto dst_data = output->mutable_data(place, ptype_dst_, dst_md.get_size());
return std::make_shared<dnnl::memory>(dst_md, engine_, dst_data);
}
@@ -373,7 +374,7 @@ class ReorderMKLDNNHandler {
      const std::vector<int64_t>& dims,
      const MKLDNNMemoryFormat& fmt,
      Place place) {
-    auto dst_md = MKLDNNMemDesc(dims, dtype_dst_, fmt);
+    auto dst_md = OneDNNMemDesc(dims, dtype_dst_, fmt);
auto dst_data = output->mutable_data(place, ptype_dst_, dst_md.get_size());
return std::make_shared<dnnl::memory>(dst_md, engine_, dst_data);
}
......
@@ -123,7 +123,7 @@ const std::string& TransToFluidOpName(const std::string& phi_kernel_name) {
}

#ifdef PADDLE_WITH_MKLDNN
-dnnl::memory::data_type TransToMKLDNNDataType(
+dnnl::memory::data_type TransToOneDNNDataType(
const paddle::experimental::DataType& dtype) {
switch (dtype) {
case DataType::FLOAT32:
......
@@ -33,7 +33,7 @@ Backend TransToPhiBackend(const phi::Place& place);
phi::Place TransToPhiPlace(const Backend& backend, bool set_device_id = true);

#ifdef PADDLE_WITH_MKLDNN
-dnnl::memory::data_type TransToMKLDNNDataType(
+dnnl::memory::data_type TransToOneDNNDataType(
const paddle::experimental::DataType& dtype);
#endif
......
@@ -353,7 +353,7 @@ std::vector<DenseTensor> DenseTensor::Chunk(int64_t chunks,
dnnl::memory::desc DenseTensor::mem_desc() const {
  return mem_desc_ ? mem_desc_
                   : dnnl::memory::desc(phi::vectorize(meta_.dims),
-                                       phi::TransToMKLDNNDataType(meta_.dtype),
+                                       phi::TransToOneDNNDataType(meta_.dtype),
format_);
}
......
@@ -52,7 +52,7 @@ void* GetDataFromTensor(const DenseTensor& tensor,
  }
}

-void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
+void innerTransDataLayoutFromOneDNN(DataLayout in_layout,
DataLayout out_layout,
const DenseTensor& in,
DenseTensor* out,
@@ -68,15 +68,15 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
  auto in_tz = vectorize<int64_t>(in.dims());
  auto out_tz = in_tz;

-  auto in_type = ToMKLDNNDataType(in.dtype());
+  auto in_type = ToOneDNNDataType(in.dtype());
  PADDLE_ENFORCE_NE(
      in_type,
-      MKLDNNDataType::undef,
+      OneDNNDataType::undef,
      errors::InvalidArgument("Input tensor type (%s) is not supported.",
                              in.dtype()));

  auto out_format =
-      MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout));
+      OneDNNFormatForSize(in_tz.size(), ToOneDNNFormat(out_layout));
  dnnl::memory::desc out_mem_desc(out_tz, in_type, out_format);

  // output tensor has the same dims as input. Reorder doesn't change dims
@@ -86,7 +86,7 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
  if ((in.mem_desc() != out->mem_desc()) || always_copy) {
    void* in_data = GetDataFromTensor(in, in_type);

-    ReorderMKLDNNHandler handler(in_tz, in.dtype(), in_type, cpu_engine);
+    ReorderOneDNNHandler handler(in_tz, in.dtype(), in_type, cpu_engine);
auto reorder_src_memory_p =
handler.AcquireSrcMemory(in.mem_desc(), in_data);
@@ -114,7 +114,7 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
  VLOG(10) << "out->layout: " << out->layout() << " in->dims: " << in.dims()
           << " out->dims: " << out->dims();

  // reset format since the out tensor will be fed to a non-MKLDNN op kernel
-  out->set_format(MKLDNNMemoryFormat::undef);
+  out->set_format(OneDNNMemoryFormat::undef);
}
#endif
......
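The layout transform above ultimately delegates to a oneDNN reorder. A standalone sketch of that mechanism using only the public dnnl API, independent of Paddle (the function name and dims are illustrative):

#include "dnnl.hpp"

// Reorder an f32 tensor from NCHW to NHWC: the core of what
// innerTransDataLayoutFromOneDNN asks oneDNN to do.
void reorder_nchw_to_nhwc(float* src, float* dst,
                          const dnnl::memory::dims& dims) {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);
  dnnl::memory src_mem({dims, dnnl::memory::data_type::f32,
                        dnnl::memory::format_tag::nchw}, eng, src);
  dnnl::memory dst_mem({dims, dnnl::memory::data_type::f32,
                        dnnl::memory::format_tag::nhwc}, eng, dst);
  dnnl::reorder(src_mem, dst_mem).execute(strm, src_mem, dst_mem);
  strm.wait();  // block until dst holds the reordered data
}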
@@ -28,19 +28,19 @@ namespace funcs {
#ifdef PADDLE_WITH_MKLDNN

-using MKLDNNDataType = dnnl::memory::data_type;
-using MKLDNNMemoryFormat = dnnl::memory::format_tag;
+using OneDNNDataType = dnnl::memory::data_type;
+using OneDNNMemoryFormat = dnnl::memory::format_tag;

-inline MKLDNNMemoryFormat ToMKLDNNFormat(const DataLayout& layout) {
+inline OneDNNMemoryFormat ToOneDNNFormat(const DataLayout& layout) {
  switch (layout) {
    case DataLayout::NHWC:
-      return MKLDNNMemoryFormat::nhwc;
+      return OneDNNMemoryFormat::nhwc;
    case DataLayout::NCHW:
-      return MKLDNNMemoryFormat::nchw;
+      return OneDNNMemoryFormat::nchw;
    case DataLayout::NCDHW:
-      return MKLDNNMemoryFormat::ncdhw;
+      return OneDNNMemoryFormat::ncdhw;
    case DataLayout::NDHWC:
-      return MKLDNNMemoryFormat::ndhwc;
+      return OneDNNMemoryFormat::ndhwc;
    default:
      PADDLE_THROW(errors::InvalidArgument(
          "Failed to convert layout %s to MKLDNN format.",
@@ -49,25 +49,25 @@ inline MKLDNNMemoryFormat ToMKLDNNFormat(const DataLayout& layout) {
}
// Caution: proto::VarType::Type -> phi::DataType after transfer
-inline MKLDNNDataType ToMKLDNNDataType(DataType type) {
-  static std::unordered_map<DataType, MKLDNNDataType> dict{
-      {DataType::FLOAT32, MKLDNNDataType::f32},
-      {DataType::INT8, MKLDNNDataType::s8},
-      {DataType::UINT8, MKLDNNDataType::u8},
-      {DataType::INT32, MKLDNNDataType::s32},
-      {DataType::BFLOAT16, MKLDNNDataType::bf16}};
+inline OneDNNDataType ToOneDNNDataType(DataType type) {
+  static std::unordered_map<DataType, OneDNNDataType> dict{
+      {DataType::FLOAT32, OneDNNDataType::f32},
+      {DataType::INT8, OneDNNDataType::s8},
+      {DataType::UINT8, OneDNNDataType::u8},
+      {DataType::INT32, OneDNNDataType::s32},
+      {DataType::BFLOAT16, OneDNNDataType::bf16}};
  auto iter = dict.find(type);
  if (iter != dict.end()) return iter->second;
-  return MKLDNNDataType::undef;
+  return OneDNNDataType::undef;
}
-void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
+void innerTransDataLayoutFromOneDNN(DataLayout in_layout,
DataLayout out_layout,
const DenseTensor& in,
DenseTensor* out,
Place place,
bool always_copy = false);
-void* GetDataFromTensor(const DenseTensor& tensor, MKLDNNDataType type);
+void* GetDataFromTensor(const DenseTensor& tensor, OneDNNDataType type);
#endif
......
@@ -75,10 +75,13 @@ void eltwise_grad(const OneDNNContext& dev_ctx,
                  float beta,
                  DenseTensor* dx,
                  dnnl::algorithm algorithm) {
-  const auto& mkldnn_engine = dev_ctx.GetEngine();
-
-  funcs::ActivationMKLDNNHandler<T> handler(
-      algorithm, alpha, beta, mkldnn_engine, dev_ctx.GetPlace(), &x, &dout);
+  funcs::ActivationOneDNNHandler<T> handler(algorithm,
+                                            alpha,
+                                            beta,
+                                            dev_ctx.GetEngine(),
+                                            dev_ctx.GetPlace(),
+                                            &x,
+                                            &dout);
auto src_memory_p = handler.AcquireBackwardSrcMemory(&x);
auto diff_dst_memory_p = handler.AcquireDiffDstMemory(&dout);
@@ -103,10 +106,13 @@ void eltwise_grad_use_out(const OneDNNContext& dev_ctx,
                          float beta,
                          DenseTensor* dx,
                          dnnl::algorithm algorithm) {
-  const auto& mkldnn_engine = dev_ctx.GetEngine();
-
-  funcs::ActivationMKLDNNHandler<T> handler(
-      algorithm, alpha, beta, mkldnn_engine, dev_ctx.GetPlace(), &out, &dout);
+  funcs::ActivationOneDNNHandler<T> handler(algorithm,
+                                            alpha,
+                                            beta,
+                                            dev_ctx.GetEngine(),
+                                            dev_ctx.GetPlace(),
+                                            &out,
+                                            &dout);
auto dst_memory_p = handler.AcquireBackwardSrcMemory(&out);
auto diff_dst_memory_p = handler.AcquireDiffDstMemory(&dout);
@@ -124,7 +130,7 @@ void eltwise_grad_use_out(const OneDNNContext& dev_ctx,
}

template <typename T, dnnl::algorithm algorithm>
-struct MKLDNNActivationGradFunc : public funcs::BaseActivationFunctor<T> {
+struct OneDNNActivationGradFunc : public funcs::BaseActivationFunctor<T> {
void operator()(const OneDNNContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& dout,
@@ -136,7 +142,7 @@ struct MKLDNNActivationGradFunc : public funcs::BaseActivationFunctor<T> {
};

template <typename T, dnnl::algorithm algorithm>
-struct MKLDNNActivationGradUseOutFunc : public funcs::BaseActivationFunctor<T> {
+struct OneDNNActivationGradUseOutFunc : public funcs::BaseActivationFunctor<T> {
void operator()(const OneDNNContext& dev_ctx,
const DenseTensor& out,
const DenseTensor& dout,
@@ -148,66 +154,66 @@ struct MKLDNNActivationGradUseOutFunc : public funcs::BaseActivationFunctor<T> {
};

template <typename T>
-using AbsMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_abs>;
+using AbsOneDNNGradFunctor =
+    OneDNNActivationGradFunc<T, dnnl::algorithm::eltwise_abs>;

template <typename T>
-using ReluMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_relu>;
+using ReluOneDNNGradFunctor =
+    OneDNNActivationGradFunc<T, dnnl::algorithm::eltwise_relu>;

template <typename T>
-using SwishMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_swish>;
+using SwishOneDNNGradFunctor =
+    OneDNNActivationGradFunc<T, dnnl::algorithm::eltwise_swish>;

template <typename T>
-using HardSwishMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_hardswish>;
+using HardSwishOneDNNGradFunctor =
+    OneDNNActivationGradFunc<T, dnnl::algorithm::eltwise_hardswish>;

template <typename T>
-using MishMKLDNNGradFunctor =
-    MKLDNNActivationGradFunc<T, dnnl::algorithm::eltwise_mish>;
+using MishOneDNNGradFunctor =
+    OneDNNActivationGradFunc<T, dnnl::algorithm::eltwise_mish>;

template <typename T>
-using SigmoidMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
+using SigmoidOneDNNGradUseOutFunctor = OneDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_logistic_use_dst_for_bwd>;

template <typename T>
-using TanhMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
+using TanhOneDNNGradUseOutFunctor = OneDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_tanh_use_dst_for_bwd>;

template <typename T>
-using SqrtMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
+using SqrtOneDNNGradUseOutFunctor = OneDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_sqrt_use_dst_for_bwd>;

template <typename T>
-using EluMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
+using EluOneDNNGradUseOutFunctor = OneDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_elu_use_dst_for_bwd>;

template <typename T>
-using ExpMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
+using ExpOneDNNGradUseOutFunctor = OneDNNActivationGradUseOutFunc<
    T,
    dnnl::algorithm::eltwise_exp_use_dst_for_bwd>;

-DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, TanhMKLDNNGradUseOutFunctor);
-DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, SqrtMKLDNNGradUseOutFunctor);
+DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, TanhOneDNNGradUseOutFunctor);
+DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, SqrtOneDNNGradUseOutFunctor);
DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid,
-                                            SigmoidMKLDNNGradUseOutFunctor);
-DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, ExpMKLDNNGradUseOutFunctor);
-DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Abs, AbsMKLDNNGradFunctor);
-DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, ReluMKLDNNGradFunctor);
+                                            SigmoidOneDNNGradUseOutFunctor);
+DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, ExpOneDNNGradUseOutFunctor);
+DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Abs, AbsOneDNNGradFunctor);
+DEFINE_ONEDNN_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, ReluOneDNNGradFunctor);

DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu,
-                                                  ReluMKLDNNGradFunctor,
+                                                  ReluOneDNNGradFunctor,
                                                  alpha);
DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Mish,
-                                                  MishMKLDNNGradFunctor,
+                                                  MishOneDNNGradFunctor,
                                                  threshold);
DEFINE_ONEDNN_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(Swish,
-                                                  SwishMKLDNNGradFunctor,
+                                                  SwishOneDNNGradFunctor,
                                                  beta);
template <typename T, typename Context>
void HardSwishGradKernel(const Context& dev_ctx,
@@ -217,7 +223,7 @@ void HardSwishGradKernel(const Context& dev_ctx,
                         float scale,
                         float offset,
                         DenseTensor* dx) {
-  HardSwishMKLDNNGradFunctor<T> functor;
+  HardSwishOneDNNGradFunctor<T> functor;
functor(dev_ctx, x, dout, threshold, 0, dx);
}
@@ -228,7 +234,7 @@ void EluGradKernel(const Context& dev_ctx,
                   const DenseTensor& dout,
                   float alpha,
                   DenseTensor* dx) {
-  EluMKLDNNGradUseOutFunctor<T> functor;
+  EluOneDNNGradUseOutFunctor<T> functor;
functor(dev_ctx, out, dout, alpha, 0, dx);
}
......
@@ -52,12 +52,11 @@ void EltwiseForward(const OneDNNContext& dev_ctx,
      true,
      phi::errors::PreconditionNotMet(
          "Operator DNNL eltwise_forward must use ONEDNNPlace"));
-  const auto& mkldnn_engine = dev_ctx.GetEngine();
-
  bool is_inplaced = x.IsSharedBufferWith(*out);

-  funcs::ActivationMKLDNNHandler<T> handler(
-      algorithm, alpha, beta, mkldnn_engine, dev_ctx.GetPlace(), &x);
+  funcs::ActivationOneDNNHandler<T> handler(
+      algorithm, alpha, beta, dev_ctx.GetEngine(), dev_ctx.GetPlace(), &x);
auto src_memory_p = handler.AcquireSrcMemory(&x);
std::shared_ptr<dnnl::memory> dst_memory_p = nullptr;
@@ -78,7 +77,7 @@ void EltwiseForward(const OneDNNContext& dev_ctx,
}

template <typename T, dnnl::algorithm algorithm>
-struct MKLDNNActivationFunc : public funcs::BaseActivationFunctor<T> {
+struct OneDNNActivationFunc : public funcs::BaseActivationFunctor<T> {
void operator()(const OneDNNContext& dev_ctx,
const DenseTensor& x,
float alpha,
@@ -89,64 +88,65 @@ struct MKLDNNActivationFunc : public funcs::BaseActivationFunctor<T> {
};

template <typename T>
-using AbsMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_abs>;
+using AbsOneDNNFunctor = OneDNNActivationFunc<T, dnnl::algorithm::eltwise_abs>;

template <typename T>
-using ReluMKLDNNFunctor =
-    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_relu>;
+using ReluOneDNNFunctor =
+    OneDNNActivationFunc<T, dnnl::algorithm::eltwise_relu>;

template <typename T>
-using Relu6MKLDNNFunctor =
-    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_bounded_relu>;
+using Relu6OneDNNFunctor =
+    OneDNNActivationFunc<T, dnnl::algorithm::eltwise_bounded_relu>;

template <typename T>
-using SwishMKLDNNFunctor =
-    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_swish>;
+using SwishOneDNNFunctor =
+    OneDNNActivationFunc<T, dnnl::algorithm::eltwise_swish>;

template <typename T>
-using HardSwishMKLDNNFunctor =
-    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_hardswish>;
+using HardSwishOneDNNFunctor =
+    OneDNNActivationFunc<T, dnnl::algorithm::eltwise_hardswish>;

template <typename T>
-using MishMKLDNNFunctor =
-    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_mish>;
+using MishOneDNNFunctor =
+    OneDNNActivationFunc<T, dnnl::algorithm::eltwise_mish>;

template <typename T>
-using SigmoidMKLDNNFunctor =
-    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_logistic>;
+using SigmoidOneDNNFunctor =
+    OneDNNActivationFunc<T, dnnl::algorithm::eltwise_logistic>;

template <typename T>
-using TanhMKLDNNFunctor =
-    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_tanh>;
+using TanhOneDNNFunctor =
+    OneDNNActivationFunc<T, dnnl::algorithm::eltwise_tanh>;

template <typename T>
-using SqrtMKLDNNFunctor =
-    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_sqrt>;
+using SqrtOneDNNFunctor =
+    OneDNNActivationFunc<T, dnnl::algorithm::eltwise_sqrt>;

template <typename T>
-using EluMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_elu>;
+using EluOneDNNFunctor = OneDNNActivationFunc<T, dnnl::algorithm::eltwise_elu>;

template <typename T>
-using ExpMKLDNNFunctor = MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_exp>;
+using ExpOneDNNFunctor = OneDNNActivationFunc<T, dnnl::algorithm::eltwise_exp>;

template <typename T>
-using RoundMKLDNNFunctor =
-    MKLDNNActivationFunc<T, dnnl::algorithm::eltwise_round>;
+using RoundOneDNNFunctor =
+    OneDNNActivationFunc<T, dnnl::algorithm::eltwise_round>;

-DEFINE_ONEDNN_ACTIVATION_KERNEL(Abs, AbsMKLDNNFunctor)
-DEFINE_ONEDNN_ACTIVATION_KERNEL(Relu, ReluMKLDNNFunctor)
-DEFINE_ONEDNN_ACTIVATION_KERNEL(Tanh, TanhMKLDNNFunctor)
-DEFINE_ONEDNN_ACTIVATION_KERNEL(Exp, ExpMKLDNNFunctor)
-DEFINE_ONEDNN_ACTIVATION_KERNEL(Sqrt, SqrtMKLDNNFunctor)
-DEFINE_ONEDNN_ACTIVATION_KERNEL(Sigmoid, SigmoidMKLDNNFunctor)
+DEFINE_ONEDNN_ACTIVATION_KERNEL(Abs, AbsOneDNNFunctor)
+DEFINE_ONEDNN_ACTIVATION_KERNEL(Relu, ReluOneDNNFunctor)
+DEFINE_ONEDNN_ACTIVATION_KERNEL(Tanh, TanhOneDNNFunctor)
+DEFINE_ONEDNN_ACTIVATION_KERNEL(Exp, ExpOneDNNFunctor)
+DEFINE_ONEDNN_ACTIVATION_KERNEL(Sqrt, SqrtOneDNNFunctor)
+DEFINE_ONEDNN_ACTIVATION_KERNEL(Sigmoid, SigmoidOneDNNFunctor)

// round eltwise primitive doesn't support BF16, nor does it support grad
-DEFINE_ONEDNN_ACTIVATION_KERNEL(Round, RoundMKLDNNFunctor)
+DEFINE_ONEDNN_ACTIVATION_KERNEL(Round, RoundOneDNNFunctor)

-DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, ReluMKLDNNFunctor, alpha)
-DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishMKLDNNFunctor, threshold)
-DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Elu, EluMKLDNNFunctor, alpha)
-DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Relu6, Relu6MKLDNNFunctor, threshold)
-DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Swish, SwishMKLDNNFunctor, beta)
+DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, ReluOneDNNFunctor, alpha)
+DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Mish, MishOneDNNFunctor, threshold)
+DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Elu, EluOneDNNFunctor, alpha)
+DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Relu6, Relu6OneDNNFunctor, threshold)
+DEFINE_ONEDNN_ACT_KERNEL_WITH_ONE_ATTRS(Swish, SwishOneDNNFunctor, beta)
template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx,
@@ -155,7 +155,7 @@ void HardSwishKernel(const Context& dev_ctx,
                     float scale,
                     float offset,
                     DenseTensor* out) {
-  HardSwishMKLDNNFunctor<T> functor;
+  HardSwishOneDNNFunctor<T> functor;
functor(dev_ctx, x, threshold, 0, out);
}
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/gaussian_random_kernel.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void GaussianRandomKernel(const Context& ctx,
                          const IntArray& shape,
                          float mean,
                          float std,
                          int seed,
                          DataType dtype,
                          DenseTensor* out) {
  // Resize before allocating so numel() already reflects the requested shape.
  out->Resize(phi::make_ddim(shape.GetData()));
  std::normal_distribution<T> dist(mean, std);
  auto engine = std::make_shared<std::mt19937_64>();
  engine->seed(seed);
  T* data = ctx.template Alloc<T>(out);
  for (int64_t i = 0; i < out->numel(); ++i) {
    data[i] = dist(*engine);
  }

  dnnl::memory::desc out_mem_desc(
      vectorize(out->dims()),
      funcs::ToOneDNNDataType(out->dtype()),
      funcs::GetPlainOneDNNFormat(out->dims().size()));
  out->set_mem_desc(out_mem_desc);
}
} // namespace phi
PD_REGISTER_KERNEL(
gaussian_random, OneDNN, ALL_LAYOUT, phi::GaussianRandomKernel, float) {}
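For reference, the sampling loop above boils down to this standard-library pattern; the fixed seed below is an arbitrary example and shows why equal seeds reproduce the same tensor:

#include <iostream>
#include <random>

int main() {
  std::normal_distribution<float> dist(/*mean=*/0.0f, /*stddev=*/1.0f);
  std::mt19937_64 engine(42);  // same role as the kernel's 'seed' attribute
  for (int i = 0; i < 4; ++i) std::cout << dist(engine) << ' ';
  // Re-running prints the identical sequence: seeded generation is deterministic.
}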
@@ -109,8 +109,8 @@ void TransferLayoutMKLDNN(const Context& dev_ctx,
  if (src_layout != DataLayout::MKLDNN && dst_layout == DataLayout::MKLDNN) {
    // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
    // Just set layout/format. No real transform occurs
-    auto out_format = funcs::MKLDNNFormatForSize(
-        x.dims().size(), funcs::ToMKLDNNFormat(src_layout));
+    auto out_format = funcs::OneDNNFormatForSize(
+        x.dims().size(), funcs::ToOneDNNFormat(src_layout));
out->ShareDataWith(x);
// For NHWC data we need reshape of tensors as MKL-DNN
@@ -127,7 +127,7 @@ void TransferLayoutMKLDNN(const Context& dev_ctx,
             dst_layout != DataLayout::MKLDNN) {
    // Case2 - transform from MKLDNN OPKernel to Non-MKLDNN OPKernel
    // Do transform via MKLDNN lib
-    funcs::innerTransDataLayoutFromMKLDNN(
+    funcs::innerTransDataLayoutFromOneDNN(
src_layout, dst_layout, x, out, dev_ctx.GetPlace());
} else if (src_layout == DataLayout::MKLDNN &&
dst_layout == DataLayout::MKLDNN) {
......