Unverified commit 4d5a5533 authored by Sławomir Siwek, committed by GitHub

[PHI decoupling] Remove fluid imports from MKLDNN code (#48981)

* fix wrong handler name

* mkldnn_engine -> onednn_engine

* remove fluid/errors.h imports

* remove fluid/enforce.h imports

* remove note and unnecessary import

* remove fluid/pretty_log.h imports

* remove fluid/place.h imports

* remove fluid/data_layout_transform.h imports

* remove fluid/device_context.h imports

* remove mkldnn_helper code

* remove fluid/mkldnn_reuse.h imports

* pretty_log import
Parent 32633c8e
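Every hunk below applies the same mechanical mapping: `paddle/fluid/platform/{enforce,errors,place}.h` become `paddle/phi/core/enforce.h`, `paddle/phi/core/errors.h`, and `paddle/phi/common/place.h`; `paddle/fluid/platform/mkldnn_reuse.h` becomes `paddle/phi/backends/onednn/onednn_reuse.h`; `paddle/fluid/string/pretty_log.h` becomes `paddle/utils/string/pretty_log.h`; and `platform::` symbols are replaced by their `phi::` equivalents. A minimal sketch of the pattern, assuming only the substitutions visible in this diff (`ExamplePass` and its body are illustrative, not part of the commit):

// Illustrative only: a hypothetical pass body after the fluid -> phi migration.
#include "paddle/phi/common/place.h"      // was paddle/fluid/platform/place.h
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"      // was paddle/fluid/platform/enforce.h

void ExamplePass(paddle::framework::ir::Graph* graph) {
  // platform::errors::* -> phi::errors::*
  PADDLE_ENFORCE_NOT_NULL(graph,
                          phi::errors::InvalidArgument(
                              "Pointer to graph argument cannot be NULL."));
  // paddle::platform::CPUPlace -> phi::CPUPlace
  phi::CPUPlace place;
  phi::DenseTensor tensor;
  tensor.Resize({1});
  float* data = tensor.mutable_data<float>(place);
  data[0] = 1.0f;
}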
@@ -16,8 +16,8 @@
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -19,7 +19,6 @@
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/platform/errors.h"
 namespace paddle {
 namespace framework {
...
@@ -28,7 +28,7 @@ namespace ir {
 void ComputePropagateScalesMkldnnPass::GetTensorFromVector(
     const std::vector<float>& data_v, phi::DenseTensor* tensor) const {
   const int size = static_cast<int>(data_v.size());
-  auto* data = tensor->mutable_data<float>({size}, platform::CPUPlace());
+  auto* data = tensor->mutable_data<float>({size}, phi::CPUPlace());
   for (int i = 0; i < size; i++) {
     data[i] = data_v[i];
   }
@@ -123,7 +123,7 @@ void ComputePropagateScalesMkldnnPass::ComputeVarScales(
       std::vector<int64_t> reshape_dims = {dims[0], volume};
       tmp_tensor.Resize(phi::make_ddim(reshape_dims));
       auto* weight_data = weight_tensor->data<float>();
-      auto* tmp_data = tmp_tensor.mutable_data<float>(platform::CPUPlace());
+      auto* tmp_data = tmp_tensor.mutable_data<float>(phi::CPUPlace());
       for (int i = 0; i < weight_tensor->numel(); i++) {
         tmp_data[i] = std::abs(weight_data[i]);
       }
@@ -365,7 +365,7 @@ void ComputePropagateScalesMkldnnPass::UpdateScaleOpInOutScales(
     auto pair = iter->second;
     const auto tensor = pair.second;
     tmp_tensor.Resize(tensor.dims());
-    auto* data = tmp_tensor.mutable_data<float>(platform::CPUPlace());
+    auto* data = tmp_tensor.mutable_data<float>(phi::CPUPlace());
     auto* src_data = tensor.data<float>();
     for (int i = 0; i < tensor.numel(); i++) {
       if (out_iter != var_quant_scales->end()) {
...
@@ -17,7 +17,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
@@ -119,7 +119,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
       const ProgramDesc& prog,
       Scope* scope,
       const std::initializer_list<std::string>& variable_names) {
-    auto place = paddle::platform::CPUPlace();
+    auto place = phi::CPUPlace();
     NaiveExecutor exe{place};
     exe.CreateVariables(prog, 0, true, scope);
@@ -148,19 +148,19 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test {
     auto* wx_tensor = wx_var->GetMutable<phi::DenseTensor>();
     wx_tensor->Resize(phi::make_dim(wx.size(), wx[0].size()));
     for (size_t i = 0; i < wx.size(); i++)
-      std::copy(begin(wx[i]),
-                end(wx[i]),
-                wx_tensor->mutable_data<float>(platform::CPUPlace()) +
-                    i * wx[0].size());
+      std::copy(
+          begin(wx[i]),
+          end(wx[i]),
+          wx_tensor->mutable_data<float>(phi::CPUPlace()) + i * wx[0].size());
     auto* wh_var = scope.FindVar(wh_var_names);
     auto* wh_tensor = wh_var->GetMutable<phi::DenseTensor>();
     wh_tensor->Resize(phi::make_dim(wh.size(), wh[0].size()));
     for (size_t i = 0; i < wh.size(); i++)
-      std::copy(begin(wh[i]),
-                end(wh[i]),
-                wh_tensor->mutable_data<float>(platform::CPUPlace()) +
-                    i * wh[0].size());
+      std::copy(
+          begin(wh[i]),
+          end(wh[i]),
+          wh_tensor->mutable_data<float>(phi::CPUPlace()) + i * wh[0].size());
     if (type == "gru") {
       ComputeGruWeightScales(
           graph, &scope, wx_name, wh_name, &var_quant_scales);
@@ -283,7 +283,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, get_scales_function) {
   var_tensor.Resize(phi::make_dim(values.size(), 1));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   std::vector<float> results = GetScales(&var_tensor, 0);
   ASSERT_EQ(results.size(), std::size_t(1));
@@ -310,7 +310,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, compute_var_scales) {
   weight_tensor->Resize(phi::make_dim(1, values.size()));
   std::copy(begin(values),
             end(values),
-            weight_tensor->mutable_data<float>(platform::CPUPlace()));
+            weight_tensor->mutable_data<float>(phi::CPUPlace()));
   auto max_val = *std::max_element(values.begin(), values.end());
@@ -338,7 +338,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, update_relu_output_scales) {
   StringPairMap var_quant_scales;
   for (auto& var_name : conv_variable_names) {
     phi::DenseTensor tensor;
-    auto* data = tensor.mutable_data<float>({1}, platform::CPUPlace());
+    auto* data = tensor.mutable_data<float>({1}, phi::CPUPlace());
     data[0] = 10;
     auto pair = std::make_pair(false, tensor);
     var_quant_scales.insert(std::make_pair(var_name, pair));
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -80,7 +80,7 @@ void recompute_bias_and_weights(const Scope* scope,
       ac_bias_tensor.data<float>(), ac_bias_tensor.numel(), 1);
   EigenVectorArrayMap eltwise_y_in_array(
-      eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
+      eltwise_y_in_tensor->mutable_data<float>(phi::CPUPlace()),
       eltwise_y_in_tensor->numel(),
       1);
@@ -91,7 +91,7 @@ void recompute_bias_and_weights(const Scope* scope,
       scope->FindVar(conv_weight->Name())->GetMutable<phi::DenseTensor>();
   auto weights_shape = weights->dims();
   auto weights_shape_2d = phi::flatten_to_2d(weights_shape, 1);
-  auto* weights_data = weights->mutable_data<float>(platform::CPUPlace());
+  auto* weights_data = weights->mutable_data<float>(phi::CPUPlace());
   EigenMatrixArrayMap weights_array_2d(
       weights_data, weights_shape_2d[0], weights_shape_2d[1]);
@@ -233,7 +233,7 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
   auto* eltwise_y_in_tensor =
       scope->Var(eltwise_y_in_node->Name())->GetMutable<phi::DenseTensor>();
   eltwise_y_in_tensor->Resize(ac_bias_tensor->dims());
-  std::fill_n(eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
+  std::fill_n(eltwise_y_in_tensor->mutable_data<float>(phi::CPUPlace()),
               eltwise_y_in_tensor->numel(),
               0.0f);
...
@@ -19,8 +19,8 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -263,7 +263,7 @@ phi::DenseTensor tensor_apply_eltwise(const phi::DenseTensor& vec_a,
   vec_y.Resize(vec_a.dims());
   const float* a = vec_a.data<float>();
   const float* b = vec_b.data<float>();
-  float* y = vec_y.mutable_data<float>(platform::CPUPlace());
+  float* y = vec_y.mutable_data<float>(phi::CPUPlace());
   for (int i = 0; i < vec_a.numel(); i++) {
     y[i] = f(a[i], b[i]);
   }
...
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
@@ -112,7 +112,7 @@ void InitTensorHolder(Scope* scope,
 void MainTest(bool convWithExistingBias) {
   auto prog = BuildProgramDesc(convWithExistingBias);
   std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
-  auto place = paddle::platform::CPUPlace();
+  auto place = phi::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
   // Init scope, as it is used in pass
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -17,7 +17,6 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
 namespace paddle {
 namespace framework {
@@ -68,23 +67,16 @@ void SetOp(ProgramDesc* prog,
 static const std::initializer_list<std::string> variable_names{
     "z", "a", "b", "c", "d", "e", "f", "g", "h", "i"};
-void PreparePass(std::unique_ptr<ir::Graph>& graph,
-                 int* original_nodes_num,
-                 int* current_nodes_num) {
-  auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
-  *original_nodes_num = graph->Nodes().size();
-  graph.reset(pass->Apply(graph.release()));
-  *current_nodes_num = graph->Nodes().size();
-}
 void MainTest(const ProgramDesc& prog,
               const int& quant_count,
               const int& dequant_count,
               const int& added_nodes_count) {
   auto graph = std::make_unique<ir::Graph>(prog);
-  int original_nodes_num, current_nodes_num;
-  PreparePass(graph, &original_nodes_num, &current_nodes_num);
+  auto pass = PassRegistry::Instance().Get("cpu_bfloat16_pass");
+  int original_nodes_num = graph->Nodes().size();
+  graph.reset(pass->Apply(graph.release()));
+  int current_nodes_num = graph->Nodes().size();
   int quantize_nodes_count = 0;
   int dequantize_nodes_count = 0;
...
@@ -19,7 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -20,7 +20,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -1204,8 +1204,7 @@ void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const {
     auto* w_scale_tensor_dst =
         scope->Var(w_scale_node->Name())->GetMutable<phi::DenseTensor>();
     w_scale_tensor_dst->Resize(scale_tensor_src.dims());
-    auto* dst_data =
-        w_scale_tensor_dst->mutable_data<float>(platform::CPUPlace());
+    auto* dst_data = w_scale_tensor_dst->mutable_data<float>(phi::CPUPlace());
     EigenVectorArrayMapFloat eigen_tensor_dst{dst_data,
                                               w_scale_tensor_dst->numel()};
     eigen_tensor_dst =
...
@@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h"  // NOLINT
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
@@ -146,7 +146,7 @@ void PreparePass(std::unique_ptr<ir::Graph>* graph,
                  int* current_nodes_num,
                  std::string var_without_scale = "",
                  std::string var_signed = "") {
-  auto place = paddle::platform::CPUPlace();
+  auto place = phi::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
   exe.CreateVariables(prog, 0, true, &scope);
...
@@ -18,9 +18,9 @@
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h"
 #include "paddle/fluid/framework/naive_executor.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
@@ -722,7 +722,7 @@ void InitTensorHolder(Scope* scope,
 }
 void PrepareGraph(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog) {
-  auto place = paddle::platform::CPUPlace();
+  auto place = phi::CPUPlace();
   NaiveExecutor exe{place};
   Scope scope;
   exe.CreateVariables(prog, 0, true, &scope);
...
@@ -16,9 +16,9 @@
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -14,8 +14,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/phi/core/enforce.h"
 namespace paddle {
 namespace framework {
...
@@ -17,7 +17,7 @@
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/core/enforce.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/layer_norm_onednn_optimization_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -77,7 +77,7 @@ void LayerNormOneDNNOptimizationPass::ApplyImpl(Graph *graph) const {
     scale_shift_tensor->Resize(phi::make_ddim({channels * 2}));
-    memcpy(scale_shift_tensor->mutable_data<float>(platform::CPUPlace()),
+    memcpy(scale_shift_tensor->mutable_data<float>(phi::CPUPlace()),
           ln_scale_tensor->data<float>(),
           channels * sizeof(float));
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/matmul_activation_mkldnn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/graph_traits.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -13,9 +13,9 @@
 // limitations under the License.
 #include "paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_mkldnn_fuse_pass.h"
-#include <paddle/fluid/string/pretty_log.h>
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -23,7 +23,7 @@
 #include "paddle/fluid/framework/ir/pass_tester_helper.h"
 #include "paddle/fluid/framework/naive_executor.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 #include "paddle/phi/core/kernel_registry.h"
 PD_DECLARE_KERNEL(conv2d_transpose, CPU, ALL_LAYOUT);
@@ -195,7 +195,7 @@ class MKLDNNConvBatchNormPassTest {
   void FillTensorWithRandomData(phi::DenseTensor* tnsr,
                                 float lowb,
                                 float upb,
-                                platform::CPUPlace place) {
+                                phi::CPUPlace place) {
     float* ptr = tnsr->mutable_data<float>(place);
     // Initialize input data
     std::uniform_real_distribution<float> dist(static_cast<float>(lowb),
@@ -219,7 +219,7 @@ class MKLDNNConvBatchNormPassTest {
     std::unique_ptr<ir::Graph> graph(new ir::Graph(base_prog));
     Scope scope;
-    auto place = paddle::platform::CPUPlace();
+    auto place = phi::CPUPlace();
     NaiveExecutor exe{place};
     auto pass = PassRegistry::Instance().Get(
...
@@ -140,7 +140,7 @@ static void GetInfoFromTheFirstOp(ir::Graph* graph,
         op_desc->GetAttr(vector_name));
     phi::DenseTensor tensor;
     const int size = static_cast<int>(scales_vector.size());
-    auto data = tensor.mutable_data<double>({size}, platform::CPUPlace());
+    auto data = tensor.mutable_data<double>({size}, phi::CPUPlace());
     std::copy(scales_vector.begin(), scales_vector.end(), data);
     auto pair = std::make_pair(is_unsigned, tensor);
     info_map->insert(std::make_pair(var_name, pair));
...
@@ -18,8 +18,8 @@
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
-#include "paddle/fluid/platform/errors.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/errors.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -44,12 +44,11 @@ std::vector<std::string> JoinInputs(Node* op1,
 void MultiGRUFusePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Fusing two concatenated multi_gru ops.";
   PADDLE_ENFORCE_NOT_NULL(graph,
-                          platform::errors::InvalidArgument(
+                          phi::errors::InvalidArgument(
                               "Pointer to graph argument cannot be NULL."));
   FusePassBase::Init(name_scope_, graph);
   PADDLE_ENFORCE_NOT_NULL(
-      param_scope(),
-      platform::errors::InvalidArgument("Scope cannot be nullptr."));
+      param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr."));
   GraphPatternDetector gpd;
   patterns::TwoFusionGruConcat pattern{gpd.mutable_pattern(), name_scope_};
...
@@ -21,9 +21,9 @@
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
-#include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/errors.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
@@ -48,12 +48,11 @@ std::vector<std::string> JoinInputs(Node* op1,
 void MultiGruSeqFusePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Fusing two consecutive multi_gru ops.";
   PADDLE_ENFORCE_NOT_NULL(graph,
-                          platform::errors::InvalidArgument(
+                          phi::errors::InvalidArgument(
                               "Pointer to graph argument cannot be NULL."));
   FusePassBase::Init(name_scope_, graph);
   PADDLE_ENFORCE_NOT_NULL(
-      param_scope(),
-      platform::errors::InvalidArgument("Scope cannot be nullptr."));
+      param_scope(), phi::errors::InvalidArgument("Scope cannot be nullptr."));
   GraphPatternDetector gpd;
   patterns::MultiGruSeq pattern{gpd.mutable_pattern(), name_scope_};
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/operator_reshape2_onednn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -15,8 +15,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass.h"  // NOLINT
 #include "paddle/fluid/imperative/type_defs.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 namespace paddle {
 namespace framework {
...
@@ -430,8 +430,8 @@ void QuantDequantMkldnnPass::TransposeWeight(phi::DenseTensor* input) const {
   phi::DenseTensor trans_tensor;
   trans_tensor.Resize(out_dims);
-  float* trans_data = trans_tensor.mutable_data<float>(platform::CPUPlace());
-  float* in_data = input->mutable_data<float>(platform::CPUPlace());
+  float* trans_data = trans_tensor.mutable_data<float>(phi::CPUPlace());
+  float* in_data = input->mutable_data<float>(phi::CPUPlace());
   for (int64_t out_idx = 0; out_idx < count; ++out_idx) {
     int64_t in_idx = 0;
@@ -493,8 +493,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32(
     weight_tensor->clear();  // clear int weight
     weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims)));
-    auto* new_weight_data =
-        weight_tensor->mutable_data<float>(platform::CPUPlace());
+    auto* new_weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
     memcpy(new_weight_data,
            weight_data.data(),
            weight_tensor->numel() * sizeof(float));
@@ -536,8 +535,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32(
   }
   weight_tensor->clear();  // clear int weight
   weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims)));
-  auto* new_weight_data =
-      weight_tensor->mutable_data<float>(platform::CPUPlace());
+  auto* new_weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
   memcpy(new_weight_data,
          weight_data.data(),
          weight_tensor->numel() * sizeof(float));
@@ -582,8 +580,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeights(
         weight_var_name,
         op_desc->Type()));
   auto* weight_tensor = var->GetMutable<phi::DenseTensor>();
-  float* fp32_weight_data =
-      weight_tensor->mutable_data<float>(platform::CPUPlace());
+  float* fp32_weight_data = weight_tensor->mutable_data<float>(phi::CPUPlace());
   ConvertFromINT8ToFP32(
       scales, weight_tensor, nullptr, fp32_weight_data, weight_var_name);
 }
@@ -628,7 +625,7 @@ void QuantDequantMkldnnPass::DequantizeOpWeightsFromONNXFormat(
       op_desc->Type()));
   auto* weight_tensor = var->GetMutable<phi::DenseTensor>();
   int8_t* int8_weight_data =
-      weight_tensor->mutable_data<int8_t>(platform::CPUPlace());
+      weight_tensor->mutable_data<int8_t>(phi::CPUPlace());
   ConvertFromINT8ToFP32(
       scales, weight_tensor, int8_weight_data, nullptr, weight_var_name);
...
@@ -14,8 +14,8 @@
 #include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -26,7 +26,7 @@ void AddVarToScope(Scope* param_scope,
                    const DDim& dims) {
   auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
   tensor->Resize(dims);
-  tensor->mutable_data<float>(platform::CPUPlace());
+  tensor->mutable_data<float>(phi::CPUPlace());
 }
 Scope* CreateParamScope() {
...
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -28,7 +28,7 @@ void AddVarToScope(Scope* param_scope,
                    const DDim& dims) {
   auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
   tensor->Resize(dims);
-  tensor->mutable_data<float>(platform::CPUPlace());
+  tensor->mutable_data<float>(phi::CPUPlace());
 }
 Scope* CreateParamScope() {
...
@@ -16,9 +16,9 @@
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -13,8 +13,8 @@
 // limitations under the License.
 #include "paddle/fluid/framework/ir/mkldnn/squeeze2_transpose2_onednn_fuse_pass.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 namespace framework {
...
@@ -30,14 +30,14 @@
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/api/analysis_predictor.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/platform/place.h"
-#include "paddle/fluid/string/pretty_log.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/utils/string/pretty_log.h"
 namespace paddle {
 using framework::Variable;
 using framework::ir::Graph;
-using platform::CPUPlace;
+using phi::CPUPlace;
 using ConstEigenVectorArrayMap =
     Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>;
 using EigenMatrixDoubleArray =
...
@@ -111,7 +111,7 @@ TEST_F(MkldnnQuantizerTest, histogram_inverted_min_max) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   ASSERT_THROW(Histogram(var_tensor, max_val, min_val, 3),
                platform::EnforceNotMet);
@@ -127,7 +127,7 @@ TEST_F(MkldnnQuantizerTest, histogram_non_negative_to_3) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   std::vector<int> histogram;
   float bin_width;
@@ -151,7 +151,7 @@ TEST_F(MkldnnQuantizerTest, histogram_positive_and_negative_to_3) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   std::vector<int> histogram;
   float bin_width;
@@ -175,7 +175,7 @@ TEST_F(MkldnnQuantizerTest, histogram_zero_bins) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   ASSERT_THROW(Histogram(var_tensor, min_val, max_val, 0),
                platform::EnforceNotMet);
@@ -188,7 +188,7 @@ TEST_F(MkldnnQuantizerTest, histogram_empty) {
   // zero tensor
   phi::DenseTensor var_tensor;
   var_tensor.Resize({0});
-  var_tensor.mutable_data<double>(platform::CPUPlace());
+  var_tensor.mutable_data<double>(phi::CPUPlace());
   ASSERT_THROW(Histogram(var_tensor, -1, 1, 1), platform::EnforceNotMet);
 }
@@ -200,7 +200,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_signed) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -220,7 +220,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_signed) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -240,7 +240,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_unsigned) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -260,10 +260,10 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_chwise_unsigned) {
   phi::DenseTensor var_tensor;
   var_tensor.Resize(phi::make_dim(channels, 1, 1, values.size()));
   for (int i = 0; i < channels; i++)
-    std::copy(begin(values),
-              end(values),
-              var_tensor.mutable_data<float>(platform::CPUPlace()) +
-                  i * values.size());
+    std::copy(
+        begin(values),
+        end(values),
+        var_tensor.mutable_data<float>(phi::CPUPlace()) + i * values.size());
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -284,7 +284,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) {
   var_tensor.Resize(phi::make_dim(values.size()));
   std::copy(begin(values),
             end(values),
-            var_tensor.mutable_data<float>(platform::CPUPlace()));
+            var_tensor.mutable_data<float>(phi::CPUPlace()));
   bool is_unsigned;
   phi::DenseTensor lod_tensor;
@@ -312,14 +312,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_gru_scaling_factor) {
     std::copy(
         begin(wx[i]),
        end(wx[i]),
-        wx_tensor.mutable_data<float>(platform::CPUPlace()) + i * wx[0].size());
+        wx_tensor.mutable_data<float>(phi::CPUPlace()) + i * wx[0].size());
   wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size()));
   for (size_t i = 0; i < wh.size(); i++)
     std::copy(
        begin(wh[i]),
        end(wh[i]),
-        wh_tensor.mutable_data<float>(platform::CPUPlace()) + i * wh[0].size());
+        wh_tensor.mutable_data<float>(phi::CPUPlace()) + i * wh[0].size());
   bool is_unsigned;
   std::tie(is_unsigned, lod_tensor) =
@@ -342,14 +342,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_lstm_scaling_factor) {
     std::copy(
        begin(wx[i]),
        end(wx[i]),
-        wx_tensor.mutable_data<float>(platform::CPUPlace()) + i * wx[0].size());
+        wx_tensor.mutable_data<float>(phi::CPUPlace()) + i * wx[0].size());
   wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size()));
   for (size_t i = 0; i < wh.size(); i++)
     std::copy(
        begin(wh[i]),
        end(wh[i]),
-        wh_tensor.mutable_data<float>(platform::CPUPlace()) + i * wh[0].size());
+        wh_tensor.mutable_data<float>(phi::CPUPlace()) + i * wh[0].size());
   bool is_unsigned;
   std::tie(is_unsigned, lod_tensor) =
...
@@ -18,8 +18,7 @@ limitations under the License. */
 #include <iostream>
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/common/place.h"
 DEFINE_string(infer_shape, "", "data shape file");
 DEFINE_int32(sample, 20, "number of sample");
@@ -78,7 +77,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
 #ifdef PADDLE_WITH_MKLDNN
 int GetNumCachedObjects(void) {
   auto &pool = platform::DeviceContextPool::Instance();
-  platform::CPUPlace place;
+  phi::CPUPlace place;
   auto onednn_dev_ctx = dynamic_cast<phi::OneDNNContext *>(pool.Get(place));
   return onednn_dev_ctx->GetCachedObjectsNumber();
 }
...
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/operators/fused/fusion_gru_op.h"
 #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
 #include "paddle/phi/core/expect.h"
 namespace paddle {
@@ -24,13 +25,14 @@ using phi::OneDNNContext;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::OneDNNMemDesc;
 using phi::funcs::RNNReorderType;
+using OneDNNMemoryFormat = dnnl::memory::format_tag;
 template <typename T, typename T_out = T>
 class GRUMKLDNNHandler : public RNNMKLDNNHandler<T, dnnl::gru_forward, T_out> {
  public:
   GRUMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                    const OneDNNContext& dev_ctx,
-                   const dnnl::engine mkldnn_engine,
+                   const dnnl::engine onednn_engine,
                    platform::Place cpu_place,
                    const phi::DenseTensor* input,
                    const phi::DenseTensor* weight_h,
@@ -44,7 +46,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler<T, dnnl::gru_forward, T_out> {
       : RNNMKLDNNHandler<T, dnnl::gru_forward, T_out>(
             ctx,
             dev_ctx,
-            mkldnn_engine,
+            onednn_engine,
             ctx.GetPlace(),
             input,
             weight_h,
@@ -256,7 +258,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
   template <typename Tout = T>
   void RunKernel(const framework::ExecutionContext& ctx) const {
     auto& dev_ctx = ctx.template device_context<OneDNNContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
+    const auto& onednn_engine = dev_ctx.GetEngine();
     // Get Tensors
     const auto* input = ctx.Input<phi::DenseTensor>("X");
@@ -294,7 +296,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
     GRUMKLDNNHandler<T, Tout> handler(
         ctx,
         dev_ctx,
-        mkldnn_engine,
+        onednn_engine,
         ctx.GetPlace(),
         input,
         weight_h,
@@ -379,7 +381,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(fusion_gru,
                    MKLDNN,
-                   paddle::platform::CPUPlace,
+                   phi::CPUPlace,
                    ops::FusionGRUMKLDNNKernel<float>,
                    ops::FusionGRUMKLDNNKernel<paddle::platform::bfloat16>,
                    ops::FusionGRUMKLDNNKernel<uint8_t>);
...
@@ -24,6 +24,7 @@ using phi::OneDNNContext;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::OneDNNMemDesc;
 using phi::funcs::RNNReorderType;
+using OneDNNMemoryFormat = dnnl::memory::format_tag;
 template <typename T, typename T_out = T>
 class LSTMMKLDNNHandler
@@ -31,7 +32,7 @@ class LSTMMKLDNNHandler
  public:
   LSTMMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
                     const OneDNNContext& dev_ctx,
-                    const dnnl::engine mkldnn_engine,
+                    const dnnl::engine onednn_engine,
                     platform::Place cpu_place,
                     const phi::DenseTensor* input,
                     const phi::DenseTensor* weight_h,
@@ -46,7 +47,7 @@ class LSTMMKLDNNHandler
       : RNNMKLDNNHandler<T, dnnl::lstm_forward, T_out>(
             ctx,
             dev_ctx,
-            mkldnn_engine,
+            onednn_engine,
             ctx.GetPlace(),
             input,
             weight_h,
@@ -338,7 +339,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
   template <typename Tout = T>
   void RunKernel(const framework::ExecutionContext& ctx) const {
     auto& dev_ctx = ctx.template device_context<OneDNNContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
+    const auto& onednn_engine = dev_ctx.GetEngine();
     // Get Tensors
     const auto* input = ctx.Input<phi::DenseTensor>("X");
@@ -379,7 +380,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
     LSTMMKLDNNHandler<T, Tout> handler(
         ctx,
         dev_ctx,
-        mkldnn_engine,
+        onednn_engine,
         ctx.GetPlace(),
         input,
         weight_h,
@@ -474,7 +475,7 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(fusion_lstm,
                    MKLDNN,
-                   paddle::platform::CPUPlace,
+                   phi::CPUPlace,
                    ops::FusionLSTMMKLDNNKernel<float>,
                    ops::FusionLSTMMKLDNNKernel<paddle::platform::bfloat16>,
                    ops::FusionLSTMMKLDNNKernel<uint8_t>);
...
...@@ -14,7 +14,8 @@ limitations under the License. */ ...@@ -14,7 +14,8 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -22,13 +23,14 @@ namespace operators { ...@@ -22,13 +23,14 @@ namespace operators {
using phi::funcs::CreateKey; using phi::funcs::CreateKey;
using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNGetDataType;
using phi::funcs::RNNReorderType; using phi::funcs::RNNReorderType;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T, typename T_alg, typename T_out = T> template <typename T, typename T_alg, typename T_out = T>
class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> { class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
public: public:
RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, RNNMKLDNNHandler(const paddle::framework::ExecutionContext& ctx,
const phi::OneDNNContext& dev_ctx, const phi::OneDNNContext& dev_ctx,
const dnnl::engine mkldnn_engine, const dnnl::engine onednn_engine,
platform::Place cpu_place, platform::Place cpu_place,
const phi::DenseTensor* input, const phi::DenseTensor* input,
const phi::DenseTensor* weight_h, const phi::DenseTensor* weight_h,
......
...@@ -18,10 +18,10 @@ limitations under the License. */ ...@@ -18,10 +18,10 @@ limitations under the License. */
#include "dnnl.hpp" // NOLINT #include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/fused/multi_gru_op.h" #include "paddle/fluid/operators/fused/multi_gru_op.h"
#include "paddle/fluid/platform/errors.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -31,6 +31,7 @@ using phi::funcs::OneDNNGetDataType; ...@@ -31,6 +31,7 @@ using phi::funcs::OneDNNGetDataType;
using phi::funcs::OneDNNMemDesc; using phi::funcs::OneDNNMemDesc;
using Direction = dnnl::rnn_direction; using Direction = dnnl::rnn_direction;
using phi::OneDNNContext; using phi::OneDNNContext;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
namespace { namespace {
...@@ -721,6 +722,6 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel<T> { ...@@ -721,6 +722,6 @@ class MultiGRUMKLDNNKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(multi_gru, REGISTER_OP_KERNEL(multi_gru,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::MultiGRUMKLDNNKernel<float>, ops::MultiGRUMKLDNNKernel<float>,
ops::MultiGRUMKLDNNKernel<uint8_t>); ops::MultiGRUMKLDNNKernel<uint8_t>);
...@@ -14,11 +14,10 @@ limitations under the License. */ ...@@ -14,11 +14,10 @@ limitations under the License. */
#include "paddle/fluid/operators/dequantize_op.h" #include "paddle/fluid/operators/dequantize_op.h"
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/core/errors.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -39,11 +38,11 @@ class DeQuantOpKernel : public framework::OpKernel<T> { ...@@ -39,11 +38,11 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
auto* out = ctx.Output<phi::DenseTensor>("Output"); auto* out = ctx.Output<phi::DenseTensor>("Output");
PADDLE_ENFORCE(quantization_scale != 0.0f, PADDLE_ENFORCE(quantization_scale != 0.0f,
platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Dequantization scale must be different than 0.0f")); "Dequantization scale must be different than 0.0f"));
PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0, PADDLE_ENFORCE(quantization_shift <= 255 && quantization_shift >= 0,
platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"Dequantization shift must be lower or equal to ", "Dequantization shift must be lower or equal to ",
"255 and greater or equal to 0, but got %f", "255 and greater or equal to 0, but got %f",
quantization_shift)); quantization_shift));
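Per element, the scale/shift pair validated above acts as the inverse of the quantize op. A scalar model of the reorder this kernel performs, under the assumption that quantization computed q = round(x * scale + shift):
// Assumed semantics, for illustration only: undo the shift, then the scale.
float Dequantize(uint8_t q, float scale, float shift) {
  return (static_cast<float>(q) - shift) / scale;
}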
...@@ -91,7 +90,7 @@ namespace ops = paddle::operators; ...@@ -91,7 +90,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(dequantize, REGISTER_OP_KERNEL(dequantize,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::DeQuantOpKernel<uint8_t>, ops::DeQuantOpKernel<uint8_t>,
ops::DeQuantOpKernel<int8_t>, ops::DeQuantOpKernel<int8_t>,
ops::DeQuantOpKernel<paddle::platform::bfloat16>); ops::DeQuantOpKernel<paddle::platform::bfloat16>);
...@@ -14,9 +14,10 @@ limitations under the License. */ ...@@ -14,9 +14,10 @@ limitations under the License. */
#include <memory> #include <memory>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/fc_op.h" #include "paddle/fluid/operators/fc_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -51,10 +52,10 @@ class FCMKLDNNHandler ...@@ -51,10 +52,10 @@ class FCMKLDNNHandler
const phi::DenseTensor* bias, const phi::DenseTensor* bias,
phi::DenseTensor* out, phi::DenseTensor* out,
const int in_num_col_dims, const int in_num_col_dims,
dnnl::engine mkldnn_engine, dnnl::engine onednn_engine,
platform::Place cpu_place) platform::Place cpu_place)
: phi::funcs::OneDNNHandlerNoCachingT<T_in, dnnl::inner_product_forward>( : phi::funcs::OneDNNHandlerNoCachingT<T_in, dnnl::inner_product_forward>(
mkldnn_engine, cpu_place), onednn_engine, cpu_place),
dev_ctx_(dev_ctx) { dev_ctx_(dev_ctx) {
this->memory_key_ = ctx.InputName("W"); this->memory_key_ = ctx.InputName("W");
...@@ -122,7 +123,7 @@ class FCMKLDNNHandler ...@@ -122,7 +123,7 @@ class FCMKLDNNHandler
post_operations.append_eltwise( post_operations.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_relu, 0.0f, 0.0f); activation_scale, dnnl::algorithm::eltwise_relu, 0.0f, 0.0f);
} }
platform::AppendActivation(ctx, post_operations, activation_scale); AppendActivation(ctx, post_operations, activation_scale);
if (ctx.HasAttr("fused_output_scale")) { if (ctx.HasAttr("fused_output_scale")) {
float scale_alpha = ctx.Attr<float>("fused_output_scale"); float scale_alpha = ctx.Attr<float>("fused_output_scale");
...@@ -154,6 +155,59 @@ class FCMKLDNNHandler ...@@ -154,6 +155,59 @@ class FCMKLDNNHandler
} }
} }
void AppendActivation(const ExecutionContext& ctx,
dnnl::post_ops& post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
ctx.HasAttr("fuse_activation")
? ctx.Attr<std::string>("fuse_activation").empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation = ctx.Attr<std::string>("fuse_activation");
const auto fuse_alpha =
ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f;
const auto fuse_beta =
ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto& activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
phi::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
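AppendActivation only fills the dnnl::post_ops list (note how hard_sigmoid, absent from oneDNN, is composed from eltwise_linear followed by eltwise_clip to [0, 1]); the caller still has to attach the list to the primitive attributes. The underlying oneDNN pattern, shown standalone:
// Plain oneDNN post-ops usage, independent of this handler:
dnnl::post_ops ops;
ops.append_eltwise(/*scale=*/1.0f, dnnl::algorithm::eltwise_relu,
                   /*alpha=*/0.0f, /*beta=*/0.0f);
dnnl::primitive_attr attr;
attr.set_post_ops(ops);  // pass attr when creating the primitive descriptor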
// Correct output scale, to take into account scaling of input and weights // Correct output scale, to take into account scaling of input and weights
// Since the data that comes out of input and weight multiplication is // Since the data that comes out of input and weight multiplication is
// scaled with its own scales, this data needs to be divided by // scaled with its own scales, this data needs to be divided by
...@@ -396,10 +450,76 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> { ...@@ -396,10 +450,76 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
} }
} }
void SetOutMemDescWithUnsqueeze2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) const {
const std::vector<int>& fused_unsqueeze2_axes =
ctx.Attr<std::vector<int>>("fused_unsqueeze2_axes");
const std::vector<int64_t>& op_tz = out_md.dims();
std::vector<int64_t> unsqueezed_op_tz(
op_tz.size() + fused_unsqueeze2_axes.size(), 0);
for (const auto& axis : fused_unsqueeze2_axes) {
int positive_axis = axis < 0 ? unsqueezed_op_tz.size() + axis : axis;
unsqueezed_op_tz[positive_axis] = 1;
}
int j = 0;
for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) {
if (unsqueezed_op_tz[i] == 0) {
unsqueezed_op_tz[i] = op_tz[j++];
}
}
out->set_mem_desc(out_md.reshape(unsqueezed_op_tz));
out->Resize(phi::make_ddim(unsqueezed_op_tz));
}
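The axis-filling loop above can be replayed standalone; with hypothetical dims {6, 7} and fused_unsqueeze2_axes {0, 2} it produces {1, 6, 1, 7}:
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int64_t> op_tz = {6, 7};  // original output dims
  std::vector<int> axes = {0, 2};       // fused_unsqueeze2_axes
  std::vector<int64_t> out(op_tz.size() + axes.size(), 0);
  for (int axis : axes)                 // mark the unsqueezed positions
    out[axis < 0 ? out.size() + axis : axis] = 1;
  int j = 0;
  for (size_t i = 0; i < out.size(); ++i)  // fill the rest in input order
    if (out[i] == 0) out[i] = op_tz[j++];
  for (int64_t d : out) std::printf("%lld ", static_cast<long long>(d));  // 1 6 1 7
  return 0;
}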
void SetOutMemDescWithReshape2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) const {
std::vector<int64_t> fused_reshape2_shape(
ctx.Attr<std::vector<int>>("fused_reshape2_shape").begin(),
ctx.Attr<std::vector<int>>("fused_reshape2_shape").end());
const int out_shape_numel = out->numel();
const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(),
fused_reshape2_shape.end(),
1,
std::multiplies<int64_t>());
for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) {
if (fused_reshape2_shape[i] == -1) {
fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel;
break;
}
}
out->set_mem_desc(out_md.reshape(fused_reshape2_shape));
out->Resize(phi::make_ddim(fused_reshape2_shape));
}
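A worked example of the sign trick above: with a hypothetical out->numel() of 12 and fused_reshape2_shape = {-1, 4}, new_shape_numel is (-1) * 4 = -4, so the -1 entry becomes -12 / -4 = 3 and the final shape is {3, 4}. The double negation is what folds the -1 placeholder out of the product.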
void SetOutMemDescWithLogicalLayoutFusesSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) const {
if (ctx.HasAttr("fused_unsqueeze2_axes")) {
SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_reshape2_shape")) {
SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_squeeze2_axes")) {
out->set_mem_desc(out_md);
out->Resize(phi::make_ddim(out_md.dims()));
} else {
out->set_mem_desc(out_md);
}
}
template <typename T_out, typename T_w> template <typename T_out, typename T_w>
void RunKernel(const framework::ExecutionContext& ctx) const { void RunKernel(const framework::ExecutionContext& ctx) const {
const auto& dev_ctx = ctx.template device_context<OneDNNContext>(); const auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
const auto* x = ctx.Input<phi::DenseTensor>("Input"); const auto* x = ctx.Input<phi::DenseTensor>("Input");
const auto* weights = ctx.Input<phi::DenseTensor>("W"); const auto* weights = ctx.Input<phi::DenseTensor>("W");
...@@ -433,7 +553,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> { ...@@ -433,7 +553,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
inner_product_cache->inner_product_p); inner_product_cache->inner_product_p);
src_memory_p = src_memory_p =
std::make_shared<dnnl::memory>(inner_product_cache->src_mem); std::make_shared<dnnl::memory>(inner_product_cache->src_mem);
PrepareSrcMem(fc_p, src_memory_p, x, mkldnn_engine); PrepareSrcMem(fc_p, src_memory_p, x, onednn_engine);
weights_memory_p = weights_memory_p =
std::make_shared<dnnl::memory>(inner_product_cache->weights_mem); std::make_shared<dnnl::memory>(inner_product_cache->weights_mem);
...@@ -463,7 +583,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> { ...@@ -463,7 +583,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
bias, bias,
out, out,
in_col_dims, in_col_dims,
mkldnn_engine, onednn_engine,
ctx.GetPlace()); ctx.GetPlace());
src_memory_p = handler.AcquireSrcMemoryWithReorder(x); src_memory_p = handler.AcquireSrcMemoryWithReorder(x);
...@@ -504,7 +624,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> { ...@@ -504,7 +624,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
dev_ctx.SetBlob(cache_key, ip_cache); dev_ctx.SetBlob(cache_key, ip_cache);
} }
platform::SetOutMemDescWithLogicalLayoutFusesSupport( SetOutMemDescWithLogicalLayoutFusesSupport(
ctx, ctx,
out, out,
dst_memory_p->get_desc().reshape(phi::vectorize(out->dims()))); dst_memory_p->get_desc().reshape(phi::vectorize(out->dims())));
...@@ -541,7 +661,7 @@ namespace ops = paddle::operators; ...@@ -541,7 +661,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(fc, REGISTER_OP_KERNEL(fc,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::FCMKLDNNKernel<float>, ops::FCMKLDNNKernel<float>,
ops::FCMKLDNNKernel<paddle::platform::bfloat16>, ops::FCMKLDNNKernel<paddle::platform::bfloat16>,
ops::FCMKLDNNKernel<uint8_t>, ops::FCMKLDNNKernel<uint8_t>,
......
...@@ -12,9 +12,8 @@ ...@@ -12,9 +12,8 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/operators/interpolate_op.h" #include "paddle/fluid/operators/interpolate_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -25,6 +24,7 @@ using dnnl::reorder; ...@@ -25,6 +24,7 @@ using dnnl::reorder;
using dnnl::resampling_forward; using dnnl::resampling_forward;
using dnnl::stream; using dnnl::stream;
using phi::DataLayout; using phi::DataLayout;
using OneDNNMemoryFormat = dnnl::memory::format_tag;
template <typename T = float> template <typename T = float>
class InterpolateOneDNNHandler class InterpolateOneDNNHandler
...@@ -131,7 +131,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> { ...@@ -131,7 +131,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
const auto* x = ctx.Input<phi::DenseTensor>("X"); const auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out"); auto* out = ctx.Output<phi::DenseTensor>("Out");
...@@ -146,7 +146,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> { ...@@ -146,7 +146,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel<T> {
out->Resize(dim_out); out->Resize(dim_out);
InterpolateOneDNNHandler<T> handler( InterpolateOneDNNHandler<T> handler(
algo, mkldnn_engine, ctx.GetPlace(), x, out); algo, onednn_engine, ctx.GetPlace(), x, out);
auto src_memory_p = handler.AcquireSrcMemory(x); auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = handler.AcquireDstMemory(out); auto dst_memory_p = handler.AcquireDstMemory(out);
...@@ -170,11 +170,11 @@ namespace ops = paddle::operators; ...@@ -170,11 +170,11 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(nearest_interp, REGISTER_OP_KERNEL(nearest_interp,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::InterpolateOneDNNKernel<float>, ops::InterpolateOneDNNKernel<float>,
ops::InterpolateOneDNNKernel<int8_t>, ops::InterpolateOneDNNKernel<int8_t>,
ops::InterpolateOneDNNKernel<uint8_t>); ops::InterpolateOneDNNKernel<uint8_t>);
REGISTER_OP_KERNEL(bilinear_interp, REGISTER_OP_KERNEL(bilinear_interp,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::InterpolateOneDNNKernel<float>); ops::InterpolateOneDNNKernel<float>);
...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
namespace paddle { namespace paddle {
...@@ -99,7 +100,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -99,7 +100,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const bool is_test = ctx.Attr<bool>("is_test"); const bool is_test = ctx.Attr<bool>("is_test");
auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
auto src_tz = phi::vectorize(x->dims()); auto src_tz = phi::vectorize(x->dims());
PADDLE_ENFORCE_EQ(begin_norm_axis, PADDLE_ENFORCE_EQ(begin_norm_axis,
...@@ -117,7 +118,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -117,7 +118,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
} }
LayerNormOneDNNHandler<T> handler( LayerNormOneDNNHandler<T> handler(
src_tz, epsilon, flags, is_test, x, mkldnn_engine, ctx.GetPlace()); src_tz, epsilon, flags, is_test, x, onednn_engine, ctx.GetPlace());
auto src_memory = handler.AcquireSrcMemory(x); auto src_memory = handler.AcquireSrcMemory(x);
auto dst_memory = handler.AcquireDstMemory(out); auto dst_memory = handler.AcquireDstMemory(out);
...@@ -159,6 +160,6 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -159,6 +160,6 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(layer_norm, REGISTER_OP_KERNEL(layer_norm,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::LayerNormMKLDNNOpKernel<float>, ops::LayerNormMKLDNNOpKernel<float>,
ops::LayerNormMKLDNNOpKernel<paddle::platform::bfloat16>); ops::LayerNormMKLDNNOpKernel<paddle::platform::bfloat16>);
...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -25,13 +26,13 @@ class LRNOneDNNHandler ...@@ -25,13 +26,13 @@ class LRNOneDNNHandler
OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward> { OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward> {
public: public:
LRNOneDNNHandler(const framework::ExecutionContext& ctx, LRNOneDNNHandler(const framework::ExecutionContext& ctx,
const dnnl::engine mkldnn_engine, const dnnl::engine onednn_engine,
platform::Place cpu_place, platform::Place cpu_place,
const phi::DenseTensor* input) const phi::DenseTensor* input)
: phi::funcs:: : phi::funcs::
OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>( OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>(
mkldnn_engine, cpu_place) { onednn_engine, cpu_place) {
const int n = ctx.Attr<int>("n"); const int n = ctx.Attr<int>("n");
// MKL-DNN implements LRN in a caffe way: // MKL-DNN implements LRN in a caffe way:
// http://caffe.berkeleyvision.org/tutorial/layers/lrn.html // http://caffe.berkeleyvision.org/tutorial/layers/lrn.html
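In the caffe formulation, out_i = in_i / (k + (alpha / n) * sum_j in_j^2)^beta, i.e. the squared sum is averaged over the window of size n, whereas PaddlePaddle's LRN applies alpha to the raw sum; the handler therefore has to pass alpha * n to oneDNN so the two definitions agree (the compensation line itself falls outside this hunk).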
...@@ -56,14 +57,14 @@ class LRNOneDNNHandler ...@@ -56,14 +57,14 @@ class LRNOneDNNHandler
} }
LRNOneDNNHandler(const framework::ExecutionContext& ctx, LRNOneDNNHandler(const framework::ExecutionContext& ctx,
const dnnl::engine mkldnn_engine, const dnnl::engine onednn_engine,
platform::Place cpu_place, platform::Place cpu_place,
const phi::DenseTensor* in_x, const phi::DenseTensor* in_x,
const phi::DenseTensor* out_grad, const phi::DenseTensor* out_grad,
phi::DenseTensor* in_x_grad) phi::DenseTensor* in_x_grad)
: phi::funcs:: : phi::funcs::
OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>( OneDNNHandlerNoCachingT<T, dnnl::lrn_forward, dnnl::lrn_backward>(
mkldnn_engine, cpu_place) { onednn_engine, cpu_place) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
ctx.Attr<bool>("is_test"), ctx.Attr<bool>("is_test"),
false, false,
...@@ -125,13 +126,13 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -125,13 +126,13 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle::platform::errors::PreconditionNotMet( paddle::platform::errors::PreconditionNotMet(
"Operator DNNL LRN must use CPUPlace")); "Operator DNNL LRN must use CPUPlace"));
auto& dev_ctx = ctx.template device_context<OneDNNContext>(); auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
auto x = ctx.Input<phi::DenseTensor>("X"); auto x = ctx.Input<phi::DenseTensor>("X");
auto out = ctx.Output<phi::DenseTensor>("Out"); auto out = ctx.Output<phi::DenseTensor>("Out");
auto mid = ctx.Output<phi::DenseTensor>("MidOut"); auto mid = ctx.Output<phi::DenseTensor>("MidOut");
LRNOneDNNHandler<T> handler(ctx, mkldnn_engine, ctx.GetPlace(), x); LRNOneDNNHandler<T> handler(ctx, onednn_engine, ctx.GetPlace(), x);
auto src_memory = handler.AcquireSrcMemory(x); auto src_memory = handler.AcquireSrcMemory(x);
auto dst_memory = handler.AcquireDstMemory(out); auto dst_memory = handler.AcquireDstMemory(out);
...@@ -179,10 +180,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -179,10 +180,10 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto in_x_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("X")); auto in_x_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
auto& dev_ctx = ctx.template device_context<OneDNNContext>(); auto& dev_ctx = ctx.template device_context<OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
LRNOneDNNHandler<T> handler( LRNOneDNNHandler<T> handler(
ctx, mkldnn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad); ctx, onednn_engine, ctx.GetPlace(), in_x, out_grad, in_x_grad);
auto src_memory = handler.AcquireSrcMemory(in_x); auto src_memory = handler.AcquireSrcMemory(in_x);
auto workspace = handler.AcquireBackwardWorkspaceMemory(mid); auto workspace = handler.AcquireBackwardWorkspaceMemory(mid);
...@@ -207,11 +208,8 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -207,11 +208,8 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(lrn, REGISTER_OP_KERNEL(lrn, MKLDNN, phi::CPUPlace, ops::LRNMKLDNNOpKernel<float>);
MKLDNN,
paddle::platform::CPUPlace,
ops::LRNMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(lrn_grad, REGISTER_OP_KERNEL(lrn_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::LRNMKLDNNGradOpKernel<float>); ops::LRNMKLDNNGradOpKernel<float>);
...@@ -14,14 +14,13 @@ limitations under the License. */ ...@@ -14,14 +14,13 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
namespace { namespace {
using dnnl::memory; using dnnl::memory;
using paddle::framework::ExecutionContext; using paddle::framework::ExecutionContext;
using paddle::framework::GradVarName; using paddle::framework::GradVarName;
using paddle::platform::MatMulV2MKLDNNHandler;
using phi::OneDNNContext; using phi::OneDNNContext;
using phi::vectorize; using phi::vectorize;
using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNGetDataType;
...@@ -82,6 +81,239 @@ phi::DDim GetDimForInput(const ExecutionContext &ctx, std::string input_name) { ...@@ -82,6 +81,239 @@ phi::DDim GetDimForInput(const ExecutionContext &ctx, std::string input_name) {
return input_dims; return input_dims;
} }
template <typename XT, typename YT, typename OT>
class MatMulV2MKLDNNHandler
: public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
public:
MatMulV2MKLDNNHandler(const ExecutionContext &ctx,
const dnnl::engine engine,
paddle::platform::Place cpu_place,
const std::vector<int64_t> &x_org_dims,
bool trans_x,
const std::vector<int64_t> &y_org_dims,
bool trans_y,
bool is_output_fused,
const std::vector<int64_t> &x_strides_override,
const std::vector<int64_t> &y_strides_override)
: phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul>(engine,
cpu_place) {
// M X K * K X N
std::vector<int64_t> x_dims(x_org_dims);
std::vector<int64_t> y_dims(y_org_dims);
const int MB_idx = x_dims.size() - 3;
const int H_idx = x_dims.size() - 2;
const int W_idx = x_dims.size() - 1;
if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]);
if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]);
const memory::dim M = x_dims[H_idx];
const memory::dim K = x_dims[W_idx];
const memory::dim N = y_dims[W_idx];
std::vector<int64_t> x_strides(x_dims.size() - 3, 1);
std::vector<int64_t> y_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_ddims(x_dims.size() - 3, 1);
x_strides.reserve(x_dims.size());
y_strides.reserve(x_dims.size());
out_strides.reserve(x_dims.size());
if (!x_strides_override.empty()) {
x_strides = x_strides_override;
} else {
if (!trans_x) {
x_strides.insert(x_strides.end(), {M * K, K, 1});
} else {
x_strides.insert(x_strides.end(), {M * K, 1, M});
}
}
if (!y_strides_override.empty()) {
y_strides = y_strides_override;
} else {
if (!trans_y) {
y_strides.insert(y_strides.end(), {N * K, N, 1});
} else {
y_strides.insert(y_strides.end(), {N * K, 1, K});
}
}
out_strides.insert(out_strides.end(), {M * N, N, 1});
out_ddims.insert(out_ddims.end(),
{std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N});
for (int i = x_dims.size() - 4; i >= 0; --i) {
out_ddims[i] = std::max(x_dims[i], y_dims[i]);
if (x_strides_override.empty()) {
x_strides[i] = x_dims[i + 1] * x_strides[i + 1];
}
if (y_strides_override.empty()) {
y_strides[i] = y_dims[i + 1] * y_strides[i + 1];
}
out_strides[i] = out_ddims[i + 1] * out_strides[i + 1];
}
// TODO(jczaja): Why not for int8??
if (!phi::funcs::is_int8<OT>() && is_output_fused) {
out_strides = FakeTransposeStrides(out_ddims);
}
auto x_md =
memory::desc(x_dims, phi::funcs::OneDNNGetDataType<XT>(), x_strides);
auto y_md =
memory::desc(y_dims, phi::funcs::OneDNNGetDataType<YT>(), y_strides);
auto out_md = memory::desc(
out_ddims, phi::funcs::OneDNNGetDataType<OT>(), out_strides);
const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx);
this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md);
}
void AppendActivation(const ExecutionContext &ctx,
dnnl::post_ops &post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
ctx.HasAttr("fuse_activation")
? ctx.Attr<std::string>("fuse_activation").empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation = ctx.Attr<std::string>("fuse_activation");
const auto fuse_alpha =
ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f;
const auto fuse_beta =
ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto &activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
phi::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
float ComputeOutputScale(const ExecutionContext &ctx) {
float alpha = ctx.HasAttr("alpha") ? ctx.Attr<float>("alpha") : 1.0f;
if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") &&
ctx.HasAttr("Scale_out")) {
float scale_x = ctx.Attr<float>("Scale_x");
float scale_y = ctx.Attr<float>("Scale_y");
bool force_fp32_out = ctx.HasAttr("force_fp32_output")
? ctx.Attr<bool>("force_fp32_output")
: false;
float scale_out = force_fp32_out ? 1.f : ctx.Attr<float>("Scale_out");
alpha *= scale_out / (scale_x * scale_y);
}
return alpha;
}
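Worked example with hypothetical attribute values: Scale_x = 0.5, Scale_y = 2.0, Scale_out = 4.0 and alpha = 1.0 give an output scale of 4.0 / (0.5 * 2.0) = 4.0; with force_fp32_output set, scale_out is pinned to 1.0 and the result drops to 1.0.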
dnnl::primitive_attr CreateMatmulAttrs(const ExecutionContext &ctx) {
dnnl::primitive_attr matmul_attrs;
dnnl::post_ops post_operations;
float scale_out = ComputeOutputScale(ctx);
if (scale_out != 1.0f) {
matmul_attrs.set_output_scales(0, {scale_out});
}
if (ctx.HasInput("ResidualData")) {
auto *residual_data = ctx.Input<phi::DenseTensor>("ResidualData");
auto residual_data_tz = phi::vectorize(residual_data->dims());
auto residual_data_md = memory::desc(residual_data_tz,
phi::funcs::OneDNNGetDataType<OT>(),
dnnl::memory::format_tag::any);
post_operations.append_binary(dnnl::algorithm::binary_add,
residual_data_md);
if (ctx.HasAttr("Scale_in_eltwise")) {
float sum_scale = scale_out / ctx.Attr<float>("Scale_in_eltwise");
post_operations.append_sum(sum_scale);
}
}
AppendActivation(ctx, post_operations);
if (ctx.HasAttr("fused_output_scale")) {
float scale_alpha = ctx.Attr<float>("fused_output_scale");
post_operations.append_eltwise(
1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f);
}
matmul_attrs.set_post_ops(post_operations);
return matmul_attrs;
}
std::vector<int64_t> FakeTransposeStrides(
const std::vector<int64_t> &matmul_out_dims) const {
// fuse matmul_v2 + transpose + reshape guarantees that the output is 4D and
// the transpose axes are: {0, 2, 1, 3}
std::vector<int64_t> transpose_axis = {0, 2, 1, 3};
std::vector<int64_t> fake_strides(transpose_axis.size());
int ndims = static_cast<int>(transpose_axis.size());
int total_stride = 1;
for (int i = ndims - 1; i >= 0; --i) {
fake_strides[transpose_axis[i]] = total_stride;
total_stride *= matmul_out_dims[transpose_axis[i]];
}
return fake_strides;
}
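For hypothetical matmul_out_dims = {2, 12, 128, 64}, the loop yields fake_strides = {98304, 64, 768, 1}: exactly the strides of a contiguous {2, 128, 12, 64} buffer addressed through the pre-transpose shape, so the fused transpose needs no extra copy.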
std::shared_ptr<memory> AcquireWeightsMemory(const phi::DenseTensor *input) {
const YT *input_data = input->data<YT>();
return this->AcquireMemoryFromPrimitive(
this->fwd_pd_->weights_desc(),
phi::funcs::to_void_cast<YT>(input_data));
}
std::shared_ptr<dnnl::memory> AcquireDstMemory(phi::DenseTensor *output) {
// We cannot use the base AcquireDstMemory as it makes an allocation request
// based on the DST memory primitive size. This is fine in general, but in
// MatMul we have a primitive that covers only one batch of data and then
// shifts the pointer for every new batch. Hence the phi::DenseTensor size is
// bigger than the dst memory primitive size, so we would request less memory
// than is there, which triggers an assertion. As there is no 'any' format
// here, we can leave the default size of phi::DenseTensor as computed in
// ComputeInferShape
OT *ptr = output->mutable_data<OT>(this->place_);
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr);
}
};
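For orientation, a hedged sketch of how such a handler is typically driven from a kernel body; the Acquire* names come from the OneDNNHandlerNoCachingT base plus the class above, and the surrounding variables (x, y, out, x_dims, y_dims, onednn_engine) are assumed:
// Assumed usage pattern, not a verbatim excerpt from this PR:
MatMulV2MKLDNNHandler<float, float, float> handler(
    ctx, onednn_engine, ctx.GetPlace(), x_dims, trans_x, y_dims, trans_y,
    /*is_output_fused=*/false, /*x_strides_override=*/{},
    /*y_strides_override=*/{});
auto src_memory_p = handler.AcquireSrcMemory(x);
auto weights_memory_p = handler.AcquireWeightsMemory(y);
auto dst_memory_p = handler.AcquireDstMemory(out);
auto matmul_p = handler.AcquireForwardPrimitive();
auto& astream = OneDNNContext::tls().get_stream();
matmul_p->execute(astream,
                  {{DNNL_ARG_SRC, *src_memory_p},
                   {DNNL_ARG_WEIGHTS, *weights_memory_p},
                   {DNNL_ARG_DST, *dst_memory_p}});
astream.wait();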
template <typename XT, typename YT, typename OT> template <typename XT, typename YT, typename OT>
class MatMulMKLDNNHandler class MatMulMKLDNNHandler
: public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> { : public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
...@@ -696,7 +928,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel<T> { ...@@ -696,7 +928,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel<T> {
REGISTER_OP_KERNEL(matmul, REGISTER_OP_KERNEL(matmul,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
MatMulMKLDNNKernel<float>, MatMulMKLDNNKernel<float>,
MatMulMKLDNNKernel<paddle::platform::bfloat16>, MatMulMKLDNNKernel<paddle::platform::bfloat16>,
MatMulMKLDNNKernel<int8_t>, MatMulMKLDNNKernel<int8_t>,
...@@ -704,6 +936,6 @@ REGISTER_OP_KERNEL(matmul, ...@@ -704,6 +936,6 @@ REGISTER_OP_KERNEL(matmul,
REGISTER_OP_KERNEL(matmul_grad, REGISTER_OP_KERNEL(matmul_grad,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
MatMulGradMKLDNNKernel<float>, MatMulGradMKLDNNKernel<float>,
MatMulGradMKLDNNKernel<paddle::platform::bfloat16>); MatMulGradMKLDNNKernel<paddle::platform::bfloat16>);
...@@ -14,10 +14,10 @@ limitations under the License. */ ...@@ -14,10 +14,10 @@ limitations under the License. */
#include "paddle/fluid/operators/quantize_op.h" #include "paddle/fluid/operators/quantize_op.h"
#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -106,5 +106,5 @@ namespace ops = paddle::operators; ...@@ -106,5 +106,5 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(quantize, REGISTER_OP_KERNEL(quantize,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::QuantOpKernel<float>); ops::QuantOpKernel<float>);
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#include <iterator> // NOLINT #include <iterator> // NOLINT
#include "dnnl.hpp" // NOLINT #include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/requantize_op.h" #include "paddle/fluid/operators/requantize_op.h"
#include "paddle/phi/backends/onednn/onednn_helper.h" #include "paddle/phi/backends/onednn/onednn_helper.h"
...@@ -115,7 +114,7 @@ namespace ops = paddle::operators; ...@@ -115,7 +114,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(requantize, REGISTER_OP_KERNEL(requantize,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::ReQuantOpKernel<int8_t>, ops::ReQuantOpKernel<int8_t>,
ops::ReQuantOpKernel<uint8_t>, ops::ReQuantOpKernel<uint8_t>,
ops::ReQuantOpKernel<paddle::platform::bfloat16>); ops::ReQuantOpKernel<paddle::platform::bfloat16>);
...@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/flatten_op.h" #include "paddle/fluid/operators/flatten_op.h"
#include "paddle/fluid/operators/squeeze_op.h" #include "paddle/fluid/operators/squeeze_op.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace { namespace {
enum class ReshapeKernelOpName { enum class ReshapeKernelOpName {
...@@ -357,7 +358,7 @@ namespace ops = paddle::operators; ...@@ -357,7 +358,7 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
squeeze, squeeze,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::squeeze>, ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::squeeze>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::squeeze>); ReshapeKernelOpName::squeeze>);
...@@ -365,7 +366,7 @@ REGISTER_OP_KERNEL( ...@@ -365,7 +366,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
squeeze_grad, squeeze_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::squeeze>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::squeeze>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::squeeze>); ReshapeKernelOpName::squeeze>);
...@@ -373,7 +374,7 @@ REGISTER_OP_KERNEL( ...@@ -373,7 +374,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
reshape, reshape,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::reshape>, ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::reshape>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::reshape>); ReshapeKernelOpName::reshape>);
...@@ -381,7 +382,7 @@ REGISTER_OP_KERNEL( ...@@ -381,7 +382,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
reshape_grad, reshape_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::reshape>); ReshapeKernelOpName::reshape>);
...@@ -389,7 +390,7 @@ REGISTER_OP_KERNEL( ...@@ -389,7 +390,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
reshape2_grad, reshape2_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape2>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::reshape2>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::reshape2>); ReshapeKernelOpName::reshape2>);
...@@ -397,7 +398,7 @@ REGISTER_OP_KERNEL( ...@@ -397,7 +398,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
flatten, flatten,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten>, ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten>); ReshapeKernelOpName::flatten>);
...@@ -405,7 +406,7 @@ REGISTER_OP_KERNEL( ...@@ -405,7 +406,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
flatten_grad, flatten_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten>); ReshapeKernelOpName::flatten>);
...@@ -413,7 +414,7 @@ REGISTER_OP_KERNEL( ...@@ -413,7 +414,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
flatten2, flatten2,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten2>, ops::ReshapeMKLDNNKernel<float, ReshapeKernelOpName::flatten2>,
ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten2>); ReshapeKernelOpName::flatten2>);
...@@ -421,7 +422,7 @@ REGISTER_OP_KERNEL( ...@@ -421,7 +422,7 @@ REGISTER_OP_KERNEL(
REGISTER_OP_KERNEL( REGISTER_OP_KERNEL(
flatten2_grad, flatten2_grad,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten2>, ops::ReshapeGradMKLDNNKernel<float, ReshapeKernelOpName::flatten2>,
ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16, ops::ReshapeGradMKLDNNKernel<paddle::platform::bfloat16,
ReshapeKernelOpName::flatten2>); ReshapeKernelOpName::flatten2>);
...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -38,7 +39,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> { ...@@ -38,7 +39,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>(); const auto& dev_ctx = ctx.template device_context<phi::OneDNNContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine(); const auto& onednn_engine = dev_ctx.GetEngine();
const auto* x = ctx.Input<phi::DenseTensor>("X"); const auto* x = ctx.Input<phi::DenseTensor>("X");
auto* out = ctx.Output<phi::DenseTensor>("Out"); auto* out = ctx.Output<phi::DenseTensor>("Out");
...@@ -47,7 +48,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> { ...@@ -47,7 +48,7 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
const int group = x->dims()[1] / ctx.Attr<int>("group"); const int group = x->dims()[1] / ctx.Attr<int>("group");
ShuffleChannelMKLDNNHandler<T> handler( ShuffleChannelMKLDNNHandler<T> handler(
x, group, mkldnn_engine, ctx.GetPlace()); x, group, onednn_engine, ctx.GetPlace());
auto src_memory_p = handler.AcquireSrcMemory(x); auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = handler.AcquireDstMemory(out); auto dst_memory_p = handler.AcquireDstMemory(out);
...@@ -69,6 +70,6 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> { ...@@ -69,6 +70,6 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_KERNEL(shuffle_channel, REGISTER_OP_KERNEL(shuffle_channel,
MKLDNN, MKLDNN,
paddle::platform::CPUPlace, phi::CPUPlace,
ops::ShuffleChannelMKLDNNKernel<float>, ops::ShuffleChannelMKLDNNKernel<float>,
ops::ShuffleChannelMKLDNNKernel<paddle::platform::bfloat16>); ops::ShuffleChannelMKLDNNKernel<paddle::platform::bfloat16>);
...@@ -22,9 +22,8 @@ ...@@ -22,9 +22,8 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/common/place.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(elementwise_add);
...@@ -51,7 +50,7 @@ class CacheTester { ...@@ -51,7 +50,7 @@ class CacheTester {
CacheTester() { CacheTester() {
// Clear oneDNN cache // Clear oneDNN cache
auto &pool = platform::DeviceContextPool::Instance(); auto &pool = platform::DeviceContextPool::Instance();
platform::CPUPlace place; phi::CPUPlace place;
onednn_dev_ctx_ = dynamic_cast<phi::OneDNNContext *>(pool.Get(place)); onednn_dev_ctx_ = dynamic_cast<phi::OneDNNContext *>(pool.Get(place));
onednn_dev_ctx_->ResetBlobMap(nullptr); onednn_dev_ctx_->ResetBlobMap(nullptr);
} }
...@@ -140,7 +139,7 @@ void RunOperator(const platform::Place &place, ...@@ -140,7 +139,7 @@ void RunOperator(const platform::Place &place,
TEST(test_conv2d_reuse_cache, cpu_place) { TEST(test_conv2d_reuse_cache, cpu_place) {
framework::DDim dims({1, 16, 32, 64}); framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p; phi::CPUPlace p;
CacheTester ct; CacheTester ct;
RunOperator<float>(p, "conv2d", dims, "input_signal"); RunOperator<float>(p, "conv2d", dims, "input_signal");
RunOperator<float>(p, "conv2d", dims, "input_signal"); RunOperator<float>(p, "conv2d", dims, "input_signal");
...@@ -152,7 +151,7 @@ TEST(test_conv2d_reuse_cache, cpu_place) { ...@@ -152,7 +151,7 @@ TEST(test_conv2d_reuse_cache, cpu_place) {
TEST(test_conv2d_noreuse_cache, cpu_place) { TEST(test_conv2d_noreuse_cache, cpu_place) {
framework::DDim dims({1, 16, 32, 64}); framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p; phi::CPUPlace p;
CacheTester ct; CacheTester ct;
RunOperator<float>(p, "conv2d", dims, "input_signal"); RunOperator<float>(p, "conv2d", dims, "input_signal");
RunOperator<float>(p, "conv2d", dims, "input_signal2"); RunOperator<float>(p, "conv2d", dims, "input_signal2");
......
...@@ -22,9 +22,8 @@ ...@@ -22,9 +22,8 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/common/place.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(elementwise_add);
...@@ -137,13 +136,13 @@ bool TestMain(const platform::Place &place, ...@@ -137,13 +136,13 @@ bool TestMain(const platform::Place &place,
TEST(test_softmax_inplace, cpu_place) { TEST(test_softmax_inplace, cpu_place) {
framework::DDim dims({32, 64}); framework::DDim dims({32, 64});
platform::CPUPlace p; phi::CPUPlace p;
ASSERT_TRUE(TestMain<float>(p, "softmax", dims, 1)); ASSERT_TRUE(TestMain<float>(p, "softmax", dims, 1));
} }
TEST(test_relu_inplace, cpu_place) { TEST(test_relu_inplace, cpu_place) {
framework::DDim dims({1, 12, 20, 20}); framework::DDim dims({1, 12, 20, 20});
platform::CPUPlace p; phi::CPUPlace p;
ASSERT_TRUE(TestMain<float>(p, "relu", dims, 1)); ASSERT_TRUE(TestMain<float>(p, "relu", dims, 1));
} }
......
...@@ -22,9 +22,8 @@ ...@@ -22,9 +22,8 @@
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/common/place.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
USE_OP_ITSELF(pool2d); USE_OP_ITSELF(pool2d);
...@@ -53,7 +52,7 @@ struct InputVars { ...@@ -53,7 +52,7 @@ struct InputVars {
TEST(test_pool2d_transpose_nhwc, cpu_place) { TEST(test_pool2d_transpose_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim dims({1, 4, 8, 512}); // NHWC shape
framework::DDim expected_dims({1, 7, 512, 3}); // NHWC expected shape framework::DDim expected_dims({1, 7, 512, 3}); // NHWC expected shape
platform::CPUPlace p; phi::CPUPlace p;
framework::Scope scope; framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()}; InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
...@@ -109,7 +108,7 @@ TEST(test_pool2d_transpose_nhwc, cpu_place) { ...@@ -109,7 +108,7 @@ TEST(test_pool2d_transpose_nhwc, cpu_place) {
TEST(test_pool2d_relu_relu_nhwc, cpu_place) { TEST(test_pool2d_relu_relu_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim dims({1, 4, 8, 512}); // NHWC shape
framework::DDim expected_dims({1, 512, 3, 7}); // NCHW expected shape framework::DDim expected_dims({1, 512, 3, 7}); // NCHW expected shape
platform::CPUPlace p; phi::CPUPlace p;
framework::Scope scope; framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()}; InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
...@@ -172,7 +171,7 @@ TEST(test_pool2d_relu_relu_nhwc, cpu_place) { ...@@ -172,7 +171,7 @@ TEST(test_pool2d_relu_relu_nhwc, cpu_place) {
TEST(test_pool2d_shape_nhwc, cpu_place) { TEST(test_pool2d_shape_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim dims({1, 4, 8, 512}); // NHWC shape
std::vector<int32_t> expected_dims{1, 3, 7, 512}; // NHWC expected shape std::vector<int32_t> expected_dims{1, 3, 7, 512}; // NHWC expected shape
platform::CPUPlace p; phi::CPUPlace p;
framework::Scope scope; framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()}; InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
...@@ -227,7 +226,7 @@ TEST(test_pool2d_shape_nhwc, cpu_place) { ...@@ -227,7 +226,7 @@ TEST(test_pool2d_shape_nhwc, cpu_place) {
TEST(test_pool2d_crop_nhwc, cpu_place) { TEST(test_pool2d_crop_nhwc, cpu_place) {
framework::DDim dims({1, 4, 8, 512}); // NHWC shape framework::DDim dims({1, 4, 8, 512}); // NHWC shape
framework::DDim expected_dims({1, 3, 7, 512}); // NCHW expected shape framework::DDim expected_dims({1, 3, 7, 512}); // NCHW expected shape
platform::CPUPlace p; phi::CPUPlace p;
framework::Scope scope; framework::Scope scope;
InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()}; InputVars input_name = {"x", scope.Var("x")->GetMutable<phi::DenseTensor>()};
......
...@@ -12,10 +12,9 @@ ...@@ -12,10 +12,9 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/mkldnn_reuse.h" #include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -166,10 +165,10 @@ namespace ops = paddle::operators; ...@@ -166,10 +165,10 @@ namespace ops = paddle::operators;
REGISTER_OP_KERNEL(transpose, REGISTER_OP_KERNEL(transpose,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::TransposeMKLDNNOpKernel<float>); ops::TransposeMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(transpose_grad, REGISTER_OP_KERNEL(transpose_grad,
MKLDNN, MKLDNN,
::paddle::platform::CPUPlace, ::phi::CPUPlace,
ops::TransposeMKLDNNGradOpKernel<float>); ops::TransposeMKLDNNGradOpKernel<float>);
...@@ -12,9 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,9 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
// NOTE(Ruibiao): Difficult to remove code from this header file because too
// many files rely on it through "mkldnn_reuse.h"
#pragma once #pragma once
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
......
...@@ -23,11 +23,10 @@ limitations under the License. */ ...@@ -23,11 +23,10 @@ limitations under the License. */
#include "dnnl.hpp" // NOLINT #include "dnnl.hpp" // NOLINT
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/backends/onednn/onednn_helper.h" #include "paddle/phi/backends/onednn/onednn_helper.h"
#include "paddle/phi/common/place.h"
namespace paddle { namespace paddle {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
using OneDNNMemoryFormat = dnnl::memory::format_tag;
using phi::OneDNNContext; using phi::OneDNNContext;
#endif #endif
namespace platform { namespace platform {
......
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <functional>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
namespace paddle {
namespace platform {
using memory = dnnl::memory;
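// Translates the operator's "fuse_activation" attribute into oneDNN eltwise
// post-ops, so the activation runs inside the fused primitive instead of as
// a separate kernel.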
static void AppendActivation(const framework::ExecutionContext& ctx,
dnnl::post_ops& post_ops, // NOLINT
float activation_scale = 1.0f) {
const auto invalid_attribute =
ctx.HasAttr("fuse_activation")
? ctx.Attr<std::string>("fuse_activation").empty()
: true;
if (invalid_attribute) return;
const auto fuse_activation = ctx.Attr<std::string>("fuse_activation");
const auto fuse_alpha =
ctx.HasAttr("fuse_alpha") ? ctx.Attr<float>("fuse_alpha") : 0.0f;
const auto fuse_beta =
ctx.HasAttr("fuse_beta") ? ctx.Attr<float>("fuse_beta") : 0.0f;
if (fuse_activation == "hard_sigmoid") {
post_ops.append_eltwise(activation_scale,
dnnl::algorithm::eltwise_linear,
fuse_alpha,
fuse_beta);
post_ops.append_eltwise(
activation_scale, dnnl::algorithm::eltwise_clip, 0.0f, 1.0f);
} else {
const std::unordered_map<std::string, dnnl::algorithm> activation_map = {
{"abs", dnnl::algorithm::eltwise_abs},
{"clip", dnnl::algorithm::eltwise_clip},
{"gelu", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_erf", dnnl::algorithm::eltwise_gelu_erf},
{"gelu_tanh", dnnl::algorithm::eltwise_gelu_tanh},
{"hard_swish", dnnl::algorithm::eltwise_hardswish},
{"leaky_relu", dnnl::algorithm::eltwise_relu},
{"mish", dnnl::algorithm::eltwise_mish},
{"relu", dnnl::algorithm::eltwise_relu},
{"relu6", dnnl::algorithm::eltwise_bounded_relu},
{"sigmoid", dnnl::algorithm::eltwise_logistic},
{"sqrt", dnnl::algorithm::eltwise_sqrt},
{"swish", dnnl::algorithm::eltwise_swish},
{"tanh", dnnl::algorithm::eltwise_tanh}};
const auto& activation_type = activation_map.find(fuse_activation);
PADDLE_ENFORCE_NE(
activation_type,
activation_map.end(),
platform::errors::InvalidArgument(
"Activation '%s' not found in oneDNN algorithms mapper",
fuse_activation));
post_ops.append_eltwise(
activation_scale, activation_type->second, fuse_alpha, fuse_beta);
}
}
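// Illustrative note (not part of the original file): for
// fuse_activation == "hard_sigmoid" with hypothetical fuse_alpha = 0.2f and
// fuse_beta = 0.5f, the two post-ops above compute
// y = clip(0.2f * x + 0.5f, 0.0f, 1.0f); every other supported name maps to
// a single eltwise entry from activation_map, e.g. "relu6" becomes
// dnnl::algorithm::eltwise_bounded_relu with fuse_alpha as the upper bound.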
static void SetOutMemDescWithUnsqueeze2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) {
const std::vector<int>& fused_unsqueeze2_axes =
ctx.Attr<std::vector<int>>("fused_unsqueeze2_axes");
const std::vector<int64_t>& op_tz = out_md.dims();
std::vector<int64_t> unsqueezed_op_tz(
op_tz.size() + fused_unsqueeze2_axes.size(), 0);
for (const auto& axis : fused_unsqueeze2_axes) {
int positive_axis = axis < 0 ? unsqueezed_op_tz.size() + axis : axis;
unsqueezed_op_tz[positive_axis] = 1;
}
int j = 0;
for (size_t i = 0; i < unsqueezed_op_tz.size(); ++i) {
if (unsqueezed_op_tz[i] == 0) {
unsqueezed_op_tz[i] = op_tz[j++];
}
}
out->set_mem_desc(out_md.reshape(unsqueezed_op_tz));
out->Resize(phi::make_ddim(unsqueezed_op_tz));
}
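// Worked example with illustrative values: out_md.dims() = {6, 7} and
// fused_unsqueeze2_axes = {0, 2} mark unsqueezed_op_tz = {1, 0, 1, 0} in the
// first loop, and the second loop fills the zeros in order, giving
// {1, 6, 1, 7}; the memory descriptor is reshaped in place and no data moves.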
static void SetOutMemDescWithReshape2FuseSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) {
std::vector<int64_t> fused_reshape2_shape(
ctx.Attr<std::vector<int>>("fused_reshape2_shape").begin(),
ctx.Attr<std::vector<int>>("fused_reshape2_shape").end());
const int out_shape_numel = out->numel();
const int new_shape_numel = std::accumulate(fused_reshape2_shape.begin(),
fused_reshape2_shape.end(),
1,
std::multiplies<int64_t>());
for (size_t i = 0; i < fused_reshape2_shape.size(); ++i) {
if (fused_reshape2_shape[i] == -1) {
fused_reshape2_shape[i] = -out_shape_numel / new_shape_numel;
break;
}
}
out->set_mem_desc(out_md.reshape(fused_reshape2_shape));
out->Resize(phi::make_ddim(fused_reshape2_shape));
}
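// Worked example with illustrative values: out->numel() = 24 and
// fused_reshape2_shape = {-1, 4} give new_shape_numel = -4, so the -1 entry
// is replaced by -24 / -4 = 6 and the output is resized to {6, 4}.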
static void SetOutMemDescWithLogicalLayoutFusesSupport(
const framework::ExecutionContext& ctx,
phi::DenseTensor* out,
const dnnl::memory::desc& out_md) {
if (ctx.HasAttr("fused_unsqueeze2_axes")) {
SetOutMemDescWithUnsqueeze2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_reshape2_shape")) {
SetOutMemDescWithReshape2FuseSupport(ctx, out, out_md);
} else if (ctx.HasAttr("fused_squeeze2_axes")) {
out->set_mem_desc(out_md);
out->Resize(phi::make_ddim(out_md.dims()));
} else {
out->set_mem_desc(out_md);
}
}
template <typename XT, typename YT, typename OT>
class MatMulV2MKLDNNHandler
: public phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul> {
public:
MatMulV2MKLDNNHandler(const framework::ExecutionContext& ctx,
const dnnl::engine engine,
paddle::platform::Place cpu_place,
const std::vector<int64_t>& x_org_dims,
bool trans_x,
const std::vector<int64_t>& y_org_dims,
bool trans_y,
bool is_output_fused,
const std::vector<int64_t>& x_strides_override,
const std::vector<int64_t>& y_strides_override)
: phi::funcs::OneDNNHandlerNoCachingT<XT, dnnl::matmul>(engine,
cpu_place) {
// M X K * K X N
std::vector<int64_t> x_dims(x_org_dims);
std::vector<int64_t> y_dims(y_org_dims);
const int MB_idx = x_dims.size() - 3;
const int H_idx = x_dims.size() - 2;
const int W_idx = x_dims.size() - 1;
if (trans_x) std::swap(x_dims[H_idx], x_dims[W_idx]);
if (trans_y) std::swap(y_dims[H_idx], y_dims[W_idx]);
const memory::dim M = x_dims[H_idx];
const memory::dim K = x_dims[W_idx];
const memory::dim N = y_dims[W_idx];
std::vector<int64_t> x_strides(x_dims.size() - 3, 1);
std::vector<int64_t> y_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_strides(x_dims.size() - 3, 1);
std::vector<int64_t> out_ddims(x_dims.size() - 3, 1);
x_strides.reserve(x_dims.size());
y_strides.reserve(x_dims.size());
out_strides.reserve(x_dims.size());
if (!x_strides_override.empty()) {
x_strides = x_strides_override;
} else {
if (!trans_x) {
x_strides.insert(x_strides.end(), {M * K, K, 1});
} else {
x_strides.insert(x_strides.end(), {M * K, 1, M});
}
}
if (!y_strides_override.empty()) {
y_strides = y_strides_override;
} else {
if (!trans_y) {
y_strides.insert(y_strides.end(), {N * K, N, 1});
} else {
y_strides.insert(y_strides.end(), {N * K, 1, K});
}
}
out_strides.insert(out_strides.end(), {M * N, N, 1});
out_ddims.insert(out_ddims.end(),
{std::max(x_dims[MB_idx], y_dims[MB_idx]), M, N});
for (int i = x_dims.size() - 4; i >= 0; --i) {
out_ddims[i] = std::max(x_dims[i], y_dims[i]);
if (x_strides_override.empty()) {
x_strides[i] = x_dims[i + 1] * x_strides[i + 1];
}
if (y_strides_override.empty()) {
y_strides[i] = y_dims[i + 1] * y_strides[i + 1];
}
out_strides[i] = out_ddims[i + 1] * out_strides[i + 1];
}
// TODO(jczaja): Why not for int8??
if (!phi::funcs::is_int8<OT>() && is_output_fused) {
out_strides = FakeTransposeStrides(out_ddims);
}
auto x_md =
memory::desc(x_dims, phi::funcs::OneDNNGetDataType<XT>(), x_strides);
auto y_md =
memory::desc(y_dims, phi::funcs::OneDNNGetDataType<YT>(), y_strides);
auto out_md = memory::desc(
out_ddims, phi::funcs::OneDNNGetDataType<OT>(), out_strides);
const dnnl::primitive_attr matmul_attrs = CreateMatmulAttrs(ctx);
this->AcquireForwardPrimitiveDescriptor(matmul_attrs, x_md, y_md, out_md);
}
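// A minimal sketch of the stride bookkeeping above for plain 3D inputs
// (illustrative values): x_org_dims = {2, 3, 4}, y_org_dims = {2, 4, 5} and
// trans_x = trans_y = false give M = 3, K = 4, N = 5,
// x_strides = {12, 4, 1}, y_strides = {20, 5, 1}, out_ddims = {2, 3, 5} and
// out_strides = {15, 5, 1}; the broadcast loop is skipped because there are
// no dimensions in front of the batch dimension.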
float ComputeOutputScale(const framework::ExecutionContext& ctx) {
float alpha = ctx.HasAttr("alpha") ? ctx.Attr<float>("alpha") : 1.0f;
if (ctx.HasAttr("Scale_x") && ctx.HasAttr("Scale_y") &&
ctx.HasAttr("Scale_out")) {
float scale_x = ctx.Attr<float>("Scale_x");
float scale_y = ctx.Attr<float>("Scale_y");
bool force_fp32_out = ctx.HasAttr("force_fp32_output")
? ctx.Attr<bool>("force_fp32_output")
: false;
float scale_out = force_fp32_out ? 1.f : ctx.Attr<float>("Scale_out");
alpha *= scale_out / (scale_x * scale_y);
}
return alpha;
}
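// Example arithmetic (illustrative values): alpha = 1.0f, Scale_x = 0.5f,
// Scale_y = 0.5f, Scale_out = 1.0f and force_fp32_output = false return
// 1.0f * 1.0f / (0.5f * 0.5f) = 4.0f, which CreateMatmulAttrs installs as
// the primitive's output scale.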
dnnl::primitive_attr CreateMatmulAttrs(
const framework::ExecutionContext& ctx) {
dnnl::primitive_attr matmul_attrs;
dnnl::post_ops post_operations;
float scale_out = ComputeOutputScale(ctx);
if (scale_out != 1.0f) {
matmul_attrs.set_output_scales(0, {scale_out});
}
if (ctx.HasInput("ResidualData")) {
auto* residual_data = ctx.Input<phi::DenseTensor>("ResidualData");
auto residual_data_tz = phi::vectorize(residual_data->dims());
auto residual_data_md = memory::desc(residual_data_tz,
phi::funcs::OneDNNGetDataType<OT>(),
dnnl::memory::format_tag::any);
post_operations.append_binary(dnnl::algorithm::binary_add,
residual_data_md);
if (ctx.HasAttr("Scale_in_eltwise")) {
float sum_scale = scale_out / ctx.Attr<float>("Scale_in_eltwise");
post_operations.append_sum(sum_scale);
}
}
AppendActivation(ctx, post_operations);
if (ctx.HasAttr("fused_output_scale")) {
float scale_alpha = ctx.Attr<float>("fused_output_scale");
post_operations.append_eltwise(
1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f);
}
matmul_attrs.set_post_ops(post_operations);
return matmul_attrs;
}
std::vector<int64_t> FakeTransposeStrides(
const std::vector<int64_t>& matmul_out_dims) const {
// fusing matmul_v2 + transpose + reshape guarantees that the output is 4D
// and the transpose axes are {0, 2, 1, 3}
std::vector<int64_t> transpose_axis = {0, 2, 1, 3};
std::vector<int64_t> fake_strides(transpose_axis.size());
int ndims = static_cast<int>(transpose_axis.size());
int total_stride = 1;
for (int i = ndims - 1; i >= 0; --i) {
fake_strides[transpose_axis[i]] = total_stride;
total_stride *= matmul_out_dims[transpose_axis[i]];
}
return fake_strides;
}
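// Worked example (illustrative values): matmul_out_dims = {2, 12, 128, 64}
// with the fixed axes {0, 2, 1, 3} yields fake_strides = {98304, 64, 768, 1},
// i.e. memory is laid out as if the tensor had already been permuted to
// {2, 128, 12, 64}.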
std::shared_ptr<memory> AcquireWeightsMemory(const phi::DenseTensor* input) {
const YT* input_data = input->data<YT>();
return this->AcquireMemoryFromPrimitive(
this->fwd_pd_->weights_desc(),
phi::funcs::to_void_cast<YT>(input_data));
}
std::shared_ptr<dnnl::memory> AcquireDstMemory(phi::DenseTensor* output) {
// We cannot use the base AcquireDstMemory, as it makes an allocation request
// based on the DST memory primitive size. This is fine in general, but in
// MatMul we have a primitive that covers only one batch of data and then
// shifts the pointer for every new batch. Hence the phi::DenseTensor size is
// bigger than the dst memory primitive size, so we would request less memory
// than is actually there, which triggers an assertion. As there is no 'any'
// format here, we can leave the default size of phi::DenseTensor as computed
// in ComputeInferShape
OT* ptr = output->mutable_data<OT>(this->place_);
return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr);
}
};
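// Hypothetical usage sketch, assuming kernel-side names (engine, place, dims
// and transpose flags would come from the ExecutionContext):
//
//   MatMulV2MKLDNNHandler<float, float, float> handler(
//       ctx, onednn_engine, ctx.GetPlace(), x_dims, trans_x, y_dims, trans_y,
//       /*is_output_fused=*/false, /*x_strides_override=*/{},
//       /*y_strides_override=*/{});
//   auto src_mem = handler.AcquireSrcMemory(x);
//   auto weights_mem = handler.AcquireWeightsMemory(y);
//   auto dst_mem = handler.AcquireDstMemory(out);
//   auto matmul_p = handler.AcquireForwardPrimitive();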
} // namespace platform
} // namespace paddle
...@@ -178,7 +178,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx, ...@@ -178,7 +178,7 @@ void ComputeINT8(const OneDNNContext& dev_ctx,
const std::string& unique_name = const std::string& unique_name =
dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0];
PD_VISIT_FLOAT_AND_INT8_TYPES( PD_VISIT_FLOAT_AND_INT8_TYPES(
filter->dtype(), "ConvMKLDNNHandlerT", ([&] { filter->dtype(), "ConvOneDNNHandlerT", ([&] {
onednn::ConvOneDNNHandlerT<T, data_t, T_out> handler(dev_ctx, onednn::ConvOneDNNHandlerT<T, data_t, T_out> handler(dev_ctx,
onednn_engine, onednn_engine,
dev_ctx.GetPlace(), dev_ctx.GetPlace(),
......
...@@ -40,7 +40,7 @@ class ConvOneDNNHandlerT ...@@ -40,7 +40,7 @@ class ConvOneDNNHandlerT
dnnl::convolution_backward_weights> { dnnl::convolution_backward_weights> {
public: public:
ConvOneDNNHandlerT(const OneDNNContext& dev_ctx, ConvOneDNNHandlerT(const OneDNNContext& dev_ctx,
const dnnl::engine mkldnn_engine, const dnnl::engine onednn_engine,
Place cpu_place, Place cpu_place,
const phi::DenseTensor* input, const phi::DenseTensor* input,
const phi::DenseTensor* filter, const phi::DenseTensor* filter,
...@@ -63,7 +63,7 @@ class ConvOneDNNHandlerT ...@@ -63,7 +63,7 @@ class ConvOneDNNHandlerT
dnnl::convolution_backward_data, dnnl::convolution_backward_data,
dnnl::convolution_backward_weights>( dnnl::convolution_backward_weights>(
dev_ctx, dev_ctx,
mkldnn_engine, onednn_engine,
cpu_place, cpu_place,
funcs::CreateKey( funcs::CreateKey(
dev_ctx, phi::vectorize(input->dims()), unique_name)) { dev_ctx, phi::vectorize(input->dims()), unique_name)) {
......