diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu
index e643efcb8b9f565877bf9ef6321b8243c18985c0..f93cb32a850efb04ac195997dc69751ec643ca0e 100644
--- a/paddle/fluid/operators/batch_norm_op.cu
+++ b/paddle/fluid/operators/batch_norm_op.cu
@@ -25,8 +25,8 @@ namespace cub = hipcub;
 #endif
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/operators/batch_norm_op.h"
-#include "paddle/fluid/operators/norm_utils.cu.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 DECLARE_bool(cudnn_batchnorm_spatial_persistent);
@@ -36,7 +36,7 @@ namespace operators {
 using DataLayout = phi::DataLayout;
 
 template <typename T>
-using CudnnDataType = platform::CudnnDataType<T>;
+using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
 template <typename T>
 using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;
 
diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h
index 40cdb68329fb279d49c46c1085e1f25a62fa3011..0e579010a91d796831856128c971a05bd6af04c7 100644
--- a/paddle/fluid/operators/batch_norm_op.h
+++ b/paddle/fluid/operators/batch_norm_op.h
@@ -21,8 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/layout_utils.h"
-#include "paddle/fluid/operators/norm_utils.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/norm_utils.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_activation_op.cu
index 4023aaa8445f95ea07809900c97a9f6d0172e7fc..35d1b45408b1f435d2952c32b5b7ba1080849a7c 100644
--- a/paddle/fluid/operators/fused/fused_bn_activation_op.cu
+++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cu
@@ -21,10 +21,10 @@
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/operators/fused/fused_bn_activation_op.h"
-#include "paddle/fluid/operators/norm_utils.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/norm_utils.h"
 
 DECLARE_bool(cudnn_batchnorm_spatial_persistent);
 
@@ -91,7 +91,7 @@ class FusedBatchNormActKernel
 
     int N, C, H, W, D;
     const DataLayout data_layout = DataLayout::kNHWC;
-    ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
 
     if ((N * H * W * D) == 1) {
       // Only 1 element in normalization dimension,
@@ -257,7 +257,7 @@ class FusedBatchNormActGradKernel
                           "The Input dim size should be between 2 and 5"));
     int N, C, H, W, D;
     const DataLayout data_layout = DataLayout::kNHWC;
-    ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
 
     // init output
     auto *d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu
index 4c4756b8e19792885475c3226ae49bf91ee54d23..4d92a7865eb2c937e94df17c8eb50b18f684ceb5 100644
--- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu
+++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu
@@ -20,11 +20,11 @@
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h"
-#include "paddle/fluid/operators/norm_utils.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/norm_utils.h"
 
 DECLARE_bool(cudnn_batchnorm_spatial_persistent);
 
@@ -85,7 +85,7 @@ class FusedBatchNormAddActKernel
 
     int N, C, H, W, D;
     const DataLayout data_layout = DataLayout::kNHWC;
-    ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D);
 
     // ------------------- cudnn descriptors ---------------------
     auto handle = dev_ctx.cudnn_handle();
@@ -231,7 +231,7 @@ class FusedBatchNormAddActGradKernel
 
     int N, C, H, W, D;
     const DataLayout data_layout = DataLayout::kNHWC;
-    ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D);
 
     // init output
     auto *d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
diff --git a/paddle/fluid/operators/norm_utils.h b/paddle/fluid/operators/norm_utils.h
deleted file mode 100644
index edaf19f68f1b38e20be51d37bdbb02d2f6d08454..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/norm_utils.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <string>
-
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
-
-using DataLayout = phi::DataLayout;
-
-inline void ExtractNCWHD(const framework::DDim &dims,
-                         const DataLayout &data_layout,
-                         int *N,
-                         int *C,
-                         int *H,
-                         int *W,
-                         int *D) {
-  *N = dims[0];
-  if (dims.size() == 2) {
-    *C = dims[1];
-    *H = 1;
-    *W = 1;
-    *D = 1;
-  } else {
-    *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[dims.size() - 1];
-    *H = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
-    *W = dims.size() > 3
-             ? (data_layout == DataLayout::kNCHW ? dims[3] : dims[2])
-             : 1;
-    *D = dims.size() > 4
-             ? (data_layout == DataLayout::kNCHW ? dims[4] : dims[3])
-             : 1;
-  }
-}
-
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc
index 2d037a7c3ecc1a03ec4f7fd696338355c502f455..6d9e161806d8201245d858e9e064eb0689708256 100644
--- a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc
+++ b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc
@@ -72,7 +72,7 @@ class SyncBatchNormMLUKernel : public framework::OpKernel<T> {
                           "The Input dim size should be less than 6."));
     int N, C, H, W, D;
-    ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
 
     y->mutable_data<T>(ctx.GetPlace());
     mean_out->mutable_data<MPDType>(ctx.GetPlace());
@@ -320,7 +320,7 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel<T> {
                           "The Input X dim size should be less than 6."));
     int N, C, H, W, D;
-    ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
 
     PADDLE_ENFORCE_EQ(scale->dims()[0],
                       C,
                       platform::errors::InvalidArgument(
diff --git a/paddle/fluid/operators/sync_batch_norm_op_npu.cc b/paddle/fluid/operators/sync_batch_norm_op_npu.cc
index 46b1ccc140ddb86d4008b1072d458cfd928fac03..b25ca5b3823cef3715135790a0c8759fa11a4af2 100644
--- a/paddle/fluid/operators/sync_batch_norm_op_npu.cc
+++ b/paddle/fluid/operators/sync_batch_norm_op_npu.cc
@@ -344,7 +344,7 @@ class SyncBatchNormNPUKernel : public framework::OpKernel<T> {
                           x_dims.size()));
 
     int N, C, H, W, D;
-    ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
 
     int x_numel = x->numel();
     auto place = ctx.GetPlace();
@@ -598,7 +598,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel<T> {
     }
 
     int N, C, H, W, D;
-    ExtractNCWHD(x->dims(), layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x->dims(), layout, &N, &C, &H, &W, &D);
 
     int x_numel = x->numel();
     auto place = ctx.GetPlace();
diff --git a/paddle/fluid/operators/norm_utils.cu.h b/paddle/phi/kernels/funcs/norm_utils.cu.h
similarity index 98%
rename from paddle/fluid/operators/norm_utils.cu.h
rename to paddle/phi/kernels/funcs/norm_utils.cu.h
index 2412913995b95cab225cfbdb845a7abe54784099..0971db10529a9654e52acf658d7a2589675d2b59 100644
--- a/paddle/fluid/operators/norm_utils.cu.h
+++ b/paddle/phi/kernels/funcs/norm_utils.cu.h
@@ -24,8 +24,7 @@ limitations under the License. */
 #include <hipcub/hipcub.hpp>
 namespace cub = hipcub;
 #endif
-#include "paddle/fluid/framework/data_layout.h"
-#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#include "paddle/phi/common/layout.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 #ifdef __HIPCC__
@@ -34,8 +33,8 @@ namespace cub = hipcub;
 #define LAUNCH_BOUNDS(BlockDim)
 #endif
 
-namespace paddle {
-namespace operators {
+namespace phi {
+namespace funcs {
 
 using DataLayout = phi::DataLayout;
 
@@ -464,7 +463,8 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
   const int sample_size = num / N / C;
   phi::DenseTensor scale_tmp;
   if (!Scale) {
-    scale_tmp.mutable_data<T>({C}, ctx.GetPlace());
+    scale_tmp.Resize({C});
+    ctx.template Alloc<T>(&scale_tmp);
     set_constant(ctx, &scale_tmp, static_cast<T>(1));
   }
   const T *scale_data = Scale ? Scale->data<T>() : scale_tmp.data<T>();
@@ -495,7 +495,7 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
   }
 
   if (dX) {
-    T *dx_data = dX->mutable_data<T>(ctx.GetPlace());
+    T *dx_data = ctx.template Alloc<T>(dX);
     set_constant(ctx, dX, static_cast<T>(0));
     if (use_global_stats) {
       if (data_layout == DataLayout::kNHWC) {
@@ -552,7 +552,7 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
     }
   }
   if (dScale) {
-    T *dscale_data = dScale->mutable_data<T>(ctx.GetPlace());
+    T *dscale_data = ctx.template Alloc<T>(dScale);
     set_constant(ctx, dScale, static_cast<T>(0));
     if (use_global_stats) {
       if (data_layout == DataLayout::kNHWC) {
@@ -605,7 +605,7 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
     }
   }
   if (ddY) {
-    T *ddy_data = ddY->mutable_data<T>(ctx.GetPlace());
+    T *ddy_data = ctx.template Alloc<T>(ddY);
    set_constant(ctx, ddY, static_cast<T>(0));
     if (use_global_stats) {
       if (data_layout == DataLayout::kNHWC) {
@@ -670,5 +670,5 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
     }
   }
 }
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
index cfad86506c9099d08b92471edf27cf437ba85dd9..1c6d1debbabd9333b4d2c2ff6f7ebc60b9ec914c 100644
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -13,7 +13,6 @@
 // limitations under the License.
 
 #include "paddle/fluid/operators/layout_utils.h"
-#include "paddle/fluid/operators/norm_utils.cu.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
@@ -24,6 +23,7 @@
 #include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/funcs/batch_norm_utils.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
+#include "paddle/phi/kernels/funcs/norm_utils.cu.h"
 #include "paddle/phi/kernels/funcs/norm_utils.h"
 #include "paddle/phi/kernels/funcs/reduce_function.h"
 
@@ -1352,24 +1352,23 @@
     running_mean = mean.get_ptr();
     running_variance = variance.get_ptr();
   }
-  paddle::operators::NormDoubleGradFunctor<Context, T>(
-      ctx,
-      data_layout,
-      &x,
-      &scale,
-      &y_grad,
-      &saved_mean,
-      &saved_variance,
-      running_mean,
-      running_variance,
-      epsilon,
-      use_global_stats,
-      x_grad_grad.get_ptr(),
-      scale_grad_grad.get_ptr(),
-      bias_grad_grad.get_ptr(),
-      x_grad,
-      scale_grad,
-      y_grad_grad);
+  phi::funcs::NormDoubleGradFunctor<Context, T>(ctx,
+                                                data_layout,
+                                                &x,
+                                                &scale,
+                                                &y_grad,
+                                                &saved_mean,
+                                                &saved_variance,
+                                                running_mean,
+                                                running_variance,
+                                                epsilon,
+                                                use_global_stats,
+                                                x_grad_grad.get_ptr(),
+                                                scale_grad_grad.get_ptr(),
+                                                bias_grad_grad.get_ptr(),
+                                                x_grad,
+                                                scale_grad,
+                                                y_grad_grad);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
index d01397c1fa0665a2059ed0e11a1c306acddfb98e..01e4f08c29bdd5669c5d7094c90f551ce18368a4 100644
--- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -21,7 +21,6 @@ namespace cub = hipcub;
 #endif
 
 #include "paddle/fluid/operators/layout_utils.h"
-#include "paddle/fluid/operators/norm_utils.cu.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
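Note on the refactor above: the only symbol affected by deleting `paddle/fluid/operators/norm_utils.h` is the `ExtractNCWHD` helper, which every call site now spells `phi::funcs::ExtractNCWHD` (declared in `paddle/phi/kernels/funcs/norm_utils.h`). Below is a minimal standalone sketch of what the helper computes; it mirrors the body deleted above, but substitutes `std::vector<int64_t>` and a local `DataLayout` enum for the real `phi::DDim`/`phi::DataLayout` types so that it compiles outside the Paddle tree.

```cpp
// Illustrative stand-in for phi::funcs::ExtractNCWHD (not the real Paddle
// signature): splits a rank 2-5 shape into batch (N), channel (C) and
// spatial (H, W, D) extents, honoring NCHW vs. NHWC channel placement.
#include <cassert>
#include <cstdint>
#include <vector>

enum class DataLayout { kNCHW, kNHWC };

inline void ExtractNCWHD(const std::vector<int64_t> &dims,
                         const DataLayout &data_layout,
                         int *N, int *C, int *H, int *W, int *D) {
  *N = static_cast<int>(dims[0]);
  if (dims.size() == 2) {
    // Rank-2 input (e.g. fully-connected activations): only N and C exist.
    *C = static_cast<int>(dims[1]);
    *H = *W = *D = 1;
  } else {
    // The channel dim is second for NCHW and last for NHWC; absent
    // trailing spatial dims default to 1.
    const bool nchw = data_layout == DataLayout::kNCHW;
    *C = static_cast<int>(nchw ? dims[1] : dims[dims.size() - 1]);
    *H = static_cast<int>(nchw ? dims[2] : dims[1]);
    *W = dims.size() > 3 ? static_cast<int>(nchw ? dims[3] : dims[2]) : 1;
    *D = dims.size() > 4 ? static_cast<int>(nchw ? dims[4] : dims[3]) : 1;
  }
}

int main() {
  int N, C, H, W, D;
  // NCHW: {batch, channels, height, width}.
  ExtractNCWHD({8, 3, 32, 32}, DataLayout::kNCHW, &N, &C, &H, &W, &D);
  assert(N == 8 && C == 3 && H == 32 && W == 32 && D == 1);
  // NHWC: the same tensor with channels last yields identical N/C/H/W/D.
  ExtractNCWHD({8, 32, 32, 3}, DataLayout::kNHWC, &N, &C, &H, &W, &D);
  assert(N == 8 && C == 3 && H == 32 && W == 32 && D == 1);
  return 0;
}
```

The `norm_utils.cu.h` rename additionally migrates `NormDoubleGradFunctor` off fluid's place-based allocation (`tensor->mutable_data<T>(ctx.GetPlace())`) onto the phi device-context allocator (`Resize` where needed, then `ctx.template Alloc<T>(tensor)`), which is what lets the header drop its remaining `paddle/fluid` dependencies.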