[PHI decoupling] move norm_utils.cu.h from fluid to phi and remove norm_utils.h in fluid (#48930)

* move norm_utils.cu.h from fluid to phi * remove norm_utils.h in fluid * fix bugs and replace mutable_data with Alloc * replace mutable_data with Alloc

[PHI decoupling] move norm_utils.cu.h from fluid to phi and remove norm_utils.h in fluid (#48930)
* move norm_utils.cu.h from fluid to phi * remove norm_utils.h in fluid * fix bugs and replace mutable_data with Alloc * replace mutable_data with Alloc
3cb8db8f · huangjiyi · GitHub · 8f87f0c7 · 3cb8db8f · 3cb8db8f
10 changed file
--- a/paddle/fluid/operators/batch_norm_op.cu
+++ b/paddle/fluid/operators/batch_norm_op.cu
@@ -25,8 +25,8 @@ namespace cub = hipcub;
 #endif
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/operators/batch_norm_op.h"
-#include "paddle/fluid/operators/norm_utils.cu.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 DECLARE_bool(cudnn_batchnorm_spatial_persistent);
@@ -36,7 +36,7 @@ namespace operators {

 using DataLayout = phi::DataLayout;
 template <typename T>
-using CudnnDataType = platform::CudnnDataType<T>;
+using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
 template <typename T>
 using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;


--- a/paddle/fluid/operators/batch_norm_op.h
+++ b/paddle/fluid/operators/batch_norm_op.h
@@ -21,8 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/layout_utils.h"
-#include "paddle/fluid/operators/norm_utils.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/norm_utils.h"

 namespace paddle {
 namespace operators {

--- a/paddle/fluid/operators/fused/fused_bn_activation_op.cu
+++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cu
@@ -21,10 +21,10 @@
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/operators/fused/fused_bn_activation_op.h"
-#include "paddle/fluid/operators/norm_utils.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/norm_utils.h"

 DECLARE_bool(cudnn_batchnorm_spatial_persistent);

@@ -91,7 +91,7 @@ class FusedBatchNormActKernel<phi::GPUContext, T>

    int N, C, H, W, D;
    const DataLayout data_layout = DataLayout::kNHWC;
-    ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);

    if ((N * H * W * D) == 1) {
      // Only 1 element in normalization dimension,
@@ -257,7 +257,7 @@ class FusedBatchNormActGradKernel<phi::GPUContext, T>
                          "The Input dim size should be between 2 and 5"));
    int N, C, H, W, D;
    const DataLayout data_layout = DataLayout::kNHWC;
-    ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);

    // init output
    auto *d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));

--- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu
+++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu
@@ -20,11 +20,11 @@
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h"
-#include "paddle/fluid/operators/norm_utils.h"
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 #include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/kernels/funcs/norm_utils.h"

 DECLARE_bool(cudnn_batchnorm_spatial_persistent);

@@ -85,7 +85,7 @@ class FusedBatchNormAddActKernel<phi::GPUContext, T>

    int N, C, H, W, D;
    const DataLayout data_layout = DataLayout::kNHWC;
-    ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D);

    // ------------------- cudnn descriptors ---------------------
    auto handle = dev_ctx.cudnn_handle();
@@ -231,7 +231,7 @@ class FusedBatchNormAddActGradKernel<phi::GPUContext, T>

    int N, C, H, W, D;
    const DataLayout data_layout = DataLayout::kNHWC;
-    ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D);

    // init output
    auto *d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));

--- a/paddle/fluid/operators/norm_utils.h
+++ b/paddle/fluid/operators/norm_utils.h
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <memory>
-
-#include "paddle/fluid/framework/op_registry.h"
-
-namespace paddle {
-namespace operators {
-
-using DataLayout = phi::DataLayout;
-
-inline void ExtractNCWHD(const framework::DDim &dims,
-                         const DataLayout &data_layout,
-                         int *N,
-                         int *C,
-                         int *H,
-                         int *W,
-                         int *D) {
-  *N = dims[0];
-  if (dims.size() == 2) {
-    *C = dims[1];
-    *H = 1;
-    *W = 1;
-    *D = 1;
-  } else {
-    *C = data_layout == DataLayout::kNCHW ? dims[1] : dims[dims.size() - 1];
-    *H = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
-    *W = dims.size() > 3
-             ? (data_layout == DataLayout::kNCHW ? dims[3] : dims[2])
-             : 1;
-    *D = dims.size() > 4
-             ? (data_layout == DataLayout::kNCHW ? dims[4] : dims[3])
-             : 1;
-  }
-}
-
-}  // namespace operators
-}  // namespace paddle
--- a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc
+++ b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc
@@ -72,7 +72,7 @@ class SyncBatchNormMLUKernel : public framework::OpKernel<T> {
                          "The Input dim size should be less than 6."));

    int N, C, H, W, D;
-    ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);

    y->mutable_data<T>(ctx.GetPlace());
    mean_out->mutable_data<MPDType>(ctx.GetPlace());
@@ -320,7 +320,7 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel<T> {
                          "The Input X dim size should be less than 6."));

    int N, C, H, W, D;
-    ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
    PADDLE_ENFORCE_EQ(scale->dims()[0],
                      C,
                      platform::errors::InvalidArgument(

--- a/paddle/fluid/operators/sync_batch_norm_op_npu.cc
+++ b/paddle/fluid/operators/sync_batch_norm_op_npu.cc
@@ -344,7 +344,7 @@ class SyncBatchNormNPUKernel : public framework::OpKernel<T> {
                          x_dims.size()));

    int N, C, H, W, D;
-    ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D);

    int x_numel = x->numel();
    auto place = ctx.GetPlace();
@@ -598,7 +598,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel<T> {
    }

    int N, C, H, W, D;
-    ExtractNCWHD(x->dims(), layout, &N, &C, &H, &W, &D);
+    phi::funcs::ExtractNCWHD(x->dims(), layout, &N, &C, &H, &W, &D);

    int x_numel = x->numel();
    auto place = ctx.GetPlace();

--- a/paddle/fluid/operators/norm_utils.cu.h
+++ b/paddle/fluid/operators/norm_utils.cu.h
@@ -24,8 +24,7 @@ limitations under the License. */
 #include <hipcub/hipcub.hpp>
 namespace cub = hipcub;
 #endif
-#include "paddle/fluid/framework/data_layout.h"
-#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#include "paddle/phi/common/layout.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 #ifdef __HIPCC__
@@ -34,8 +33,8 @@ namespace cub = hipcub;
 #define LAUNCH_BOUNDS(BlockDim)
 #endif

-namespace paddle {
-namespace operators {
+namespace phi {
+namespace funcs {

 using DataLayout = phi::DataLayout;

@@ -464,7 +463,8 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
  const int sample_size = num / N / C;
  phi::DenseTensor scale_tmp;
  if (!Scale) {
-    scale_tmp.mutable_data<T>({C}, ctx.GetPlace());
+    scale_tmp.Resize({C});
+    ctx.template Alloc<T>(&scale_tmp);
    set_constant(ctx, &scale_tmp, static_cast<T>(1));
  }
  const T *scale_data = Scale ? Scale->data<T>() : scale_tmp.data<T>();
@@ -495,7 +495,7 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
  }

  if (dX) {
-    T *dx_data = dX->mutable_data<T>(ctx.GetPlace());
+    T *dx_data = ctx.template Alloc<T>(dX);
    set_constant(ctx, dX, static_cast<T>(0));
    if (use_global_stats) {
      if (data_layout == DataLayout::kNHWC) {
@@ -552,7 +552,7 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
    }
  }
  if (dScale) {
-    T *dscale_data = dScale->mutable_data<T>(ctx.GetPlace());
+    T *dscale_data = ctx.template Alloc<T>(dScale);
    set_constant(ctx, dScale, static_cast<T>(0));
    if (use_global_stats) {
      if (data_layout == DataLayout::kNHWC) {
@@ -605,7 +605,7 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
    }
  }
  if (ddY) {
-    T *ddy_data = ddY->mutable_data<T>(ctx.GetPlace());
+    T *ddy_data = ctx.template Alloc<T>(ddY);
    set_constant(ctx, ddY, static_cast<T>(0));
    if (use_global_stats) {
      if (data_layout == DataLayout::kNHWC) {
@@ -670,5 +670,5 @@ void NormDoubleGradFunctor(const DeviceContext &ctx,
    }
  }
 }
-}  // namespace operators
-}  // namespace paddle
+}  // namespace funcs
+}  // namespace phi
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -13,7 +13,6 @@
 // limitations under the License.

 #include "paddle/fluid/operators/layout_utils.h"
-#include "paddle/fluid/operators/norm_utils.cu.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
@@ -24,6 +23,7 @@
 #include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/funcs/batch_norm_utils.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
+#include "paddle/phi/kernels/funcs/norm_utils.cu.h"
 #include "paddle/phi/kernels/funcs/norm_utils.h"
 #include "paddle/phi/kernels/funcs/reduce_function.h"

@@ -1352,8 +1352,7 @@ void BatchNormDoubleGradKernel(
    running_mean = mean.get_ptr();
    running_variance = variance.get_ptr();
  }
-  paddle::operators::NormDoubleGradFunctor<Context, T>(
-      ctx,
+  phi::funcs::NormDoubleGradFunctor<Context, T>(ctx,
                                                data_layout,
                                                &x,
                                                &scale,

--- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -21,7 +21,6 @@ namespace cub = hipcub;
 #endif

 #include "paddle/fluid/operators/layout_utils.h"
-#include "paddle/fluid/operators/norm_utils.cu.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"