From 90650534144286b6b1daf1da29902bacd230d04e Mon Sep 17 00:00:00 2001
From: Huang Jiyi <43315610+huangjiyi@users.noreply.github.com>
Date: Thu, 9 Feb 2023 15:36:58 +0800
Subject: [PATCH] remove layout_utils in phi (#50355)

---
 .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 60 +++++++++----------
 paddle/phi/kernels/gpu/batch_norm_kernel.cu   | 54 ++++++++---------
 2 files changed, 51 insertions(+), 63 deletions(-)

diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
index 5e73edcb34..3b09890e22 100644
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
@@ -630,7 +629,7 @@ void BatchNormGradRawKernel(const Context &ctx,
   if (!use_global_stats) {
     if ((N * H * W * D) == 1) {
       if (d_x) {
-        paddle::framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
+        phi::Copy(ctx, *d_y, ctx.GetPlace(), false, d_x);
       }
       phi::funcs::SetConstant<Context, BatchNormParamType<T>> functor;
       functor(ctx, d_scale, static_cast<BatchNormParamType<T>>(0));
@@ -655,10 +654,9 @@ void BatchNormGradRawKernel(const Context &ctx,
     cudnnBatchNormMode_t mode_;
 
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+        phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnCreateTensorDescriptor(
-            &bn_param_desc_));
+        phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
     if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
       LOG(ERROR) << "Provided epsilon is smaller than "
@@ -695,16 +693,14 @@ void BatchNormGradRawKernel(const Context &ctx,
 //     platform::dynload::miopenDeriveBNTensorDescriptor(bn_param_desc_,
 //     data_desc_, mode_));
 #else
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnSetTensorNdDescriptor(
-            data_desc_,
-            CudnnDataType<T>::type,
-            x_dims.size() > 3 ? x_dims.size() : 4,
-            dims.data(),
-            strides.data()));
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDeriveBNTensorDescriptor(
-            bn_param_desc_, data_desc_, mode_));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+        data_desc_,
+        CudnnDataType<T>::type,
+        x_dims.size() > 3 ? x_dims.size() : 4,
+        dims.data(),
+        strides.data()));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+        bn_param_desc_, data_desc_, mode_));
 #endif
 
     const auto *saved_mean_data =
@@ -934,26 +930,25 @@ void BatchNormGradRawKernel(const Context &ctx,
         auto reserve_space_size = reserve_space->memory_size();
         // --------------- cudnn batchnorm workspace ---------------
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::
-                cudnnGetBatchNormalizationBackwardExWorkspaceSize(
-                    /*handle=*/ctx.cudnn_handle(),
-                    /*mode=*/mode_,
-                    /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,
-                    /*xDesc=*/data_desc_,
-                    /*yDesc=*/data_desc_,
-                    /*dyDesc=*/data_desc_,
-                    /*dzDesc=*/nullptr,
-                    /*dxDesc=*/data_desc_,
-                    /*bnScaleBiasMeanVarDesc=*/bn_param_desc_,
-                    /*activationDesc=*/nullptr,
-                    /*sizeInBytes=*/&workspace_size));
+            phi::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize(
+                /*handle=*/ctx.cudnn_handle(),
+                /*mode=*/mode_,
+                /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,
+                /*xDesc=*/data_desc_,
+                /*yDesc=*/data_desc_,
+                /*dyDesc=*/data_desc_,
+                /*dzDesc=*/nullptr,
+                /*dxDesc=*/data_desc_,
+                /*bnScaleBiasMeanVarDesc=*/bn_param_desc_,
+                /*activationDesc=*/nullptr,
+                /*sizeInBytes=*/&workspace_size));
 
         workspace_tensor.Resize({static_cast<int64_t>(workspace_size)});
         workspace_ptr =
             static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
 
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::cudnnBatchNormalizationBackwardEx(
+            phi::dynload::cudnnBatchNormalizationBackwardEx(
                 /*handle=*/ctx.cudnn_handle(),
                 /*mode=*/mode_,
                 /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
@@ -989,7 +984,7 @@ void BatchNormGradRawKernel(const Context &ctx,
                 /*reserveSpaceSizeInBytes=*/reserve_space_size));
 #else
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::cudnnBatchNormalizationBackward(
+            phi::dynload::cudnnBatchNormalizationBackward(
                 ctx.cudnn_handle(),
                 mode_,
                 CudnnDataType<T>::kOne(),
@@ -1089,10 +1084,9 @@ void BatchNormGradRawKernel(const Context &ctx,
 #else
     // clean when exit.
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+        phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDestroyTensorDescriptor(
-            bn_param_desc_));
+        phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
 
   } else {
diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
index fc460574b7..e2d716560d 100644
--- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -20,7 +20,6 @@
 namespace cub = hipcub;
 #endif
 
-#include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
@@ -598,9 +597,9 @@ void BatchNormKernel(const Context &ctx,
   cudnnBatchNormMode_t mode_;
 
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
 
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
@@ -651,19 +650,15 @@ void BatchNormKernel(const Context &ctx,
 //     platform::dynload::miopenDeriveBNTensorDescriptor(
 //         bn_param_desc_, data_desc_, test_mode ? miopenBNSpatial : mode_));
 #else
-  PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnSetTensorNdDescriptor(
-          data_desc_,
-          CudnnDataType<T>::type,
-          x_dims.size() > 3 ? x_dims.size() : 4,
-          dims.data(),
-          strides.data()));
+  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+      data_desc_,
+      CudnnDataType<T>::type,
+      x_dims.size() > 3 ? x_dims.size() : 4,
+      dims.data(),
+      strides.data()));
   // Note: PERSISTENT not implemented for inference
-  PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDeriveBNTensorDescriptor(
-          bn_param_desc_,
-          data_desc_,
-          test_mode ? CUDNN_BATCHNORM_SPATIAL : mode_));
+  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+      bn_param_desc_, data_desc_, test_mode ? CUDNN_BATCHNORM_SPATIAL : mode_));
 #endif
 
   auto handle = ctx.cudnn_handle();
@@ -830,7 +825,7 @@ void BatchNormKernel(const Context &ctx,
       }
     } else {
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::cudnnBatchNormalizationForwardInference(
+          phi::dynload::cudnnBatchNormalizationForwardInference(
               handle,
               // Note: PERSISTENT not implemented for inference
              CUDNN_BATCHNORM_SPATIAL,
@@ -873,7 +868,7 @@ void BatchNormKernel(const Context &ctx,
     if ((N * H * W * D) == 1) {
       // Only 1 element in normalization dimension,
       // skip the batch norm calculation, let y = x.
-      paddle::framework::TensorCopy(x, ctx.GetPlace(), y);
+      phi::Copy(ctx, x, ctx.GetPlace(), false, y);
     } else {
       double this_factor = 1. - momentum;
 #ifdef PADDLE_WITH_HIP
@@ -1114,7 +1109,7 @@ void BatchNormKernel(const Context &ctx,
               "The argument ReserveSpace of batch_norm op is not found."));
       // --------------- cudnn batchnorm workspace ---------------
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::
+          phi::dynload::
               cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
                   /*handle=*/handle,
                   /*mode=*/mode_,
@@ -1128,14 +1123,13 @@ void BatchNormKernel(const Context &ctx,
 
       // -------------- cudnn batchnorm reserve space --------------
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::
-              cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
-                  /*handle=*/handle,
-                  /*mode=*/mode_,
-                  /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
-                  /*activationDesc=*/nullptr,
-                  /*xDesc=*/data_desc_,
-                  /*sizeInBytes=*/&reserve_space_size));
+          phi::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
+              /*handle=*/handle,
+              /*mode=*/mode_,
+              /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
+              /*activationDesc=*/nullptr,
+              /*xDesc=*/data_desc_,
+              /*sizeInBytes=*/&reserve_space_size));
 
       reserve_space->Resize({static_cast<int64_t>(reserve_space_size)});
       reserve_space_ptr =
@@ -1144,7 +1138,7 @@ void BatchNormKernel(const Context &ctx,
       workspace_ptr =
          static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::cudnnBatchNormalizationForwardTrainingEx(
+          phi::dynload::cudnnBatchNormalizationForwardTrainingEx(
              handle,
              mode_,
              CUDNN_BATCHNORM_OPS_BN,
@@ -1172,7 +1166,7 @@ void BatchNormKernel(const Context &ctx,
              reserve_space_size));
 #else
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::cudnnBatchNormalizationForwardTraining(
+          phi::dynload::cudnnBatchNormalizationForwardTraining(
              handle,
              mode_,
              CudnnDataType<T>::kOne(),
@@ -1211,9 +1205,9 @@ void BatchNormKernel(const Context &ctx,
 #else
   // clean when exit.
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+      phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
+      phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
 }
 
--
GitLab