From 90650534144286b6b1daf1da29902bacd230d04e Mon Sep 17 00:00:00 2001
From: Huang Jiyi <43315610+huangjiyi@users.noreply.github.com>
Date: Thu, 9 Feb 2023 15:36:58 +0800
Subject: [PATCH] remove layout_utils in phi (#50355)

---
 .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 60 +++++++++----------
 paddle/phi/kernels/gpu/batch_norm_kernel.cu   | 54 ++++++++---------
 2 files changed, 51 insertions(+), 63 deletions(-)

diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
index 5e73edcb34..3b09890e22 100644
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
@@ -630,7 +629,7 @@ void BatchNormGradRawKernel(const Context &ctx,
   if (!use_global_stats) {
     if ((N * H * W * D) == 1) {
       if (d_x) {
-        paddle::framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
+        phi::Copy(ctx, *d_y, ctx.GetPlace(), false, d_x);
       }
       phi::funcs::SetConstant<Context, BatchNormParamType<T>> functor;
       functor(ctx, d_scale, static_cast<BatchNormParamType<T>>(0));
@@ -655,10 +654,9 @@ void BatchNormGradRawKernel(const Context &ctx,
     cudnnBatchNormMode_t mode_;
 
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+        phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnCreateTensorDescriptor(
-            &bn_param_desc_));
+        phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
     if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
       LOG(ERROR) << "Provided epsilon is smaller than "
@@ -695,16 +693,14 @@ void BatchNormGradRawKernel(const Context &ctx,
 //     platform::dynload::miopenDeriveBNTensorDescriptor(bn_param_desc_,
 //     data_desc_, mode_));
 #else
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnSetTensorNdDescriptor(
-            data_desc_,
-            CudnnDataType<T>::type,
-            x_dims.size() > 3 ? x_dims.size() : 4,
-            dims.data(),
-            strides.data()));
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDeriveBNTensorDescriptor(
-            bn_param_desc_, data_desc_, mode_));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+        data_desc_,
+        CudnnDataType<T>::type,
+        x_dims.size() > 3 ? x_dims.size() : 4,
+        dims.data(),
+        strides.data()));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+        bn_param_desc_, data_desc_, mode_));
 #endif
 
     const auto *saved_mean_data =
@@ -934,26 +930,25 @@ void BatchNormGradRawKernel(const Context &ctx,
         auto reserve_space_size = reserve_space->memory_size();
         // --------------- cudnn batchnorm workspace ---------------
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::
-                cudnnGetBatchNormalizationBackwardExWorkspaceSize(
-                    /*handle=*/ctx.cudnn_handle(),
-                    /*mode=*/mode_,
-                    /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,
-                    /*xDesc=*/data_desc_,
-                    /*yDesc=*/data_desc_,
-                    /*dyDesc=*/data_desc_,
-                    /*dzDesc=*/nullptr,
-                    /*dxDesc=*/data_desc_,
-                    /*bnScaleBiasMeanVarDesc=*/bn_param_desc_,
-                    /*activationDesc=*/nullptr,
-                    /*sizeInBytes=*/&workspace_size));
+            phi::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize(
+                /*handle=*/ctx.cudnn_handle(),
+                /*mode=*/mode_,
+                /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,
+                /*xDesc=*/data_desc_,
+                /*yDesc=*/data_desc_,
+                /*dyDesc=*/data_desc_,
+                /*dzDesc=*/nullptr,
+                /*dxDesc=*/data_desc_,
+                /*bnScaleBiasMeanVarDesc=*/bn_param_desc_,
+                /*activationDesc=*/nullptr,
+                /*sizeInBytes=*/&workspace_size));
 
         workspace_tensor.Resize({static_cast<int64_t>(workspace_size)});
         workspace_ptr =
             static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
 
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::cudnnBatchNormalizationBackwardEx(
+            phi::dynload::cudnnBatchNormalizationBackwardEx(
                 /*handle=*/ctx.cudnn_handle(),
                 /*mode=*/mode_,
                 /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
@@ -989,7 +984,7 @@ void BatchNormGradRawKernel(const Context &ctx,
                 /*reserveSpaceSizeInBytes=*/reserve_space_size));
 #else
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::cudnnBatchNormalizationBackward(
+            phi::dynload::cudnnBatchNormalizationBackward(
                 ctx.cudnn_handle(),
                 mode_,
                 CudnnDataType<T>::kOne(),
@@ -1089,10 +1084,9 @@ void BatchNormGradRawKernel(const Context &ctx,
 #else
     // clean when exit.
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+        phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDestroyTensorDescriptor(
-            bn_param_desc_));
+        phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
 
   } else {
diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
index fc460574b7..e2d716560d 100644
--- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -20,7 +20,6 @@
 namespace cub = hipcub;
 #endif
 
-#include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
@@ -598,9 +597,9 @@ void BatchNormKernel(const Context &ctx,
   cudnnBatchNormMode_t mode_;
 
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
 
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
@@ -651,19 +650,15 @@ void BatchNormKernel(const Context &ctx,
 //     platform::dynload::miopenDeriveBNTensorDescriptor(
 //         bn_param_desc_, data_desc_, test_mode ? miopenBNSpatial : mode_));
 #else
-  PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnSetTensorNdDescriptor(
-          data_desc_,
-          CudnnDataType<T>::type,
-          x_dims.size() > 3 ? x_dims.size() : 4,
-          dims.data(),
-          strides.data()));
+  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+      data_desc_,
+      CudnnDataType<T>::type,
+      x_dims.size() > 3 ? x_dims.size() : 4,
+      dims.data(),
+      strides.data()));
   // Note: PERSISTENT not implemented for inference
-  PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDeriveBNTensorDescriptor(
-          bn_param_desc_,
-          data_desc_,
-          test_mode ? CUDNN_BATCHNORM_SPATIAL : mode_));
+  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+      bn_param_desc_, data_desc_, test_mode ? CUDNN_BATCHNORM_SPATIAL : mode_));
 #endif
 
   auto handle = ctx.cudnn_handle();
@@ -830,7 +825,7 @@ void BatchNormKernel(const Context &ctx,
       }
     } else {
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::cudnnBatchNormalizationForwardInference(
+          phi::dynload::cudnnBatchNormalizationForwardInference(
               handle,
               // Note: PERSISTENT not implemented for inference
              CUDNN_BATCHNORM_SPATIAL,
@@ -873,7 +868,7 @@ void BatchNormKernel(const Context &ctx,
     if ((N * H * W * D) == 1) {
       // Only 1 element in normalization dimension,
       // skip the batch norm calculation, let y = x.
-      paddle::framework::TensorCopy(x, ctx.GetPlace(), y);
+      phi::Copy(ctx, x, ctx.GetPlace(), false, y);
     } else {
       double this_factor = 1. - momentum;
 #ifdef PADDLE_WITH_HIP
@@ -1114,7 +1109,7 @@ void BatchNormKernel(const Context &ctx,
               "The argument ReserveSpace of batch_norm op is not found."));
       // --------------- cudnn batchnorm workspace ---------------
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::
+          phi::dynload::
               cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
                   /*handle=*/handle,
                   /*mode=*/mode_,
@@ -1128,14 +1123,13 @@ void BatchNormKernel(const Context &ctx,
 
       // -------------- cudnn batchnorm reserve space --------------
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::
-              cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
-                  /*handle=*/handle,
-                  /*mode=*/mode_,
-                  /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
-                  /*activationDesc=*/nullptr,
-                  /*xDesc=*/data_desc_,
-                  /*sizeInBytes=*/&reserve_space_size));
+          phi::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
+              /*handle=*/handle,
+              /*mode=*/mode_,
+              /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
+              /*activationDesc=*/nullptr,
+              /*xDesc=*/data_desc_,
+              /*sizeInBytes=*/&reserve_space_size));
 
       reserve_space->Resize({static_cast<int64_t>(reserve_space_size)});
       reserve_space_ptr =
@@ -1144,7 +1138,7 @@ void BatchNormKernel(const Context &ctx,
       workspace_ptr =
          static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::cudnnBatchNormalizationForwardTrainingEx(
+          phi::dynload::cudnnBatchNormalizationForwardTrainingEx(
              handle,
              mode_,
              CUDNN_BATCHNORM_OPS_BN,
@@ -1172,7 +1166,7 @@ void BatchNormKernel(const Context &ctx,
              reserve_space_size));
 #else
       PADDLE_ENFORCE_GPU_SUCCESS(
-          paddle::platform::dynload::cudnnBatchNormalizationForwardTraining(
+          phi::dynload::cudnnBatchNormalizationForwardTraining(
              handle,
              mode_,
              CudnnDataType<T>::kOne(),
@@ -1211,9 +1205,9 @@ void BatchNormKernel(const Context &ctx,
 #else
   // clean when exit.
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+      phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
+      phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
 }
 
--
GitLab