From fd9c91c3ce7a0ffaf00928ad20c73efd5d6a642e Mon Sep 17 00:00:00 2001 From: huangjiyi <43315610+huangjiyi@users.noreply.github.com> Date: Mon, 28 Nov 2022 15:40:30 +0800 Subject: [PATCH] [PHI decoupling] move several header files from fluid to phi (#48415) * decouple cudnn_desc.h from fluid * move cudnn_desc.h from fluid to phi * fix bugs * decouple cudnn_helper.h from fluid * fix bugs * move cudnn_helper.h from fluid to phi * add fluid cudnn_helper.h * move miopen_desc.h from fluid to phi * move miopen_helper.h from fluid to phi * fix bugs * move gpu_dnn.h from fluid to phi * fix bugs * update copyright year * simplify gpu_dnn.h in fluid * fix bugs * fix xpu build bug * fix compile bug * fix bug --- .../fluid/operators/cuda_graph_with_in_out.h | 1 + .../fused/cudnn_bn_stats_finalize.cu.h | 4 +- .../operators/fused/cudnn_norm_conv.cu.h | 10 +- .../fused/cudnn_scale_bias_add_relu.cu.h | 12 +- .../device/gpu/cuda/cudnn_helper_test.cc | 40 ++-- .../platform/device/gpu/cudnn_desc_test.cc | 16 +- paddle/fluid/platform/device/gpu/gpu_dnn.h | 32 ++- .../device/gpu/rocm/miopen_helper_test.cc | 16 +- .../backends}/gpu/cuda/cudnn_desc.h | 104 +++++---- .../backends}/gpu/cuda/cudnn_helper.h | 198 ++++++++++-------- paddle/phi/backends/gpu/gpu_dnn.h | 27 +++ .../backends}/gpu/rocm/miopen_desc.h | 75 +++---- .../backends}/gpu/rocm/miopen_helper.h | 181 ++++++++-------- paddle/phi/kernels/funcs/cross_entropy.cu | 2 +- .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 6 +- paddle/phi/kernels/gpu/batch_norm_kernel.cu | 6 +- .../kernels/gpu/cross_entropy_grad_kernel.cu | 2 +- .../phi/kernels/gpu/cross_entropy_kernel.cu | 18 +- paddle/phi/kernels/gpu/instance_norm_utils.h | 4 +- paddle/phi/kernels/gpu/rnn_functor.h | 28 +-- .../phi/kernels/gpu/sync_batch_norm_utils.h | 4 +- .../kernels/gpudnn/affine_grid_grad_kernel.cu | 4 +- .../phi/kernels/gpudnn/affine_grid_kernel.cu | 4 +- .../phi/kernels/gpudnn/conv_cudnn_frontend.h | 7 +- paddle/phi/kernels/gpudnn/conv_cudnn_v7.h | 6 +- paddle/phi/kernels/gpudnn/conv_gpudnn_base.h | 13 +- paddle/phi/kernels/gpudnn/conv_gpudnn_info.h | 2 +- paddle/phi/kernels/gpudnn/conv_grad_kernel.cu | 81 +++---- paddle/phi/kernels/gpudnn/conv_kernel.cu | 64 +++--- .../gpudnn/conv_transpose_grad_kernel.cu | 15 +- .../kernels/gpudnn/conv_transpose_kernel.cu | 10 +- paddle/phi/kernels/gpudnn/pool_gpudnn.h | 12 +- paddle/phi/kernels/gpudnn/pool_grad_kernel.cu | 2 +- paddle/phi/kernels/gpudnn/pool_kernel.cu | 2 +- paddle/phi/kernels/gpudnn/softmax_gpudnn.h | 22 +- 35 files changed, 554 insertions(+), 476 deletions(-) rename paddle/{fluid/platform/device => phi/backends}/gpu/cuda/cudnn_desc.h (69%) rename paddle/{fluid/platform/device => phi/backends}/gpu/cuda/cudnn_helper.h (74%) create mode 100644 paddle/phi/backends/gpu/gpu_dnn.h rename paddle/{fluid/platform/device => phi/backends}/gpu/rocm/miopen_desc.h (76%) rename paddle/{fluid/platform/device => phi/backends}/gpu/rocm/miopen_helper.h (75%) diff --git a/paddle/fluid/operators/cuda_graph_with_in_out.h b/paddle/fluid/operators/cuda_graph_with_in_out.h index a667c40234d..40896c585c3 100644 --- a/paddle/fluid/operators/cuda_graph_with_in_out.h +++ b/paddle/fluid/operators/cuda_graph_with_in_out.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" diff --git a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h index 
b2201c89295..0325a0e585e 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h +++ b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h @@ -54,8 +54,8 @@ struct BNStatsFinalizeArgs { cudnnDataType_t param_dtype; cudnnTensorFormat_t format; - platform::TensorDescriptor in_desc; - platform::TensorDescriptor out_desc; + phi::backends::gpu::TensorDescriptor in_desc; + phi::backends::gpu::TensorDescriptor out_desc; }; template diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h index a5e210dc7fe..bf0e06b825e 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h +++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h @@ -163,11 +163,11 @@ struct NormConvolutionArgs { std::vector paddings; std::vector dilations; - platform::TensorDescriptor in_desc; - platform::FilterDescriptor filter_desc; - platform::TensorDescriptor out_desc; - platform::TensorDescriptor out_stats_desc; - platform::ConvolutionDescriptor conv_desc; + phi::backends::gpu::TensorDescriptor in_desc; + phi::backends::gpu::FilterDescriptor filter_desc; + phi::backends::gpu::TensorDescriptor out_desc; + phi::backends::gpu::TensorDescriptor out_stats_desc; + phi::backends::gpu::ConvolutionDescriptor conv_desc; bool is_support; }; diff --git a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h index 188f767daf1..df79ed758db 100644 --- a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h +++ b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h @@ -89,12 +89,12 @@ struct ScaleBiasAddReluArgs { cudnnDataType_t param_dtype; cudnnTensorFormat_t format; - platform::TensorDescriptor in_desc; - platform::TensorDescriptor out_desc; - platform::TensorDescriptor equiv_scale_bias_desc; - platform::TensorDescriptor scale_bias_mean_var_desc; - platform::TensorDescriptor bitmask_desc; - platform::ActivationDescriptor activation_desc; + phi::backends::gpu::TensorDescriptor in_desc; + phi::backends::gpu::TensorDescriptor out_desc; + phi::backends::gpu::TensorDescriptor equiv_scale_bias_desc; + phi::backends::gpu::TensorDescriptor scale_bias_mean_var_desc; + phi::backends::gpu::TensorDescriptor bitmask_desc; + phi::backends::gpu::ActivationDescriptor activation_desc; }; template diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc index dae9eb5313c..5b2e90b3291 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc +++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc @@ -20,8 +20,8 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" TEST(CudnnHelper, ScopedTensorDescriptor) { - using paddle::platform::DataLayout; - using paddle::platform::ScopedTensorDescriptor; + using phi::backends::gpu::DataLayout; + using phi::backends::gpu::ScopedTensorDescriptor; ScopedTensorDescriptor tensor_desc; std::vector shape = {2, 4, 6, 6}; @@ -31,7 +31,7 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { int nd; std::vector dims(4); std::vector strides(4); - paddle::platform::dynload::cudnnGetTensorNdDescriptor( + phi::dynload::cudnnGetTensorNdDescriptor( desc, 4, &type, &nd, dims.data(), strides.data()); EXPECT_EQ(nd, 4); @@ -50,7 +50,7 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { std::vector dims_5d(5); std::vector strides_5d(5); - paddle::platform::dynload::cudnnGetTensorNdDescriptor( + phi::dynload::cudnnGetTensorNdDescriptor( desc_5d, 5, &type, &nd, dims_5d.data(), strides_5d.data()); EXPECT_EQ(nd, 5); @@ -65,8 +65,8 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { } TEST(CudnnHelper, ScopedFilterDescriptor) { - using paddle::platform::DataLayout; - using paddle::platform::ScopedFilterDescriptor; + using phi::backends::gpu::DataLayout; + using phi::backends::gpu::ScopedFilterDescriptor; ScopedFilterDescriptor filter_desc; std::vector shape = {2, 3, 3}; @@ -76,7 +76,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { int nd; cudnnTensorFormat_t format; std::vector kernel(3); - paddle::platform::dynload::cudnnGetFilterNdDescriptor( + phi::dynload::cudnnGetFilterNdDescriptor( desc, 3, &type, &format, &nd, kernel.data()); EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format); @@ -90,7 +90,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { auto desc_4d = filter_desc.descriptor(DataLayout::kNCDHW, shape_4d); std::vector kernel_4d(4); - paddle::platform::dynload::cudnnGetFilterNdDescriptor( + phi::dynload::cudnnGetFilterNdDescriptor( desc_4d, 4, &type, &format, &nd, kernel_4d.data()); EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format); @@ -101,7 +101,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { } TEST(CudnnHelper, ScopedConvolutionDescriptor) { - using paddle::platform::ScopedConvolutionDescriptor; + using phi::backends::gpu::ScopedConvolutionDescriptor; ScopedConvolutionDescriptor conv_desc; std::vector src_pads = {2, 2, 2}; @@ -115,14 +115,14 @@ TEST(CudnnHelper, ScopedConvolutionDescriptor) { std::vector pads(3); std::vector strides(3); std::vector dilations(3); - paddle::platform::dynload::cudnnGetConvolutionNdDescriptor(desc, - 3, - &nd, - pads.data(), - strides.data(), - dilations.data(), - &mode, - &type); + phi::dynload::cudnnGetConvolutionNdDescriptor(desc, + 3, + &nd, + pads.data(), + strides.data(), + dilations.data(), + &mode, + &type); EXPECT_EQ(nd, 3); for (size_t i = 0; i < src_pads.size(); ++i) { @@ -134,8 +134,8 @@ TEST(CudnnHelper, ScopedConvolutionDescriptor) { } TEST(CudnnHelper, ScopedPoolingDescriptor) { - using paddle::platform::PoolingMode; - using paddle::platform::ScopedPoolingDescriptor; + using phi::backends::gpu::PoolingMode; + using phi::backends::gpu::ScopedPoolingDescriptor; ScopedPoolingDescriptor pool_desc; std::vector src_kernel = {2, 2, 5}; @@ -150,7 +150,7 @@ TEST(CudnnHelper, ScopedPoolingDescriptor) { std::vector kernel(3); std::vector pads(3); std::vector strides(3); - paddle::platform::dynload::cudnnGetPoolingNdDescriptor( + phi::dynload::cudnnGetPoolingNdDescriptor( desc, 3, &mode, &nan_t, &nd, kernel.data(), pads.data(), strides.data()); EXPECT_EQ(nd, 3); diff --git a/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc 
b/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc index cbe322ef0c4..e3b07d11324 100644 --- a/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc +++ b/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc @@ -20,20 +20,22 @@ namespace paddle { namespace platform { TEST(TensorDescriptor, Empty) { - ActivationDescriptor a; - TensorDescriptor t; - TensorDescriptor t1; - TensorDescriptor *t11 = new TensorDescriptor(); + phi::backends::gpu::ActivationDescriptor a; + phi::backends::gpu::TensorDescriptor t; + phi::backends::gpu::TensorDescriptor t1; + phi::backends::gpu::TensorDescriptor *t11 = + new phi::backends::gpu::TensorDescriptor(); delete t11; - std::unique_ptr tt(new TensorDescriptor()); + std::unique_ptr tt( + new phi::backends::gpu::TensorDescriptor()); } TEST(TensorDescriptor, Normal) { phi::DenseTensor tt; tt.Resize({2, 3, 4}); - tt.mutable_data(platform::CPUPlace()); + tt.mutable_data(phi::CPUPlace()); - TensorDescriptor desc; + phi::backends::gpu::TensorDescriptor desc; desc.set(tt); EXPECT_TRUE(desc.desc() != nullptr); } diff --git a/paddle/fluid/platform/device/gpu/gpu_dnn.h b/paddle/fluid/platform/device/gpu/gpu_dnn.h index 3f9bc5e6de8..f6f6392c4c2 100644 --- a/paddle/fluid/platform/device/gpu/gpu_dnn.h +++ b/paddle/fluid/platform/device/gpu/gpu_dnn.h @@ -14,14 +14,34 @@ #pragma once +#include "paddle/phi/backends/gpu/gpu_dnn.h" + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#ifdef PADDLE_WITH_HIP -#include "paddle/fluid/platform/device/gpu/rocm/miopen_desc.h" -#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h" -#else // CUDA -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h" -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" +namespace paddle { +namespace platform { + +using DataLayout = phi::backends::gpu::DataLayout; +using PoolingMode = phi::backends::gpu::PoolingMode; +template +using CudnnDataType = phi::backends::gpu::CudnnDataType; +using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor; +using ScopedDropoutDescriptor = phi::backends::gpu::ScopedDropoutDescriptor; +using ScopedRNNDescriptor = phi::backends::gpu::ScopedRNNDescriptor; +using ScopedFilterDescriptor = phi::backends::gpu::ScopedFilterDescriptor; +using ScopedConvolutionDescriptor = + phi::backends::gpu::ScopedConvolutionDescriptor; +using ScopedPoolingDescriptor = phi::backends::gpu::ScopedPoolingDescriptor; +using ScopedActivationDescriptor = + phi::backends::gpu::ScopedActivationDescriptor; + +#if defined(PADDLE_WITH_CUDA) +using ScopedRNNTensorDescriptor = phi::backends::gpu::ScopedRNNTensorDescriptor; +using ScopedSpatialTransformerDescriptor = + phi::backends::gpu::ScopedSpatialTransformerDescriptor; #endif +} // namespace platform +} // namespace paddle + #endif diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc b/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc index 1a5c46a065e..0f73aefdd42 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc +++ b/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc @@ -20,8 +20,8 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" TEST(MIOpenHelper, ScopedTensorDescriptor) { - using paddle::platform::DataLayout; - using paddle::platform::ScopedTensorDescriptor; + using phi::backends::gpu::DataLayout; + using phi::backends::gpu::ScopedTensorDescriptor; ScopedTensorDescriptor tensor_desc; std::vector shape = {2, 4, 6, 6}; @@ -31,9 +31,9 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) { int nd; std::vector dims(4); std::vector strides(4); - paddle::platform::dynload::miopenGetTensorDescriptor( + phi::dynload::miopenGetTensorDescriptor( desc, &type, dims.data(), strides.data()); - paddle::platform::dynload::miopenGetTensorDescriptorSize(desc, &nd); + phi::dynload::miopenGetTensorDescriptorSize(desc, &nd); EXPECT_EQ(nd, 4); for (size_t i = 0; i < dims.size(); ++i) { @@ -51,9 +51,9 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) { std::vector dims_5d(5); std::vector strides_5d(5); - paddle::platform::dynload::miopenGetTensorDescriptor( + phi::dynload::miopenGetTensorDescriptor( desc_5d, &type, dims_5d.data(), strides_5d.data()); - paddle::platform::dynload::miopenGetTensorDescriptorSize(desc_5d, &nd); + phi::dynload::miopenGetTensorDescriptorSize(desc_5d, &nd); EXPECT_EQ(nd, 5); for (size_t i = 0; i < dims_5d.size(); ++i) { @@ -67,7 +67,7 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) { } TEST(MIOpenHelper, ScopedConvolutionDescriptor) { - using paddle::platform::ScopedConvolutionDescriptor; + using phi::backends::gpu::ScopedConvolutionDescriptor; ScopedConvolutionDescriptor conv_desc; std::vector src_pads = {2, 2, 2}; @@ -80,7 +80,7 @@ TEST(MIOpenHelper, ScopedConvolutionDescriptor) { std::vector pads(3); std::vector strides(3); std::vector dilations(3); - paddle::platform::dynload::miopenGetConvolutionNdDescriptor( + phi::dynload::miopenGetConvolutionNdDescriptor( desc, 3, &nd, pads.data(), strides.data(), dilations.data(), &mode); EXPECT_EQ(nd, 3); diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h b/paddle/phi/backends/gpu/cuda/cudnn_desc.h similarity index 69% rename from paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h rename to paddle/phi/backends/gpu/cuda/cudnn_desc.h index 677dc49cce4..d4fb6930bcc 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h +++ b/paddle/phi/backends/gpu/cuda/cudnn_desc.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -23,22 +23,12 @@ #include #include -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" -#include "paddle/fluid/platform/device_context.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h" #include "paddle/phi/core/utils/data_type.h" namespace phi { -class DenseTensor; -} // namespace phi - -namespace paddle { -namespace platform { - -template -inline cudnnDataType_t ToCudnnDataType(const T& t) { - auto type = framework::ToDataType(t); - return ToCudnnDataType(phi::TransToPhiDataType(type)); -} +namespace backends { +namespace gpu { template inline std::vector TransformDimOrder(const std::vector& dims) { @@ -67,7 +57,6 @@ inline std::vector TransformDimOrder(const std::vector& dims) { return transformed_dims; } -template <> inline cudnnDataType_t ToCudnnDataType(const phi::DataType& t) { cudnnDataType_t type = CUDNN_DATA_FLOAT; switch (t) { @@ -98,7 +87,7 @@ class ActivationDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroyActivationDescriptor(t)); + phi::dynload::cudnnDestroyActivationDescriptor(t)); t = nullptr; } } @@ -106,12 +95,12 @@ class ActivationDescriptor { ActivationDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateActivationDescriptor(&raw_ptr)); + phi::dynload::cudnnCreateActivationDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } template void set(cudnnActivationMode_t mode, const T& coef) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor( desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, static_cast(coef))); } @@ -128,14 +117,16 @@ class TensorDescriptor { struct Deleter { void operator()(T* t) { if (t != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(t)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyTensorDescriptor(t)); t = nullptr; } } }; TensorDescriptor() { T* raw_ptr; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&raw_ptr)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateTensorDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -151,12 +142,12 @@ class TensorDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetTensorNdDescriptor(desc_.get(), - ToCudnnDataType(tensor.dtype()), - dims_with_group.size(), - dims_with_group.data(), - strides.data())); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( + desc_.get(), + ToCudnnDataType(tensor.dtype()), + dims_with_group.size(), + dims_with_group.data(), + strides.data())); } void set(const std::vector& dims, @@ -169,11 +160,11 @@ class TensorDescriptor { transformed_dims = dims; } PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetTensorNdDescriptorEx(desc_.get(), - format, - dtype, - transformed_dims.size(), - transformed_dims.data())); + phi::dynload::cudnnSetTensorNdDescriptorEx(desc_.get(), + format, + dtype, + transformed_dims.size(), + transformed_dims.data())); } void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) { @@ -192,14 +183,16 @@ class FilterDescriptor { struct Deleter { void operator()(T* t) { if (t != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(t)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyFilterDescriptor(t)); t = nullptr; } } }; FilterDescriptor() { T* raw_ptr; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&raw_ptr)); + 
PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateFilterDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -219,11 +212,11 @@ class FilterDescriptor { transformed_dims[1] = transformed_dims[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetFilterNdDescriptor(desc_.get(), - dtype, - format, - transformed_dims.size(), - transformed_dims.data())); + phi::dynload::cudnnSetFilterNdDescriptor(desc_.get(), + dtype, + format, + transformed_dims.size(), + transformed_dims.data())); } void set(const phi::DenseTensor& tensor, @@ -245,7 +238,7 @@ class ConvolutionDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroyConvolutionDescriptor(t)); + phi::dynload::cudnnDestroyConvolutionDescriptor(t)); t = nullptr; } } @@ -253,7 +246,7 @@ class ConvolutionDescriptor { ConvolutionDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateConvolutionDescriptor(&raw_ptr)); + phi::dynload::cudnnCreateConvolutionDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -270,31 +263,31 @@ class ConvolutionDescriptor { (dtype == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT; T* desc = desc_.get(); PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetConvolutionNdDescriptor(desc, - pads.size(), - pads.data(), - strides.data(), - dilations.data(), - CUDNN_CROSS_CORRELATION, - compute_type)); + phi::dynload::cudnnSetConvolutionNdDescriptor(desc, + pads.size(), + pads.data(), + strides.data(), + dilations.data(), + CUDNN_CROSS_CORRELATION, + compute_type)); #if CUDNN_VERSION_MIN(7, 0, 1) PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnSetConvolutionGroupCount(desc, groups)); + phi::dynload::cudnnSetConvolutionGroupCount(desc, groups)); #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType( - desc, CUDNN_DEFAULT_MATH)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_DEFAULT_MATH)); if (dtype == CUDNN_DATA_HALF) { - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( desc, CUDNN_TENSOR_OP_MATH)); #if CUDA_VERSION >= 11000 #if CUDNN_VERSION_MIN(8, 1, 0) } else if (dtype == CUDNN_DATA_BFLOAT16) { - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( desc, CUDNN_TENSOR_OP_MATH)); #endif // CUDNN_VERSION_MIN(8,1,0) } else if (dtype == CUDNN_DATA_FLOAT && !allow_tf32) { PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH)); + phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH)); #endif // CUDA_VERSION >= 11000 } #endif @@ -307,5 +300,6 @@ class ConvolutionDescriptor { std::unique_ptr desc_; }; -} // namespace platform -} // namespace paddle +} // namespace gpu +} // namespace backends +} // namespace phi diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h b/paddle/phi/backends/gpu/cuda/cudnn_helper.h similarity index 74% rename from paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h rename to paddle/phi/backends/gpu/cuda/cudnn_helper.h index 7181fc34f88..670c1f43c82 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h +++ b/paddle/phi/backends/gpu/cuda/cudnn_helper.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,16 +17,20 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/platform/dynload/cudnn.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/fluid/platform/macros.h" +#include "paddle/phi/backends/dynload/cudnn.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/phi/core/errors.h" +#include "paddle/phi/core/macros.h" DECLARE_bool(cudnn_deterministic); -namespace paddle { -namespace platform { +namespace phi { +namespace backends { +namespace gpu { #define CUDNN_VERSION_MIN(major, minor, patch) \ (CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch))) @@ -68,7 +72,7 @@ inline cudnnPoolingMode_t GetPoolingMode(const PoolingMode& mode) { return CUDNN_POOLING_MAX; default: PADDLE_THROW( - platform::errors::Unimplemented("Unexpected CUDNN pooling mode.")); + phi::errors::Unimplemented("Unexpected CUDNN pooling mode.")); } } @@ -88,7 +92,7 @@ inline ActivationMode StringToActivationMode(const std::string& str) { } else if (str == "bandpass") { return ActivationMode::kBandPass; } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unknown CUDNN activation string: %s.", str)); } } @@ -99,7 +103,7 @@ class CudnnDataType; // CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1 #if CUDNN_VERSION_MIN(8, 1, 0) template <> -class CudnnDataType { +class CudnnDataType { public: static const cudnnDataType_t type = CUDNN_DATA_BFLOAT16; using ScalingParamType = const float; @@ -116,7 +120,7 @@ class CudnnDataType { #endif template <> -class CudnnDataType { +class CudnnDataType { public: static const cudnnDataType_t type = CUDNN_DATA_HALF; // The scaling param type is float for HALF and FLOAT tensors @@ -176,7 +180,7 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat( case DataLayout::kNDHWC: return CUDNN_TENSOR_NHWC; // add, liyamei default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "CUDNN has no equivalent dataLayout for input order.")); } return CUDNN_TENSOR_NCHW; @@ -185,10 +189,12 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat( class ScopedTensorDescriptor { public: ScopedTensorDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateTensorDescriptor(&desc_)); } ~ScopedTensorDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyTensorDescriptor(desc_)); } inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format, @@ -211,25 +217,25 @@ class ScopedTensorDescriptor { if (dims.size() == 4) { if (format == CUDNN_TENSOR_NCHW) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetTensorNdDescriptor(desc_, - type, - dims_with_group.size(), - dims_with_group.data(), - strides.data())); + phi::dynload::cudnnSetTensorNdDescriptor(desc_, + type, + dims_with_group.size(), + dims_with_group.data(), + strides.data())); } else { // CUDNN_TENSOR_NHWC - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensor4dDescriptor( + 
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensor4dDescriptor( desc_, format, type, dims[0], dims[3], dims[1], dims[2])); } } else if (dims.size() == 5) { if (format == CUDNN_TENSOR_NCHW) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetTensorNdDescriptor(desc_, - type, - dims_with_group.size(), - dims_with_group.data(), - strides.data())); + phi::dynload::cudnnSetTensorNdDescriptor(desc_, + type, + dims_with_group.size(), + dims_with_group.data(), + strides.data())); } else { // CUDNN_TENSOR_NHWC - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptorEx( desc_, format, type, dims.size(), dims.data())); } } @@ -247,7 +253,7 @@ class ScopedTensorDescriptor { inline cudnnTensorDescriptor_t descriptor(const cudnnDataType_t cudnn_type, const std::vector& dim, const std::vector& stride) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( desc_, cudnn_type, dim.size(), dim.data(), stride.data())); return desc_; } @@ -269,11 +275,13 @@ class ScopedTensorDescriptor { class ScopedRNNTensorDescriptor { public: ScopedRNNTensorDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDataDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateRNNDataDescriptor(&desc_)); } ~ScopedRNNTensorDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDataDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyRNNDataDescriptor(desc_)); } inline cudnnRNNDataDescriptor_t descriptor( @@ -292,15 +300,15 @@ class ScopedRNNTensorDescriptor { layout = CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED; } - PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetRNNDataDescriptor(desc_, - cudnn_type, - layout, - max_seq_length, - batch_size, - input_size, - seq_length.data(), - static_cast(&padding_fill))); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDataDescriptor( + desc_, + cudnn_type, + layout, + max_seq_length, + batch_size, + input_size, + seq_length.data(), + static_cast(&padding_fill))); return desc_; } @@ -331,14 +339,16 @@ class ScopedRNNTensorDescriptor { class ScopedDropoutDescriptor { public: ScopedDropoutDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateDropoutDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateDropoutDescriptor(&desc_)); } ~ScopedDropoutDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyDropoutDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyDropoutDescriptor(desc_)); } inline cudnnDropoutDescriptor_t descriptor(const cudnnHandle_t& handle, - const platform::Place& place, + const phi::Place& place, bool initialized, float dropout_prob_, phi::DenseTensor* dropout_state_, @@ -346,22 +356,22 @@ class ScopedDropoutDescriptor { size_t state_size) { if (dropout_state_ == nullptr) { // for no dropout or test PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetDropoutDescriptor(desc_, - handle, - 0 /* dropout */, - nullptr, - 0 /* state_size */, - 0 /* seed */)); + phi::dynload::cudnnSetDropoutDescriptor(desc_, + handle, + 0 /* dropout */, + nullptr, + 0 /* state_size */, + 0 /* seed */)); return desc_; } auto* dropout_state_data = dropout_state_->data(); if (!initialized) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetDropoutDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetDropoutDescriptor( desc_, handle, dropout_prob_, 
dropout_state_data, state_size, seed)); } else { auto dropout_state_dims = dropout_state_->dims(); state_size = dropout_state_dims[0]; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnRestoreDropoutDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRestoreDropoutDescriptor( desc_, handle, dropout_prob_, dropout_state_data, state_size, 0)); } return desc_; @@ -376,10 +386,10 @@ class ScopedDropoutDescriptor { class ScopedRNNDescriptor { public: ScopedRNNDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnCreateRNNDescriptor(&desc_)); } ~ScopedRNNDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDestroyRNNDescriptor(desc_)); } inline cudnnRNNDescriptor_t desc() { return desc_; } @@ -392,10 +402,12 @@ class ScopedRNNDescriptor { class ScopedFilterDescriptor { public: ScopedFilterDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateFilterDescriptor(&desc_)); } ~ScopedFilterDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyFilterDescriptor(desc_)); } inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format, @@ -412,11 +424,11 @@ class ScopedFilterDescriptor { // NOTE: input filter(C) of the filter is already asserted to be C/groups. } PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetFilterNdDescriptor(desc_, - type, - format, - kernel_with_group.size(), - kernel_with_group.data())); + phi::dynload::cudnnSetFilterNdDescriptor(desc_, + type, + format, + kernel_with_group.size(), + kernel_with_group.data())); return desc_; } @@ -439,11 +451,11 @@ class ScopedConvolutionDescriptor { public: ScopedConvolutionDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateConvolutionDescriptor(&desc_)); + phi::dynload::cudnnCreateConvolutionDescriptor(&desc_)); } ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroyConvolutionDescriptor(desc_)); + phi::dynload::cudnnDestroyConvolutionDescriptor(desc_)); } inline cudnnConvolutionDescriptor_t descriptor( @@ -453,7 +465,7 @@ class ScopedConvolutionDescriptor { const std::vector& dilations) { PADDLE_ENFORCE_EQ(pads.size(), strides.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of pads and strides should be equal. But " "received size of pads is %d, size of strides is %d.", pads.size(), @@ -461,7 +473,7 @@ class ScopedConvolutionDescriptor { PADDLE_ENFORCE_EQ( pads.size(), dilations.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of pads and dilations should be equal. But received size " "of pads is %d, size of dilations is %d.", pads.size(), @@ -470,13 +482,13 @@ class ScopedConvolutionDescriptor { cudnnDataType_t compute_type = (type == CUDNN_DATA_DOUBLE) ? 
CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetConvolutionNdDescriptor(desc_, - pads.size(), - pads.data(), - strides.data(), - dilations.data(), - CUDNN_CROSS_CORRELATION, - compute_type)); + phi::dynload::cudnnSetConvolutionNdDescriptor(desc_, + pads.size(), + pads.data(), + strides.data(), + dilations.data(), + CUDNN_CROSS_CORRELATION, + compute_type)); return desc_; } @@ -496,10 +508,12 @@ class ScopedConvolutionDescriptor { class ScopedPoolingDescriptor { public: ScopedPoolingDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreatePoolingDescriptor(&desc_)); } ~ScopedPoolingDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyPoolingDescriptor(desc_)); } inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode, @@ -508,7 +522,7 @@ class ScopedPoolingDescriptor { const std::vector& strides) { PADDLE_ENFORCE_EQ(kernel.size(), pads.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of kernel and pads should be equal. But " "received size of kernel is %d, size of pads is %d.", kernel.size(), @@ -516,12 +530,12 @@ class ScopedPoolingDescriptor { PADDLE_ENFORCE_EQ( kernel.size(), strides.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of kernel and strides should be equal. But " "received size of kernel is %d, size of strides is %d.", kernel.size(), strides.size())); - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetPoolingNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetPoolingNdDescriptor( desc_, (GetPoolingMode(mode)), CUDNN_PROPAGATE_NAN, // Always propagate nans. 
@@ -541,18 +555,23 @@ class ScopedSpatialTransformerDescriptor { public: ScopedSpatialTransformerDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateSpatialTransformerDescriptor(&desc_)); + phi::dynload::cudnnCreateSpatialTransformerDescriptor(&desc_)); } ~ScopedSpatialTransformerDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroySpatialTransformerDescriptor(desc_)); + phi::dynload::cudnnDestroySpatialTransformerDescriptor(desc_)); } template inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims, const int dimA[]) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor( - desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType::type, nbDims, dimA)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnSetSpatialTransformerNdDescriptor( + desc_, + CUDNN_SAMPLER_BILINEAR, + CudnnDataType::type, + nbDims, + dimA)); return desc_; } @@ -565,11 +584,11 @@ class ScopedActivationDescriptor { public: ScopedActivationDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateActivationDescriptor(&desc_)); + phi::dynload::cudnnCreateActivationDescriptor(&desc_)); } ~ScopedActivationDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroyActivationDescriptor(desc_)); + phi::dynload::cudnnDestroyActivationDescriptor(desc_)); } template @@ -602,11 +621,11 @@ class ScopedActivationDescriptor { mode = CUDNN_ACTIVATION_TANH; break; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unrecognized CUDNN activation mode: %d.", static_cast(activation_mode))); } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor( desc_, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling)); return desc_; } @@ -620,16 +639,18 @@ class ScopedActivationDescriptor { class ScopedCTCLossDescriptor { public: ScopedCTCLossDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateCTCLossDescriptor(&desc_)); } ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyCTCLossDescriptor(desc_)); } template inline cudnnCTCLossDescriptor_t descriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType::type)); + phi::dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType::type)); return desc_; } @@ -639,5 +660,6 @@ class ScopedCTCLossDescriptor { }; #endif -} // namespace platform -} // namespace paddle +} // namespace gpu +} // namespace backends +} // namespace phi diff --git a/paddle/phi/backends/gpu/gpu_dnn.h b/paddle/phi/backends/gpu/gpu_dnn.h new file mode 100644 index 00000000000..f37afa3deeb --- /dev/null +++ b/paddle/phi/backends/gpu/gpu_dnn.h @@ -0,0 +1,27 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + +#ifdef PADDLE_WITH_HIP +#include "paddle/phi/backends/gpu/rocm/miopen_desc.h" +#include "paddle/phi/backends/gpu/rocm/miopen_helper.h" +#else // CUDA +#include "paddle/phi/backends/gpu/cuda/cudnn_desc.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h" +#endif + +#endif diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h b/paddle/phi/backends/gpu/rocm/miopen_desc.h similarity index 76% rename from paddle/fluid/platform/device/gpu/rocm/miopen_desc.h rename to paddle/phi/backends/gpu/rocm/miopen_desc.h index 1ce4df05be6..ae0e274ca65 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h +++ b/paddle/phi/backends/gpu/rocm/miopen_desc.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,22 +23,12 @@ #include #include -#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h" -#include "paddle/fluid/platform/device_context.h" +#include "paddle/phi/backends/gpu/rocm/miopen_helper.h" #include "paddle/phi/core/utils/data_type.h" namespace phi { -class DenseTensor; -} // namespace phi - -namespace paddle { -namespace platform { - -template -inline miopenDataType_t ToCudnnDataType(const T& t) { - auto type = framework::ToDataType(t); - return ToCudnnDataType(phi::TransToPhiDataType(type)); -} +namespace backends { +namespace gpu { inline std::vector TransformDimOrder(const std::vector& dims) { std::vector transformed_dims(dims.begin(), dims.end()); @@ -63,7 +53,6 @@ inline std::vector TransformDimOrder(const std::vector& dims) { return transformed_dims; } -template <> inline miopenDataType_t ToCudnnDataType(const phi::DataType& t) { miopenDataType_t type = miopenFloat; switch (t) { @@ -86,7 +75,7 @@ class ActivationDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenDestroyActivationDescriptor(t)); + phi::dynload::miopenDestroyActivationDescriptor(t)); t = nullptr; } } @@ -94,12 +83,12 @@ class ActivationDescriptor { ActivationDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenCreateActivationDescriptor(&raw_ptr)); + phi::dynload::miopenCreateActivationDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } template void set(miopenActivationMode_t mode, const T& coef) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor( desc_.get(), mode, static_cast(coef), 0.0, 0.0)); } @@ -116,14 +105,16 @@ class TensorDescriptor { struct Deleter { void operator()(T* t) { if (t != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(t)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyTensorDescriptor(t)); t = nullptr; } } }; TensorDescriptor() { T* raw_ptr; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&raw_ptr)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateTensorDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -140,7 +131,7 @@ class TensorDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + 
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.dtype()), static_cast(dims_with_group.size()), @@ -150,10 +141,10 @@ class TensorDescriptor { void set(const phi::DenseTensor& tensor, const miopenTensorFormat_t format) { const int groups = 1; - PADDLE_ENFORCE_EQ(format, - MIOPEN_TENSOR_NCHW, - platform::errors::InvalidArgument( - "format should ONLY be NCHW in MIOPEN.")); + PADDLE_ENFORCE_EQ( + format, + MIOPEN_TENSOR_NCHW, + phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN.")); auto dims = phi::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; @@ -164,7 +155,7 @@ class TensorDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.dtype()), static_cast(dims_with_group.size()), @@ -182,14 +173,16 @@ class FilterDescriptor { struct Deleter { void operator()(T* t) { if (t != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(t)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyTensorDescriptor(t)); t = nullptr; } } }; FilterDescriptor() { T* raw_ptr; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&raw_ptr)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateTensorDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -198,10 +191,10 @@ class FilterDescriptor { void set(const phi::DenseTensor& tensor, const miopenTensorFormat_t format, const int groups = 1) { - PADDLE_ENFORCE_EQ(format, - MIOPEN_TENSOR_NCHW, - platform::errors::InvalidArgument( - "format should ONLY be NCHW in MIOPEN.")); + PADDLE_ENFORCE_EQ( + format, + MIOPEN_TENSOR_NCHW, + phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN.")); auto dims = phi::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; @@ -212,7 +205,7 @@ class FilterDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.dtype()), static_cast(dims_with_group.size()), @@ -231,7 +224,7 @@ class ConvolutionDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenDestroyConvolutionDescriptor(t)); + phi::dynload::miopenDestroyConvolutionDescriptor(t)); t = nullptr; } } @@ -239,7 +232,7 @@ class ConvolutionDescriptor { ConvolutionDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenCreateConvolutionDescriptor(&raw_ptr)); + phi::dynload::miopenCreateConvolutionDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -251,21 +244,21 @@ class ConvolutionDescriptor { const std::vector& dilations, bool allow_tf32, const int groups = 1) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenInitConvolutionNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor( (miopenConvolutionDescriptor_t)desc_.get(), static_cast(pads.size()), const_cast(pads.data()), const_cast(strides.data()), const_cast(dilations.data()), miopenConvolution)); - PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::miopenSetConvolutionGroupCount( - 
(miopenConvolutionDescriptor_t)desc_.get(), groups)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetConvolutionGroupCount( + (miopenConvolutionDescriptor_t)desc_.get(), groups)); } private: std::unique_ptr desc_; }; -} // namespace platform -} // namespace paddle +} // namespace gpu +} // namespace backends +} // namespace phi diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h b/paddle/phi/backends/gpu/rocm/miopen_helper.h similarity index 75% rename from paddle/fluid/platform/device/gpu/rocm/miopen_helper.h rename to paddle/phi/backends/gpu/rocm/miopen_helper.h index fb2392e6575..fc602d90fc6 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h +++ b/paddle/phi/backends/gpu/rocm/miopen_helper.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,20 +17,24 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/platform/device/gpu/gpu_types.h" -#include "paddle/fluid/platform/dynload/miopen.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/fluid/platform/macros.h" +#include "paddle/phi/backends/dynload/miopen.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/phi/core/errors.h" +#include "paddle/phi/core/macros.h" // MIOPEN do not have epslion definition #define CUDNN_BN_MIN_EPSILON 1e-05 DECLARE_bool(cudnn_deterministic); -namespace paddle { -namespace platform { +namespace phi { +namespace backends { +namespace gpu { + inline const char* miopenGetErrorString(miopenStatus_t status) { switch (status) { case miopenStatusSuccess: @@ -95,7 +99,7 @@ inline miopenPoolingMode_t GetPoolingMode(const PoolingMode& mode) { return miopenPoolingMax; default: PADDLE_THROW( - platform::errors::Unimplemented("Unexpected MIOPEN pooling mode.")); + phi::errors::Unimplemented("Unexpected MIOPEN pooling mode.")); } } @@ -115,7 +119,7 @@ inline ActivationMode StringToActivationMode(const std::string& str) { } else if (str == "bandpass") { return ActivationMode::kBandPass; } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unknown MIOPEN activation string: %s.", str)); } } @@ -124,7 +128,7 @@ template class CudnnDataType; template <> -class CudnnDataType { +class CudnnDataType { public: static const miopenDataType_t type = miopenHalf; // The scaling param type is float for HALF and FLOAT tensors @@ -141,7 +145,7 @@ class CudnnDataType { }; template <> -class CudnnDataType { +class CudnnDataType { public: static const miopenDataType_t type = miopenBFloat16; // The scaling param type is float for HALF and FLOAT tensors @@ -184,7 +188,7 @@ inline miopenTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) { case DataLayout::kNDHWC: return MIOPEN_TENSOR_NHWC; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "MIOPEN has no equivalent dataLayout for input order.")); } return MIOPEN_TENSOR_NCHW; @@ -193,10 +197,12 @@ inline miopenTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) { class ScopedTensorDescriptor { public: ScopedTensorDescriptor() { 
- PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateTensorDescriptor(&desc_)); } ~ScopedTensorDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyTensorDescriptor(desc_)); } inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format, @@ -217,19 +223,19 @@ class ScopedTensorDescriptor { } // MIOPEN ONLY support data layout of NCHW - PADDLE_ENFORCE_EQ(format, - MIOPEN_TENSOR_NCHW, - platform::errors::InvalidArgument( - "format should ONLY be NCHW in MIOPEN.")); + PADDLE_ENFORCE_EQ( + format, + MIOPEN_TENSOR_NCHW, + phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN.")); if (dims.size() == 4) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( desc_, type, dims_with_group.size(), const_cast(dims_with_group.data()), const_cast(strides.data()))); } else if (dims.size() == 5) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( desc_, type, dims_with_group.size(), @@ -250,12 +256,12 @@ class ScopedTensorDescriptor { inline miopenTensorDescriptor_t descriptor(const miopenDataType_t miopen_type, const std::vector& dim, const std::vector& stride) { - PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenSetTensorDescriptor(desc_, - miopen_type, - dim.size(), - const_cast(dim.data()), - const_cast(stride.data()))); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( + desc_, + miopen_type, + dim.size(), + const_cast(dim.data()), + const_cast(stride.data()))); return desc_; } @@ -275,14 +281,16 @@ class ScopedTensorDescriptor { class ScopedDropoutDescriptor { public: ScopedDropoutDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateDropoutDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateDropoutDescriptor(&desc_)); } ~ScopedDropoutDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyDropoutDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyDropoutDescriptor(desc_)); } inline miopenDropoutDescriptor_t descriptor(const miopenHandle_t& handle, - const platform::Place& place, + const phi::Place& place, bool initialized, float dropout_prob_, phi::DenseTensor* dropout_state_, @@ -290,42 +298,42 @@ class ScopedDropoutDescriptor { size_t state_size) { if (dropout_state_ == nullptr) { // for no dropout or test PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenSetDropoutDescriptor(desc_, - handle, - 0 /* dropout */, - nullptr, - 0 /* state_size */, - 0 /* seed */, - false, - false, - MIOPEN_RNG_PSEUDO_XORWOW)); + phi::dynload::miopenSetDropoutDescriptor(desc_, + handle, + 0 /* dropout */, + nullptr, + 0 /* state_size */, + 0 /* seed */, + false, + false, + MIOPEN_RNG_PSEUDO_XORWOW)); return desc_; } auto* dropout_state_data = dropout_state_->data(); if (!initialized) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenSetDropoutDescriptor(desc_, - handle, - dropout_prob_, - dropout_state_data, - state_size, - seed, - false, - false, - MIOPEN_RNG_PSEUDO_XORWOW)); + phi::dynload::miopenSetDropoutDescriptor(desc_, + handle, + dropout_prob_, + dropout_state_data, + state_size, + seed, + false, + false, + MIOPEN_RNG_PSEUDO_XORWOW)); } else { auto dropout_state_dims = dropout_state_->dims(); state_size = 
dropout_state_dims[0]; - PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenRestoreDropoutDescriptor(desc_, - handle, - dropout_prob_, - dropout_state_data, - state_size, - 0, - false, - false, - MIOPEN_RNG_PSEUDO_XORWOW)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRestoreDropoutDescriptor( + desc_, + handle, + dropout_prob_, + dropout_state_data, + state_size, + 0, + false, + false, + MIOPEN_RNG_PSEUDO_XORWOW)); } return desc_; } @@ -339,10 +347,10 @@ class ScopedDropoutDescriptor { class ScopedRNNDescriptor { public: ScopedRNNDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateRNNDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenCreateRNNDescriptor(&desc_)); } ~ScopedRNNDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyRNNDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenDestroyRNNDescriptor(desc_)); } inline miopenRNNDescriptor_t desc() { return desc_; } @@ -355,10 +363,12 @@ class ScopedRNNDescriptor { class ScopedFilterDescriptor { public: ScopedFilterDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateTensorDescriptor(&desc_)); } ~ScopedFilterDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyTensorDescriptor(desc_)); } inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format, @@ -379,7 +389,7 @@ class ScopedFilterDescriptor { for (int k = kernel_with_group.size() - 2; k >= 0; k--) { stride_dim[k] = stride_dim[k + 1] * kernel_with_group[k + 1]; } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( desc_, type, kernel_with_group.size(), @@ -407,11 +417,11 @@ class ScopedConvolutionDescriptor { public: ScopedConvolutionDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenCreateConvolutionDescriptor(&desc_)); + phi::dynload::miopenCreateConvolutionDescriptor(&desc_)); } ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenDestroyConvolutionDescriptor(desc_)); + phi::dynload::miopenDestroyConvolutionDescriptor(desc_)); } inline miopenConvolutionDescriptor_t descriptor( @@ -421,7 +431,7 @@ class ScopedConvolutionDescriptor { const std::vector& dilations) { PADDLE_ENFORCE_EQ(pads.size(), strides.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of pads and strides should be equal. But " "received size of pads is %d, size of strides is %d.", pads.size(), @@ -429,12 +439,12 @@ class ScopedConvolutionDescriptor { PADDLE_ENFORCE_EQ( pads.size(), dilations.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of pads and dilations should be equal. 
But received size " "of pads is %d, size of dilations is %d.", pads.size(), dilations.size())); - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenInitConvolutionNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor( desc_, pads.size(), const_cast(pads.data()), @@ -460,10 +470,12 @@ class ScopedConvolutionDescriptor { class ScopedPoolingDescriptor { public: ScopedPoolingDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreatePoolingDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreatePoolingDescriptor(&desc_)); } ~ScopedPoolingDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyPoolingDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyPoolingDescriptor(desc_)); } inline miopenPoolingDescriptor_t descriptor(const PoolingMode& mode, @@ -472,7 +484,7 @@ class ScopedPoolingDescriptor { const std::vector& strides) { PADDLE_ENFORCE_EQ(kernel.size(), pads.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of kernel and pads should be equal. But " "received size of kernel is %d, size of pads is %d.", kernel.size(), @@ -480,12 +492,12 @@ class ScopedPoolingDescriptor { PADDLE_ENFORCE_EQ( kernel.size(), strides.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of kernel and strides should be equal. But " "received size of kernel is %d, size of strides is %d.", kernel.size(), strides.size())); - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetNdPoolingDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetNdPoolingDescriptor( desc_, GetPoolingMode(mode), kernel.size(), @@ -504,11 +516,11 @@ class ScopedActivationDescriptor { public: ScopedActivationDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenCreateActivationDescriptor(&desc_)); + phi::dynload::miopenCreateActivationDescriptor(&desc_)); } ~ScopedActivationDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenDestroyActivationDescriptor(desc_)); + phi::dynload::miopenDestroyActivationDescriptor(desc_)); } template @@ -539,11 +551,11 @@ class ScopedActivationDescriptor { mode = miopenActivationTANH; break; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unrecognized MIOPEN activation mode: %d.", static_cast(activation_mode))); } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor( desc_, mode, relu_ceiling, 0.0, 0.0)); return desc_; } @@ -556,15 +568,17 @@ class ScopedActivationDescriptor { class ScopedCTCLossDescriptor { public: ScopedCTCLossDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateCTCLossDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateCTCLossDescriptor(&desc_)); } ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyCTCLossDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyCTCLossDescriptor(desc_)); } template inline miopenCTCLossDescriptor_t descriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetCTCLossDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetCTCLossDescriptor( desc_, CudnnDataType::type, 0, false)); return desc_; } @@ -574,5 +588,6 @@ class ScopedCTCLossDescriptor { DISABLE_COPY_AND_ASSIGN(ScopedCTCLossDescriptor); }; -} // namespace platform -} // namespace paddle +} // namespace gpu +} // namespace backends +} // 
namespace phi diff --git a/paddle/phi/kernels/funcs/cross_entropy.cu b/paddle/phi/kernels/funcs/cross_entropy.cu index 9f08214ef5a..add838106bf 100644 --- a/paddle/phi/kernels/funcs/cross_entropy.cu +++ b/paddle/phi/kernels/funcs/cross_entropy.cu @@ -14,9 +14,9 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/cross_entropy.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math.h" diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu index afa73f0a571..0f4f39629e8 100644 --- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/fluid/operators/layout_utils.h" #include "paddle/fluid/operators/norm_utils.cu.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/flags.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" @@ -37,7 +37,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace phi { template -using CudnnDataType = paddle::platform::CudnnDataType; +using CudnnDataType = phi::backends::gpu::CudnnDataType; template using BatchNormParamType = typename CudnnDataType::BatchNormParamType; @@ -644,7 +644,7 @@ void BatchNormGradRawKernel(const Context &ctx, C, scale.dims()[0])); - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; #ifdef PADDLE_WITH_HIP auto compute_format = data_layout == DataLayout::kNHWC ? 
DataLayout::kNHWC : DataLayout::kNCHW; diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 126b5c81ab3..cd1665ba903 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -22,9 +22,9 @@ namespace cub = hipcub; #include "paddle/fluid/operators/layout_utils.h" #include "paddle/fluid/operators/norm_utils.cu.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/flags.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" @@ -45,7 +45,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace phi { template -using CudnnDataType = paddle::platform::CudnnDataType; +using CudnnDataType = phi::backends::gpu::CudnnDataType; template using BatchNormParamType = typename CudnnDataType::BatchNormParamType; @@ -603,7 +603,7 @@ void BatchNormKernel(const Context &ctx, int N, C, H, W, D; phi::funcs::ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D); - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; #ifdef PADDLE_WITH_HIP auto compute_format = diff --git a/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu b/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu index 7bafa03aba5..934b0fe152b 100644 --- a/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu @@ -23,8 +23,8 @@ namespace cub = hipcub; #endif #include "paddle/fluid/operators/math/softmax.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" diff --git a/paddle/phi/kernels/gpu/cross_entropy_kernel.cu b/paddle/phi/kernels/gpu/cross_entropy_kernel.cu index b9a2b07e696..94d2d7a744c 100644 --- a/paddle/phi/kernels/gpu/cross_entropy_kernel.cu +++ b/paddle/phi/kernels/gpu/cross_entropy_kernel.cu @@ -23,8 +23,8 @@ namespace cub = hipcub; #endif #include "paddle/fluid/operators/math/softmax.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" @@ -772,10 +772,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx, : MIOPEN_SOFTMAX_MODE_CHANNEL; PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( handle, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), descp, logits_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), descp, softmax_data, MIOPEN_SOFTMAX_LOG, @@ -787,10 +787,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx, handle, CUDNN_SOFTMAX_LOG, mode, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), descp, logits_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), descp, softmax_data)); #endif @@ -1206,10 +1206,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx, : MIOPEN_SOFTMAX_MODE_CHANNEL; 
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( handle, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), descp, logits_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), descp, softmax_data, MIOPEN_SOFTMAX_LOG, @@ -1221,10 +1221,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx, handle, CUDNN_SOFTMAX_LOG, mode, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), descp, logits_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), descp, softmax_data)); #endif diff --git a/paddle/phi/kernels/gpu/instance_norm_utils.h b/paddle/phi/kernels/gpu/instance_norm_utils.h index 50dfe4ad222..e52fe868c39 100644 --- a/paddle/phi/kernels/gpu/instance_norm_utils.h +++ b/paddle/phi/kernels/gpu/instance_norm_utils.h @@ -26,12 +26,12 @@ namespace cub = hipcub; #endif -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" namespace phi { template -using CudnnDataType = paddle::platform::CudnnDataType; +using CudnnDataType = phi::backends::gpu::CudnnDataType; template using BatchNormParamType = typename CudnnDataType::BatchNormParamType; diff --git a/paddle/phi/kernels/gpu/rnn_functor.h b/paddle/phi/kernels/gpu/rnn_functor.h index 14778dc1847..59c59889863 100644 --- a/paddle/phi/kernels/gpu/rnn_functor.h +++ b/paddle/phi/kernels/gpu/rnn_functor.h @@ -15,7 +15,7 @@ #pragma once #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" @@ -64,7 +64,7 @@ class RNNDescriptors { size_t *reserve_size, DenseTensor *dropout_state) { int numDirections = is_bidirec_ ? 
2 : 1; - gpuDnnDataType_t cudnn_type = paddle::platform::CudnnDataType::type; + gpuDnnDataType_t cudnn_type = phi::backends::gpu::CudnnDataType::type; // ------------------- cudnn x, y descriptors --------------------- std::vector dims_x = {batch_size_, input_size_, 1}; std::vector strides_x = {input_size_, 1, 1}; @@ -179,7 +179,7 @@ class RNNDescriptors { phi::errors::InvalidArgument( "The cudnn rnn and setting weight size should be same.")); // ------------------- cudnn weight descriptors --------------------- - auto layout = paddle::platform::DataLayout::kNCHW; + auto layout = phi::backends::gpu::DataLayout::kNCHW; int dim_tmp = weights_size_ / sizeof(T); std::vector dim_w = {dim_tmp, 1, 1}; weight_desc_.descriptor(layout, dim_w); @@ -250,19 +250,19 @@ class RNNDescriptors { std::vector y_descs_; #endif - paddle::platform::ScopedTensorDescriptor x_desc_; - paddle::platform::ScopedTensorDescriptor y_desc_; + phi::backends::gpu::ScopedTensorDescriptor x_desc_; + phi::backends::gpu::ScopedTensorDescriptor y_desc_; #if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201 - paddle::platform::ScopedRNNTensorDescriptor x_seq_desc_; - paddle::platform::ScopedRNNTensorDescriptor y_seq_desc_; + phi::backends::gpu::ScopedRNNTensorDescriptor x_seq_desc_; + phi::backends::gpu::ScopedRNNTensorDescriptor y_seq_desc_; #endif - paddle::platform::ScopedTensorDescriptor init_h_desc_; - paddle::platform::ScopedTensorDescriptor init_c_desc_; - paddle::platform::ScopedTensorDescriptor last_h_desc_; - paddle::platform::ScopedTensorDescriptor last_c_desc_; - paddle::platform::ScopedDropoutDescriptor dropout_desc_; - paddle::platform::ScopedFilterDescriptor weight_desc_; - paddle::platform::ScopedRNNDescriptor rnn_desc_; + phi::backends::gpu::ScopedTensorDescriptor init_h_desc_; + phi::backends::gpu::ScopedTensorDescriptor init_c_desc_; + phi::backends::gpu::ScopedTensorDescriptor last_h_desc_; + phi::backends::gpu::ScopedTensorDescriptor last_c_desc_; + phi::backends::gpu::ScopedDropoutDescriptor dropout_desc_; + phi::backends::gpu::ScopedFilterDescriptor weight_desc_; + phi::backends::gpu::ScopedRNNDescriptor rnn_desc_; }; template diff --git a/paddle/phi/kernels/gpu/sync_batch_norm_utils.h b/paddle/phi/kernels/gpu/sync_batch_norm_utils.h index 72627345162..cfb2758e62d 100644 --- a/paddle/phi/kernels/gpu/sync_batch_norm_utils.h +++ b/paddle/phi/kernels/gpu/sync_batch_norm_utils.h @@ -31,15 +31,15 @@ namespace cub = hipcub; #include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" #endif #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/kernels/funcs/norm_utils.h" namespace phi { template -using CudnnDataType = paddle::platform::CudnnDataType; +using CudnnDataType = phi::backends::gpu::CudnnDataType; template using BatchNormParamType = typename CudnnDataType::BatchNormParamType; diff --git a/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu b/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu index d1cc738e2b0..6bcfd328aac 100644 --- a/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu @@ -15,11 +15,11 @@ #ifndef PADDLE_WITH_HIP #include "paddle/phi/kernels/affine_grid_grad_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include 
"paddle/fluid/platform/device_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/core/kernel_registry.h" @@ -27,7 +27,7 @@ namespace phi { using ScopedSpatialTransformerDescriptor = - paddle::platform::ScopedSpatialTransformerDescriptor; + phi::backends::gpu::ScopedSpatialTransformerDescriptor; template void AffineGridGradCudnnKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu b/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu index 6c5d305abbf..2f1c4de3716 100644 --- a/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu +++ b/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu @@ -15,11 +15,11 @@ #ifndef PADDLE_WITH_HIP #include "paddle/phi/kernels/affine_grid_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/core/kernel_registry.h" @@ -27,7 +27,7 @@ namespace phi { using ScopedSpatialTransformerDescriptor = - paddle::platform::ScopedSpatialTransformerDescriptor; + phi::backends::gpu::ScopedSpatialTransformerDescriptor; template void AffineGridCudnnKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h b/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h index 368b5585b29..c5fe47a2431 100644 --- a/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h +++ b/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h @@ -17,8 +17,8 @@ limitations under the License. 
*/ #include -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h" #include "paddle/phi/backends/dynload/cudnn_frontend.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_desc.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/utils/data_type.h" @@ -86,7 +86,8 @@ class CudnnFrontendConvHelper { cudnnTensorFormat_t layout_format) { auto transformed_dims = phi::vectorize(tensor->dims()); if (layout_format == CUDNN_TENSOR_NHWC) { - transformed_dims = paddle::platform::TransformDimOrder(transformed_dims); + transformed_dims = + phi::backends::gpu::TransformDimOrder(transformed_dims); } std::vector strides = GenerateStrides(transformed_dims, layout_format); @@ -95,7 +96,7 @@ class CudnnFrontendConvHelper { .setStrides(strides.size(), strides.data()) .setId(id) .setAlignment(GetAlignment(tensor)) - .setDataType(paddle::platform::ToCudnnDataType(tensor->dtype())) + .setDataType(phi::backends::gpu::ToCudnnDataType(tensor->dtype())) .build(); } diff --git a/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h b/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h index e7d912a4825..cb03efc42bc 100644 --- a/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h +++ b/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h @@ -559,7 +559,7 @@ struct SearchAlgorithmBase { size_t workspace_size_limit = CalcWorkspaceLimitInBytes(UseFixedWorkspace()); auto workspace_handle = ctx.cudnn_workspace_handle(); - if (paddle::platform::CudnnDataType::type != CUDNN_DATA_HALF) { + if (phi::backends::gpu::CudnnDataType::type != CUDNN_DATA_HALF) { size_t max_workspace_size = GetMaxWorkspaceSize(args, workspace_size_limit); VLOG(3) << "max_workspace_size=" << ToMegaBytes(max_workspace_size) @@ -674,7 +674,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase { bool enable_autotune = true) { SearchResult result; bool use_autotune = false; - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; SetConvMathType(ctx, dtype, args.cdesc); if (deterministic) { @@ -734,7 +734,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase { static void SetConvMathType( const phi::GPUContext& ctx, cudnnDataType_t dtype, - const paddle::platform::ConvolutionDescriptor& cdesc) { + const phi::backends::gpu::ConvolutionDescriptor& cdesc) { #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) if (ctx.GetComputeCapability() >= 70 && dtype == CUDNN_DATA_HALF) { PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( diff --git a/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h b/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h index 4353c6789de..ea0969544b5 100644 --- a/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h +++ b/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h @@ -20,6 +20,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/memory/memory.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/kernels/autotune/cache.h" #include "paddle/phi/kernels/funcs/eigen/common.h" @@ -28,11 +29,11 @@ limitations under the License. 
*/ namespace phi { -using GPUDNNDataLayout = paddle::platform::DataLayout; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; template using ScalingParamType = - typename paddle::platform::CudnnDataType::ScalingParamType; + typename phi::backends::gpu::CudnnDataType::ScalingParamType; enum class ConvKind { kForward = 1, kBackwardData = 2, kBackwardFilter = 3 }; @@ -96,10 +97,10 @@ static std::ostream& operator<<(std::ostream& out, const std::vector& v) { template struct ConvArgsBase { HandleT handle; - paddle::platform::TensorDescriptor idesc; - paddle::platform::TensorDescriptor odesc; - paddle::platform::FilterDescriptor wdesc; - paddle::platform::ConvolutionDescriptor cdesc; + phi::backends::gpu::TensorDescriptor idesc; + phi::backends::gpu::TensorDescriptor odesc; + phi::backends::gpu::FilterDescriptor wdesc; + phi::backends::gpu::ConvolutionDescriptor cdesc; const phi::DenseTensor* x = nullptr; const phi::DenseTensor* w = nullptr; diff --git a/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h b/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h index e52c2210c82..9b7b35f4357 100644 --- a/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h +++ b/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h @@ -18,7 +18,7 @@ limitations under the License. */ #include #include -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" DECLARE_int64(conv_workspace_size_limit); DECLARE_bool(cudnn_exhaustive_search); diff --git a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu index 5d1a92a3119..f1114e59bad 100644 --- a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu @@ -52,8 +52,8 @@ void ConvCudnnGradKernelImplV7( const std::vector& strides, const std::vector& padding_common, const std::vector& dilations, - paddle::platform::DataLayout compute_format, - paddle::platform::DataLayout layout, + phi::backends::gpu::DataLayout compute_format, + phi::backends::gpu::DataLayout layout, bool use_addto, bool exhaustive_search, bool deterministic, @@ -70,8 +70,8 @@ void ConvCudnnGradKernelImplV7( auto handle = ctx.cudnn_handle(); auto workspace_handle = ctx.cudnn_workspace_handle(); - auto dtype = paddle::platform::CudnnDataType::type; - auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout); + auto dtype = phi::backends::gpu::CudnnDataType::type; + auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout); ConvArgs args1{handle, transformed_input_grad, @@ -96,16 +96,16 @@ void ConvCudnnGradKernelImplV7( int i_n, i_c, i_d, i_h, i_w; int o_n, o_c, o_d, o_h, o_w; - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { GetNCDHW(transformed_input->dims(), - paddle::platform::DataLayout::kNHWC, + phi::backends::gpu::DataLayout::kNHWC, &i_n, &i_c, &i_d, &i_h, &i_w); GetNCDHW(transformed_output_grad_channel->dims(), - paddle::platform::DataLayout::kNHWC, + phi::backends::gpu::DataLayout::kNHWC, &o_n, &o_c, &o_d, @@ -113,14 +113,14 @@ void ConvCudnnGradKernelImplV7( &o_w); } else { GetNCDHW(transformed_input->dims(), - paddle::platform::DataLayout::kNCHW, + phi::backends::gpu::DataLayout::kNCHW, &i_n, &i_c, &i_d, &i_h, &i_w); GetNCDHW(transformed_output_grad_channel->dims(), - paddle::platform::DataLayout::kNCHW, + phi::backends::gpu::DataLayout::kNCHW, &o_n, &o_c, &o_d, @@ -347,7 +347,7 @@ void ConvCudnnGradKernelImplV8( const std::vector& strides, const std::vector& padding_common, const std::vector& 
dilations, - paddle::platform::DataLayout layout, + phi::backends::gpu::DataLayout layout, bool use_addto, bool exhaustive_search, bool deterministic, @@ -363,8 +363,8 @@ void ConvCudnnGradKernelImplV8( cudnnHandle_t handle = const_cast(ctx.cudnn_handle()); auto workspace_handle = ctx.cudnn_workspace_handle(); - auto dtype = paddle::platform::CudnnDataType::type; - auto layout_format = paddle::platform::GetCudnnTensorFormat(layout); + auto dtype = phi::backends::gpu::CudnnDataType::type; + auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout); if (input_grad) { CudnnConvBwdDataV8(transformed_output_grad_channel, @@ -449,11 +449,11 @@ void ConvCudnnGradKernel(const Context& ctx, const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; #ifdef PADDLE_WITH_HIP // HIP MIOPEN ONLY SUPPORT NCHW format - auto compute_format = paddle::platform::DataLayout::kNCHW; + auto compute_format = phi::backends::gpu::DataLayout::kNCHW; #else #if CUDNN_VERSION_MIN(8, 1, 0) const bool compute_in_nhwc = @@ -463,13 +463,13 @@ void ConvCudnnGradKernel(const Context& ctx, const bool compute_in_nhwc = dtype == CUDNN_DATA_HALF && IsVoltaOrLater(ctx); #endif auto compute_format = compute_in_nhwc && channel_last - ? paddle::platform::DataLayout::kNHWC - : paddle::platform::DataLayout::kNCHW; + ? phi::backends::gpu::DataLayout::kNHWC + : phi::backends::gpu::DataLayout::kNCHW; #endif VLOG(3) << "Compute ConvGradOp with cuDNN:" << " data_format=" << data_format << " compute_format=" - << (compute_format == paddle::platform::DataLayout::kNHWC ? "NHWC" - : "NCHW"); + << (compute_format == phi::backends::gpu::DataLayout::kNHWC ? "NHWC" + : "NCHW"); // transform Tensor DenseTensor transformed_input_channel(input.type()); @@ -478,7 +478,7 @@ void ConvCudnnGradKernel(const Context& ctx, DenseTensor transformed_filter_channel(filter.type()); DenseTensor transformed_filter_grad_channel(filter.type()); - if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { + if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) { VLOG(3) << "Transform input, output_grad, input_grad and tensor from " "NHWC to NCHW."; ResizeToChannelFirst(ctx, &input, &transformed_input_channel); @@ -507,7 +507,7 @@ void ConvCudnnGradKernel(const Context& ctx, } } - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { VLOG(3) << "Transform filter and filter_grad tensor from NCHW to NHWC."; ResizeToChannelLast(ctx, &filter, &transformed_filter_channel); TransToChannelLast(ctx, &filter, &transformed_filter_channel); @@ -528,7 +528,7 @@ void ConvCudnnGradKernel(const Context& ctx, auto filter_dims = transformed_filter_channel.dims(); DDim in_data_dims; DDim filter_data_dims; - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { in_data_dims = slice_ddim(in_dims, 2, in_dims.size()); filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); } else { @@ -553,7 +553,7 @@ void ConvCudnnGradKernel(const Context& ctx, std::vector padding_diff(data_dim); std::vector new_input_shape_vec(data_dim + 2); new_input_shape_vec[0] = transformed_input_channel.dims()[0]; - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { new_input_shape_vec[1] = 
transformed_input_channel.dims()[1]; } else { new_input_shape_vec[data_dim + 1] = @@ -563,14 +563,14 @@ void ConvCudnnGradKernel(const Context& ctx, for (size_t i = 0; i < data_dim; ++i) { padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]); padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]); - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { new_input_shape_vec[i + 2] = transformed_input_channel.dims()[i + 2] + padding_diff[i]; } else { new_input_shape_vec[i + 1] = transformed_input_channel.dims()[i + 1] + padding_diff[i]; } - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i]; input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i]; } else { @@ -624,15 +624,15 @@ void ConvCudnnGradKernel(const Context& ctx, } } } - paddle::platform::DataLayout layout = - compute_format == paddle::platform::DataLayout::kNHWC - ? paddle::platform::DataLayout::kNHWC - : paddle::platform::DataLayout::kNCHW; + phi::backends::gpu::DataLayout layout = + compute_format == phi::backends::gpu::DataLayout::kNHWC + ? phi::backends::gpu::DataLayout::kNHWC + : phi::backends::gpu::DataLayout::kNCHW; // TODO(phlrain): replace paddle::platform::DataLaytout to phi::DataLayout if (transformed_input.dims().size() == 5) { - layout = compute_format == paddle::platform::DataLayout::kNHWC - ? paddle::platform::DataLayout::kNDHWC - : paddle::platform::DataLayout::kNCDHW; + layout = compute_format == phi::backends::gpu::DataLayout::kNHWC + ? phi::backends::gpu::DataLayout::kNDHWC + : phi::backends::gpu::DataLayout::kNCDHW; } #ifdef PADDLE_WITH_CUDNN_FRONTEND @@ -717,14 +717,15 @@ void ConvCudnnGradKernel(const Context& ctx, } } - if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { + if (channel_last && + compute_format == phi::backends::gpu::DataLayout::kNCHW) { TransToChannelLast( ctx, &transformed_input_grad_channel, input_grad); } } if (filter_grad) { - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { TransToChannelFirst( ctx, &transformed_filter_grad_channel, filter_grad); } @@ -1008,11 +1009,11 @@ void ConvCudnnGradGradKernel( c_group = groups; groups = 1; #endif - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; auto handle = ctx.cudnn_handle(); - auto layout = paddle::platform::GetCudnnTensorFormat( - paddle::platform::DataLayout::kNCHW); + auto layout = phi::backends::gpu::GetCudnnTensorFormat( + phi::backends::gpu::DataLayout::kNCHW); ConvArgs args1{handle, &transformed_ddX, @@ -1023,7 +1024,7 @@ void ConvCudnnGradGradKernel( dilations, dtype, groups, - paddle::platform::DataLayout::kNCHW}; + phi::backends::gpu::DataLayout::kNCHW}; ConvArgs args2{handle, &transformed_X, ddW, @@ -1033,7 +1034,7 @@ void ConvCudnnGradGradKernel( dilations, dtype, groups, - paddle::platform::DataLayout::kNCHW}; + phi::backends::gpu::DataLayout::kNCHW}; ConvArgs args3{handle, &transformed_ddX, dW, @@ -1043,7 +1044,7 @@ void ConvCudnnGradGradKernel( dilations, dtype, groups, - paddle::platform::DataLayout::kNCHW}; + phi::backends::gpu::DataLayout::kNCHW}; ConvArgs args4{handle, &transformed_dX, ddW, @@ -1053,7 +1054,7 @@ void ConvCudnnGradGradKernel( dilations, dtype, groups, - paddle::platform::DataLayout::kNCHW}; + 
phi::backends::gpu::DataLayout::kNCHW}; #ifdef PADDLE_WITH_HIP SearchResult fwd_result1; diff --git a/paddle/phi/kernels/gpudnn/conv_kernel.cu b/paddle/phi/kernels/gpudnn/conv_kernel.cu index 40440566531..04ac84d606e 100644 --- a/paddle/phi/kernels/gpudnn/conv_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_kernel.cu @@ -50,8 +50,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, const std::vector& strides, const std::vector& padding_common, const std::vector& dilations, - paddle::platform::DataLayout compute_format, - paddle::platform::DataLayout layout, + phi::backends::gpu::DataLayout compute_format, + phi::backends::gpu::DataLayout layout, bool exhaustive_search, bool deterministic, int groups, @@ -63,8 +63,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, auto handle = ctx.cudnn_handle(); auto workspace_handle = ctx.cudnn_workspace_handle(); - auto layout_format = paddle::platform::GetCudnnTensorFormat(layout); - auto dtype = paddle::platform::CudnnDataType::type; + auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout); + auto dtype = phi::backends::gpu::CudnnDataType::type; // ------------------- cudnn descriptors --------------------- ConvArgs args{handle, @@ -113,16 +113,16 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, int i_n, i_c, i_d, i_h, i_w; int o_n, o_c, o_d, o_h, o_w; - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { GetNCDHW(transformed_input->dims(), - paddle::platform::DataLayout::kNHWC, + phi::backends::gpu::DataLayout::kNHWC, &i_n, &i_c, &i_d, &i_h, &i_w); GetNCDHW(transformed_output->dims(), - paddle::platform::DataLayout::kNHWC, + phi::backends::gpu::DataLayout::kNHWC, &o_n, &o_c, &o_d, @@ -130,14 +130,14 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, &o_w); } else { GetNCDHW(transformed_input->dims(), - paddle::platform::DataLayout::kNCHW, + phi::backends::gpu::DataLayout::kNCHW, &i_n, &i_c, &i_d, &i_h, &i_w); GetNCDHW(transformed_output->dims(), - paddle::platform::DataLayout::kNCHW, + phi::backends::gpu::DataLayout::kNCHW, &o_n, &o_c, &o_d, @@ -227,7 +227,7 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor, const std::vector& strides, const std::vector& padding_common, const std::vector& dilations, - paddle::platform::DataLayout layout, + phi::backends::gpu::DataLayout layout, bool exhaustive_search, bool deterministic, int groups, @@ -247,8 +247,8 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor, cudnnHandle_t handle = const_cast(ctx.cudnn_handle()); auto workspace_handle = ctx.cudnn_workspace_handle(); - auto layout_format = paddle::platform::GetCudnnTensorFormat(layout); - auto dtype = paddle::platform::CudnnDataType::type; + auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout); + auto dtype = phi::backends::gpu::CudnnDataType::type; float alpha = 1.0f; float beta = 0.0f; @@ -368,11 +368,11 @@ void ConvCudnnKernel(const Context& ctx, "FLAGS_cudnn_deterministic True at same time.")); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; #ifdef PADDLE_WITH_HIP // HIP MIOPEN ONLY SUPPORT NCHW format - auto compute_format = paddle::platform::DataLayout::kNCHW; + auto compute_format = phi::backends::gpu::DataLayout::kNCHW; #else #if CUDNN_VERSION_MIN(8, 1, 0) // Tensor Core introduced from Volta GPUs supports more faster 
conv op @@ -388,20 +388,20 @@ void ConvCudnnKernel(const Context& ctx, // We will only do data format conversion from NHWC to NCHW. // cudnn will convert NCHW to NHWC automatically on Tensor Core. auto compute_format = compute_in_nhwc && channel_last - ? paddle::platform::DataLayout::kNHWC - : paddle::platform::DataLayout::kNCHW; + ? phi::backends::gpu::DataLayout::kNHWC + : phi::backends::gpu::DataLayout::kNCHW; #endif VLOG(3) << "Compute ConvOp with cuDNN:" << " data_format=" << data_format << " compute_format=" - << (compute_format == paddle::platform::DataLayout::kNHWC ? "NHWC" - : "NCHW"); + << (compute_format == phi::backends::gpu::DataLayout::kNHWC ? "NHWC" + : "NCHW"); // ------------ transformed tensor ----------- DenseTensor transformed_input_channel(input.type()); DenseTensor transformed_output(output->type()); DenseTensor transformed_filter_channel(filter.type()); - if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { + if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) { VLOG(3) << "Transform input tensor from NHWC to NCHW."; ResizeToChannelFirst(ctx, &input, &transformed_input_channel); TransToChannelFirst(ctx, &input, &transformed_input_channel); @@ -412,7 +412,7 @@ void ConvCudnnKernel(const Context& ctx, transformed_input_channel.ShareDataWith(input); transformed_output.ShareDataWith(*output); } - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { VLOG(3) << "Transform filter tensor from NCHW to NHWC."; ResizeToChannelLast(ctx, &filter, &transformed_filter_channel); TransToChannelLast(ctx, &filter, &transformed_filter_channel); @@ -426,7 +426,7 @@ void ConvCudnnKernel(const Context& ctx, DDim in_data_dims; DDim filter_data_dims; - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { in_data_dims = slice_ddim(in_dims, 2, in_dims.size()); filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); } else { @@ -448,7 +448,7 @@ void ConvCudnnKernel(const Context& ctx, std::vector new_input_shape_vec(data_dim + 2); new_input_shape_vec[0] = transformed_input_channel.dims()[0]; - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { new_input_shape_vec[1] = transformed_input_channel.dims()[1]; } else { new_input_shape_vec[data_dim + 1] = @@ -459,14 +459,14 @@ void ConvCudnnKernel(const Context& ctx, for (size_t i = 0; i < data_dim; ++i) { padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]); padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]); - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { new_input_shape_vec[i + 2] = transformed_input_channel.dims()[i + 2] + padding_diff[i]; } else { new_input_shape_vec[i + 1] = transformed_input_channel.dims()[i + 1] + padding_diff[i]; } - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i]; input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i]; } else { @@ -513,14 +513,14 @@ void ConvCudnnKernel(const Context& ctx, } } - paddle::platform::DataLayout layout = - compute_format == paddle::platform::DataLayout::kNHWC - ? 
paddle::platform::DataLayout::kNHWC - : paddle::platform::DataLayout::kNCHW; + phi::backends::gpu::DataLayout layout = + compute_format == phi::backends::gpu::DataLayout::kNHWC + ? phi::backends::gpu::DataLayout::kNHWC + : phi::backends::gpu::DataLayout::kNCHW; if (transformed_input.dims().size() == 5) { - layout = compute_format == paddle::platform::DataLayout::kNHWC - ? paddle::platform::DataLayout::kNDHWC - : paddle::platform::DataLayout::kNCDHW; + layout = compute_format == phi::backends::gpu::DataLayout::kNHWC + ? phi::backends::gpu::DataLayout::kNDHWC + : phi::backends::gpu::DataLayout::kNCDHW; } #ifdef PADDLE_WITH_CUDNN_FRONTEND @@ -564,7 +564,7 @@ void ConvCudnnKernel(const Context& ctx, &transformed_output); #endif - if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { + if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) { TransToChannelLast(ctx, &transformed_output, output); } } diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu index 78961c86b07..ca980fd6f69 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu @@ -28,16 +28,16 @@ limitations under the License. */ #include "paddle/phi/kernels/transpose_kernel.h" #ifdef PADDLE_WITH_HIP -#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h" +#include "paddle/phi/backends/gpu/rocm/miopen_helper.h" #include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h" #else -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h" #include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h" #endif namespace phi { -using GPUDNNDataLayout = paddle::platform::DataLayout; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; template void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, @@ -171,7 +171,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, groups = 1; #endif - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; auto handle = ctx.cudnn_handle(); ConvArgs args1{handle, @@ -203,7 +203,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, SearchResult filter_result; #endif - auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout); + auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout); size_t workspace_size = 0; bool deterministic = FLAGS_cudnn_deterministic; T* dx_data = nullptr; @@ -616,10 +616,11 @@ void Conv2dTransposeDoubleGradGPUDNNKernel( c_group = groups; groups = 1; #endif - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; auto handle = ctx.cudnn_handle(); - auto layout = paddle::platform::GetCudnnTensorFormat(GPUDNNDataLayout::kNCHW); + auto layout = + phi::backends::gpu::GetCudnnTensorFormat(GPUDNNDataLayout::kNCHW); ConvArgs args1{handle, &transformed_ddout_channel, diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu index 593114ac65f..f45cf9bb179 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu @@ -26,16 +26,16 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/transpose_kernel.h" #ifdef PADDLE_WITH_HIP -#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h" +#include "paddle/phi/backends/gpu/rocm/miopen_helper.h" #include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h" #else -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h" #include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h" #endif namespace phi { -using GPUDNNDataLayout = paddle::platform::DataLayout; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; template void ConvTransposeRawGPUDNNKernel(const Context& ctx, @@ -194,10 +194,10 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx, #endif // ------------------- cudnn conv algorithm --------------------- auto handle = ctx.cudnn_handle(); - auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout); + auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout); bool deterministic = FLAGS_cudnn_deterministic; - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; // ------------------- cudnn descriptors --------------------- ConvArgs args{handle, &transformed_out, diff --git a/paddle/phi/kernels/gpudnn/pool_gpudnn.h b/paddle/phi/kernels/gpudnn/pool_gpudnn.h index 69fd51b7f0d..d830aad6b4f 100644 --- a/paddle/phi/kernels/gpudnn/pool_gpudnn.h +++ b/paddle/phi/kernels/gpudnn/pool_gpudnn.h @@ -16,18 +16,18 @@ limitations under the License. */ #include -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" namespace phi { -using GPUDNNDataLayout = paddle::platform::DataLayout; -using PoolingMode = paddle::platform::PoolingMode; -using ScopedPoolingDescriptor = paddle::platform::ScopedPoolingDescriptor; -using ScopedTensorDescriptor = paddle::platform::ScopedTensorDescriptor; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; +using PoolingMode = phi::backends::gpu::PoolingMode; +using ScopedPoolingDescriptor = phi::backends::gpu::ScopedPoolingDescriptor; +using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor; template using ScalingParamType = - typename paddle::platform::CudnnDataType::ScalingParamType; + typename phi::backends::gpu::CudnnDataType::ScalingParamType; inline GPUDNNDataLayout GetLayoutFromStr(std::string data_format) { if (data_format == "NHWC") { diff --git a/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu b/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu index 0798eab5e18..2b2b61c5a37 100644 --- a/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu @@ -14,8 +14,8 @@ limitations under the License. */ #include "paddle/phi/kernels/pool_grad_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/pooling.h" diff --git a/paddle/phi/kernels/gpudnn/pool_kernel.cu b/paddle/phi/kernels/gpudnn/pool_kernel.cu index 16139d48b23..7e0545551b0 100644 --- a/paddle/phi/kernels/gpudnn/pool_kernel.cu +++ b/paddle/phi/kernels/gpudnn/pool_kernel.cu @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/pool_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/pooling.h" diff --git a/paddle/phi/kernels/gpudnn/softmax_gpudnn.h b/paddle/phi/kernels/gpudnn/softmax_gpudnn.h index a81357e99b5..8ba329301c9 100644 --- a/paddle/phi/kernels/gpudnn/softmax_gpudnn.h +++ b/paddle/phi/kernels/gpudnn/softmax_gpudnn.h @@ -24,8 +24,8 @@ limitations under the License. */ #include "paddle/phi/kernels/primitive/kernel_primitives.h" // See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #define MATRIX_SOFTMAX_ALIGN_BYTES 16 #define MATRIX_SOFTMAX_THREAHOLD 100000 @@ -55,8 +55,8 @@ limitations under the License. */ namespace phi { -using ScopedTensorDescriptor = paddle::platform::ScopedTensorDescriptor; -using GPUDNNDataLayout = paddle::platform::DataLayout; +using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; // Vectorization trait 4 * sizeof(T) template @@ -1065,10 +1065,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx, auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE; PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( handle, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), desc, x_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), desc, out_data, algo, @@ -1082,10 +1082,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx, handle, algo, mode, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), desc, x_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), desc, out_data)); #endif @@ -1137,12 +1137,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx, auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE; PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxBackward_V2( handle, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), desc, out_data, desc, dout_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), desc, dx_data, algo, @@ -1156,12 +1156,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx, handle, algo, mode, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), desc, out_data, desc, dout_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), desc, dx_data)); #endif -- GitLab
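Reviewer note (not part of the patch): the hunks above apply one mechanical pattern — swap the fluid include paths for the new phi ones and replace the paddle::platform:: spellings with phi::backends::gpu::. The sketch below is only an illustration of that pattern under the post-patch layout; ExampleKernel is a hypothetical function, not code from this change, and it assumes a CUDA build of Paddle where the phi headers are available.

    // Before this patch a phi kernel pulled the DNN helpers from fluid:
    //   #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
    //   auto dtype = paddle::platform::CudnnDataType<T>::type;
    //
    // After this patch the same helpers come from phi::backends::gpu:
    #include "paddle/phi/backends/gpu/gpu_dnn.h"

    namespace phi {

    template <typename T>
    void ExampleKernel() {  // hypothetical, for illustration only
      // cuDNN data type and tensor format are now resolved through phi.
      auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
      auto format = phi::backends::gpu::GetCudnnTensorFormat(
          phi::backends::gpu::DataLayout::kNCHW);
      (void)dtype;
      (void)format;
    }

    }  // namespace phi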