diff --git a/paddle/fluid/operators/cuda_graph_with_in_out.h b/paddle/fluid/operators/cuda_graph_with_in_out.h
index a667c40234dcac08cd98b95d7b756902f737ea7e..40896c585c374eb76e754219adde024a5d154930 100644
--- a/paddle/fluid/operators/cuda_graph_with_in_out.h
+++ b/paddle/fluid/operators/cuda_graph_with_in_out.h
@@ -14,6 +14,7 @@
 #pragma once
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/tensor.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
diff --git a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h
index b2201c89295cab5ba9bb3eef068b023c6fa330dc..0325a0e585ed33bcdeda9544eb9ced64c5aba31f 100644
--- a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h
+++ b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h
@@ -54,8 +54,8 @@
   cudnnDataType_t param_dtype;
   cudnnTensorFormat_t format;
-  platform::TensorDescriptor in_desc;
-  platform::TensorDescriptor out_desc;
+  phi::backends::gpu::TensorDescriptor in_desc;
+  phi::backends::gpu::TensorDescriptor out_desc;
 };
 template
diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
index a5e210dc7fe3c5d6f41be719a237fc749345b194..bf0e06b825e4b2ea8c823863dce46bd5088c5473 100644
--- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
+++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
@@ -163,11 +163,11 @@
   std::vector paddings;
   std::vector dilations;
-  platform::TensorDescriptor in_desc;
-  platform::FilterDescriptor filter_desc;
-  platform::TensorDescriptor out_desc;
-  platform::TensorDescriptor out_stats_desc;
-  platform::ConvolutionDescriptor conv_desc;
+  phi::backends::gpu::TensorDescriptor in_desc;
+  phi::backends::gpu::FilterDescriptor filter_desc;
+  phi::backends::gpu::TensorDescriptor out_desc;
+  phi::backends::gpu::TensorDescriptor out_stats_desc;
+  phi::backends::gpu::ConvolutionDescriptor conv_desc;
   bool is_support;
 };
diff --git a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h
index 188f767daf1c8aec59a14832ea2c09ae2ce5ce17..df79ed758dbc501451577407006e095d04834c3b 100644
--- a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h
+++ b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h
@@ -89,12 +89,12 @@
   cudnnDataType_t param_dtype;
   cudnnTensorFormat_t format;
-  platform::TensorDescriptor in_desc;
-  platform::TensorDescriptor out_desc;
-  platform::TensorDescriptor equiv_scale_bias_desc;
-  platform::TensorDescriptor scale_bias_mean_var_desc;
-  platform::TensorDescriptor bitmask_desc;
-  platform::ActivationDescriptor activation_desc;
+  phi::backends::gpu::TensorDescriptor in_desc;
+  phi::backends::gpu::TensorDescriptor out_desc;
+  phi::backends::gpu::TensorDescriptor equiv_scale_bias_desc;
+  phi::backends::gpu::TensorDescriptor scale_bias_mean_var_desc;
+  phi::backends::gpu::TensorDescriptor bitmask_desc;
+  phi::backends::gpu::ActivationDescriptor activation_desc;
 };
 template
diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc
index dae9eb5313c605ad7099163660b219418c0191d5..5b2e90b3291be5eafe133b8dfbef855a1c03a6cf 100644
--- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc
+++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc
@@ -20,8 +20,8 @@
limitations under the License. */ #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" TEST(CudnnHelper, ScopedTensorDescriptor) { - using paddle::platform::DataLayout; - using paddle::platform::ScopedTensorDescriptor; + using phi::backends::gpu::DataLayout; + using phi::backends::gpu::ScopedTensorDescriptor; ScopedTensorDescriptor tensor_desc; std::vector shape = {2, 4, 6, 6}; @@ -31,7 +31,7 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { int nd; std::vector dims(4); std::vector strides(4); - paddle::platform::dynload::cudnnGetTensorNdDescriptor( + phi::dynload::cudnnGetTensorNdDescriptor( desc, 4, &type, &nd, dims.data(), strides.data()); EXPECT_EQ(nd, 4); @@ -50,7 +50,7 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { std::vector dims_5d(5); std::vector strides_5d(5); - paddle::platform::dynload::cudnnGetTensorNdDescriptor( + phi::dynload::cudnnGetTensorNdDescriptor( desc_5d, 5, &type, &nd, dims_5d.data(), strides_5d.data()); EXPECT_EQ(nd, 5); @@ -65,8 +65,8 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { } TEST(CudnnHelper, ScopedFilterDescriptor) { - using paddle::platform::DataLayout; - using paddle::platform::ScopedFilterDescriptor; + using phi::backends::gpu::DataLayout; + using phi::backends::gpu::ScopedFilterDescriptor; ScopedFilterDescriptor filter_desc; std::vector shape = {2, 3, 3}; @@ -76,7 +76,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { int nd; cudnnTensorFormat_t format; std::vector kernel(3); - paddle::platform::dynload::cudnnGetFilterNdDescriptor( + phi::dynload::cudnnGetFilterNdDescriptor( desc, 3, &type, &format, &nd, kernel.data()); EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format); @@ -90,7 +90,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { auto desc_4d = filter_desc.descriptor(DataLayout::kNCDHW, shape_4d); std::vector kernel_4d(4); - paddle::platform::dynload::cudnnGetFilterNdDescriptor( + phi::dynload::cudnnGetFilterNdDescriptor( desc_4d, 4, &type, &format, &nd, kernel_4d.data()); EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format); @@ -101,7 +101,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { } TEST(CudnnHelper, ScopedConvolutionDescriptor) { - using paddle::platform::ScopedConvolutionDescriptor; + using phi::backends::gpu::ScopedConvolutionDescriptor; ScopedConvolutionDescriptor conv_desc; std::vector src_pads = {2, 2, 2}; @@ -115,14 +115,14 @@ TEST(CudnnHelper, ScopedConvolutionDescriptor) { std::vector pads(3); std::vector strides(3); std::vector dilations(3); - paddle::platform::dynload::cudnnGetConvolutionNdDescriptor(desc, - 3, - &nd, - pads.data(), - strides.data(), - dilations.data(), - &mode, - &type); + phi::dynload::cudnnGetConvolutionNdDescriptor(desc, + 3, + &nd, + pads.data(), + strides.data(), + dilations.data(), + &mode, + &type); EXPECT_EQ(nd, 3); for (size_t i = 0; i < src_pads.size(); ++i) { @@ -134,8 +134,8 @@ TEST(CudnnHelper, ScopedConvolutionDescriptor) { } TEST(CudnnHelper, ScopedPoolingDescriptor) { - using paddle::platform::PoolingMode; - using paddle::platform::ScopedPoolingDescriptor; + using phi::backends::gpu::PoolingMode; + using phi::backends::gpu::ScopedPoolingDescriptor; ScopedPoolingDescriptor pool_desc; std::vector src_kernel = {2, 2, 5}; @@ -150,7 +150,7 @@ TEST(CudnnHelper, ScopedPoolingDescriptor) { std::vector kernel(3); std::vector pads(3); std::vector strides(3); - paddle::platform::dynload::cudnnGetPoolingNdDescriptor( + phi::dynload::cudnnGetPoolingNdDescriptor( desc, 3, &mode, &nan_t, &nd, kernel.data(), pads.data(), strides.data()); EXPECT_EQ(nd, 3); diff --git 
a/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc b/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc
index cbe322ef0c48c192b0cd763e111e1978cff8effe..e3b07d11324649db1570dad067856d1c72e88dbd 100644
--- a/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc
+++ b/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc
@@ -20,20 +20,22 @@ namespace paddle {
 namespace platform {
 TEST(TensorDescriptor, Empty) {
-  ActivationDescriptor a;
-  TensorDescriptor t;
-  TensorDescriptor t1;
-  TensorDescriptor *t11 = new TensorDescriptor();
+  phi::backends::gpu::ActivationDescriptor a;
+  phi::backends::gpu::TensorDescriptor t;
+  phi::backends::gpu::TensorDescriptor t1;
+  phi::backends::gpu::TensorDescriptor *t11 =
+      new phi::backends::gpu::TensorDescriptor();
   delete t11;
-  std::unique_ptr tt(new TensorDescriptor());
+  std::unique_ptr tt(
+      new phi::backends::gpu::TensorDescriptor());
 }
 TEST(TensorDescriptor, Normal) {
   phi::DenseTensor tt;
   tt.Resize({2, 3, 4});
-  tt.mutable_data(platform::CPUPlace());
+  tt.mutable_data(phi::CPUPlace());
-  TensorDescriptor desc;
+  phi::backends::gpu::TensorDescriptor desc;
   desc.set(tt);
   EXPECT_TRUE(desc.desc() != nullptr);
 }
diff --git a/paddle/fluid/platform/device/gpu/gpu_dnn.h b/paddle/fluid/platform/device/gpu/gpu_dnn.h
index 3f9bc5e6de80b5d05261863b0557f863fe223943..f6f6392c4c23ddc675626225f0eea88ae44620cf 100644
--- a/paddle/fluid/platform/device/gpu/gpu_dnn.h
+++ b/paddle/fluid/platform/device/gpu/gpu_dnn.h
@@ -14,14 +14,34 @@
 #pragma once
+#include "paddle/phi/backends/gpu/gpu_dnn.h"
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#ifdef PADDLE_WITH_HIP
-#include "paddle/fluid/platform/device/gpu/rocm/miopen_desc.h"
-#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h"
-#else // CUDA
-#include "paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h"
-#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h"
+namespace paddle {
+namespace platform {
+
+using DataLayout = phi::backends::gpu::DataLayout;
+using PoolingMode = phi::backends::gpu::PoolingMode;
+template <typename T>
+using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
+using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor;
+using ScopedDropoutDescriptor = phi::backends::gpu::ScopedDropoutDescriptor;
+using ScopedRNNDescriptor = phi::backends::gpu::ScopedRNNDescriptor;
+using ScopedFilterDescriptor = phi::backends::gpu::ScopedFilterDescriptor;
+using ScopedConvolutionDescriptor =
+    phi::backends::gpu::ScopedConvolutionDescriptor;
+using ScopedPoolingDescriptor = phi::backends::gpu::ScopedPoolingDescriptor;
+using ScopedActivationDescriptor =
+    phi::backends::gpu::ScopedActivationDescriptor;
+
+#if defined(PADDLE_WITH_CUDA)
+using ScopedRNNTensorDescriptor = phi::backends::gpu::ScopedRNNTensorDescriptor;
+using ScopedSpatialTransformerDescriptor =
+    phi::backends::gpu::ScopedSpatialTransformerDescriptor;
 #endif
+} // namespace platform
+} // namespace paddle
+
 #endif
diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc b/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc
index 1a5c46a065edd88b33f051e39a62d8a915a24418..0f73aefdd4255b8168c211c98c1cc0309dd13ce8 100644
--- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc
+++ b/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc
@@ -20,8 +20,8 @@ limitations under the License.
*/ #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" TEST(MIOpenHelper, ScopedTensorDescriptor) { - using paddle::platform::DataLayout; - using paddle::platform::ScopedTensorDescriptor; + using phi::backends::gpu::DataLayout; + using phi::backends::gpu::ScopedTensorDescriptor; ScopedTensorDescriptor tensor_desc; std::vector shape = {2, 4, 6, 6}; @@ -31,9 +31,9 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) { int nd; std::vector dims(4); std::vector strides(4); - paddle::platform::dynload::miopenGetTensorDescriptor( + phi::dynload::miopenGetTensorDescriptor( desc, &type, dims.data(), strides.data()); - paddle::platform::dynload::miopenGetTensorDescriptorSize(desc, &nd); + phi::dynload::miopenGetTensorDescriptorSize(desc, &nd); EXPECT_EQ(nd, 4); for (size_t i = 0; i < dims.size(); ++i) { @@ -51,9 +51,9 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) { std::vector dims_5d(5); std::vector strides_5d(5); - paddle::platform::dynload::miopenGetTensorDescriptor( + phi::dynload::miopenGetTensorDescriptor( desc_5d, &type, dims_5d.data(), strides_5d.data()); - paddle::platform::dynload::miopenGetTensorDescriptorSize(desc_5d, &nd); + phi::dynload::miopenGetTensorDescriptorSize(desc_5d, &nd); EXPECT_EQ(nd, 5); for (size_t i = 0; i < dims_5d.size(); ++i) { @@ -67,7 +67,7 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) { } TEST(MIOpenHelper, ScopedConvolutionDescriptor) { - using paddle::platform::ScopedConvolutionDescriptor; + using phi::backends::gpu::ScopedConvolutionDescriptor; ScopedConvolutionDescriptor conv_desc; std::vector src_pads = {2, 2, 2}; @@ -80,7 +80,7 @@ TEST(MIOpenHelper, ScopedConvolutionDescriptor) { std::vector pads(3); std::vector strides(3); std::vector dilations(3); - paddle::platform::dynload::miopenGetConvolutionNdDescriptor( + phi::dynload::miopenGetConvolutionNdDescriptor( desc, 3, &nd, pads.data(), strides.data(), dilations.data(), &mode); EXPECT_EQ(nd, 3); diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h b/paddle/phi/backends/gpu/cuda/cudnn_desc.h similarity index 69% rename from paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h rename to paddle/phi/backends/gpu/cuda/cudnn_desc.h index 677dc49cce4b629ef5e87a2788122a2ca2cbc4d6..d4fb6930bcc5507952a521e6b3d1c7bfc628777f 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h +++ b/paddle/phi/backends/gpu/cuda/cudnn_desc.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -23,22 +23,12 @@ #include #include -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" -#include "paddle/fluid/platform/device_context.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h" #include "paddle/phi/core/utils/data_type.h" namespace phi { -class DenseTensor; -} // namespace phi - -namespace paddle { -namespace platform { - -template -inline cudnnDataType_t ToCudnnDataType(const T& t) { - auto type = framework::ToDataType(t); - return ToCudnnDataType(phi::TransToPhiDataType(type)); -} +namespace backends { +namespace gpu { template inline std::vector TransformDimOrder(const std::vector& dims) { @@ -67,7 +57,6 @@ inline std::vector TransformDimOrder(const std::vector& dims) { return transformed_dims; } -template <> inline cudnnDataType_t ToCudnnDataType(const phi::DataType& t) { cudnnDataType_t type = CUDNN_DATA_FLOAT; switch (t) { @@ -98,7 +87,7 @@ class ActivationDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroyActivationDescriptor(t)); + phi::dynload::cudnnDestroyActivationDescriptor(t)); t = nullptr; } } @@ -106,12 +95,12 @@ class ActivationDescriptor { ActivationDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateActivationDescriptor(&raw_ptr)); + phi::dynload::cudnnCreateActivationDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } template void set(cudnnActivationMode_t mode, const T& coef) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor( desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, static_cast(coef))); } @@ -128,14 +117,16 @@ class TensorDescriptor { struct Deleter { void operator()(T* t) { if (t != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(t)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyTensorDescriptor(t)); t = nullptr; } } }; TensorDescriptor() { T* raw_ptr; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&raw_ptr)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateTensorDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -151,12 +142,12 @@ class TensorDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetTensorNdDescriptor(desc_.get(), - ToCudnnDataType(tensor.dtype()), - dims_with_group.size(), - dims_with_group.data(), - strides.data())); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( + desc_.get(), + ToCudnnDataType(tensor.dtype()), + dims_with_group.size(), + dims_with_group.data(), + strides.data())); } void set(const std::vector& dims, @@ -169,11 +160,11 @@ class TensorDescriptor { transformed_dims = dims; } PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetTensorNdDescriptorEx(desc_.get(), - format, - dtype, - transformed_dims.size(), - transformed_dims.data())); + phi::dynload::cudnnSetTensorNdDescriptorEx(desc_.get(), + format, + dtype, + transformed_dims.size(), + transformed_dims.data())); } void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) { @@ -192,14 +183,16 @@ class FilterDescriptor { struct Deleter { void operator()(T* t) { if (t != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(t)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyFilterDescriptor(t)); t = nullptr; } } }; FilterDescriptor() { T* raw_ptr; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&raw_ptr)); + 
PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateFilterDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -219,11 +212,11 @@ class FilterDescriptor { transformed_dims[1] = transformed_dims[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetFilterNdDescriptor(desc_.get(), - dtype, - format, - transformed_dims.size(), - transformed_dims.data())); + phi::dynload::cudnnSetFilterNdDescriptor(desc_.get(), + dtype, + format, + transformed_dims.size(), + transformed_dims.data())); } void set(const phi::DenseTensor& tensor, @@ -245,7 +238,7 @@ class ConvolutionDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroyConvolutionDescriptor(t)); + phi::dynload::cudnnDestroyConvolutionDescriptor(t)); t = nullptr; } } @@ -253,7 +246,7 @@ class ConvolutionDescriptor { ConvolutionDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateConvolutionDescriptor(&raw_ptr)); + phi::dynload::cudnnCreateConvolutionDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -270,31 +263,31 @@ class ConvolutionDescriptor { (dtype == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT; T* desc = desc_.get(); PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetConvolutionNdDescriptor(desc, - pads.size(), - pads.data(), - strides.data(), - dilations.data(), - CUDNN_CROSS_CORRELATION, - compute_type)); + phi::dynload::cudnnSetConvolutionNdDescriptor(desc, + pads.size(), + pads.data(), + strides.data(), + dilations.data(), + CUDNN_CROSS_CORRELATION, + compute_type)); #if CUDNN_VERSION_MIN(7, 0, 1) PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnSetConvolutionGroupCount(desc, groups)); + phi::dynload::cudnnSetConvolutionGroupCount(desc, groups)); #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType( - desc, CUDNN_DEFAULT_MATH)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_DEFAULT_MATH)); if (dtype == CUDNN_DATA_HALF) { - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( desc, CUDNN_TENSOR_OP_MATH)); #if CUDA_VERSION >= 11000 #if CUDNN_VERSION_MIN(8, 1, 0) } else if (dtype == CUDNN_DATA_BFLOAT16) { - PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( desc, CUDNN_TENSOR_OP_MATH)); #endif // CUDNN_VERSION_MIN(8,1,0) } else if (dtype == CUDNN_DATA_FLOAT && !allow_tf32) { PADDLE_ENFORCE_GPU_SUCCESS( - platform::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH)); + phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH)); #endif // CUDA_VERSION >= 11000 } #endif @@ -307,5 +300,6 @@ class ConvolutionDescriptor { std::unique_ptr desc_; }; -} // namespace platform -} // namespace paddle +} // namespace gpu +} // namespace backends +} // namespace phi diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h b/paddle/phi/backends/gpu/cuda/cudnn_helper.h similarity index 74% rename from paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h rename to paddle/phi/backends/gpu/cuda/cudnn_helper.h index 7181fc34f88caee1d185d526d151840cd1c53328..670c1f43c82debcb0643251ec3959b3fea04cb3d 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h +++ b/paddle/phi/backends/gpu/cuda/cudnn_helper.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle 
Authors. All Rights Reserved. +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,16 +17,20 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/platform/dynload/cudnn.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/fluid/platform/macros.h" +#include "paddle/phi/backends/dynload/cudnn.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/phi/core/errors.h" +#include "paddle/phi/core/macros.h" DECLARE_bool(cudnn_deterministic); -namespace paddle { -namespace platform { +namespace phi { +namespace backends { +namespace gpu { #define CUDNN_VERSION_MIN(major, minor, patch) \ (CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch))) @@ -68,7 +72,7 @@ inline cudnnPoolingMode_t GetPoolingMode(const PoolingMode& mode) { return CUDNN_POOLING_MAX; default: PADDLE_THROW( - platform::errors::Unimplemented("Unexpected CUDNN pooling mode.")); + phi::errors::Unimplemented("Unexpected CUDNN pooling mode.")); } } @@ -88,7 +92,7 @@ inline ActivationMode StringToActivationMode(const std::string& str) { } else if (str == "bandpass") { return ActivationMode::kBandPass; } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unknown CUDNN activation string: %s.", str)); } } @@ -99,7 +103,7 @@ class CudnnDataType; // CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1 #if CUDNN_VERSION_MIN(8, 1, 0) template <> -class CudnnDataType { +class CudnnDataType { public: static const cudnnDataType_t type = CUDNN_DATA_BFLOAT16; using ScalingParamType = const float; @@ -116,7 +120,7 @@ class CudnnDataType { #endif template <> -class CudnnDataType { +class CudnnDataType { public: static const cudnnDataType_t type = CUDNN_DATA_HALF; // The scaling param type is float for HALF and FLOAT tensors @@ -176,7 +180,7 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat( case DataLayout::kNDHWC: return CUDNN_TENSOR_NHWC; // add, liyamei default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "CUDNN has no equivalent dataLayout for input order.")); } return CUDNN_TENSOR_NCHW; @@ -185,10 +189,12 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat( class ScopedTensorDescriptor { public: ScopedTensorDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateTensorDescriptor(&desc_)); } ~ScopedTensorDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyTensorDescriptor(desc_)); } inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format, @@ -211,25 +217,25 @@ class ScopedTensorDescriptor { if (dims.size() == 4) { if (format == CUDNN_TENSOR_NCHW) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetTensorNdDescriptor(desc_, - type, - dims_with_group.size(), - dims_with_group.data(), - strides.data())); + phi::dynload::cudnnSetTensorNdDescriptor(desc_, + type, + dims_with_group.size(), + dims_with_group.data(), + strides.data())); } else { // CUDNN_TENSOR_NHWC - 
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensor4dDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensor4dDescriptor( desc_, format, type, dims[0], dims[3], dims[1], dims[2])); } } else if (dims.size() == 5) { if (format == CUDNN_TENSOR_NCHW) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetTensorNdDescriptor(desc_, - type, - dims_with_group.size(), - dims_with_group.data(), - strides.data())); + phi::dynload::cudnnSetTensorNdDescriptor(desc_, + type, + dims_with_group.size(), + dims_with_group.data(), + strides.data())); } else { // CUDNN_TENSOR_NHWC - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptorEx( desc_, format, type, dims.size(), dims.data())); } } @@ -247,7 +253,7 @@ class ScopedTensorDescriptor { inline cudnnTensorDescriptor_t descriptor(const cudnnDataType_t cudnn_type, const std::vector& dim, const std::vector& stride) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( desc_, cudnn_type, dim.size(), dim.data(), stride.data())); return desc_; } @@ -269,11 +275,13 @@ class ScopedTensorDescriptor { class ScopedRNNTensorDescriptor { public: ScopedRNNTensorDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDataDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateRNNDataDescriptor(&desc_)); } ~ScopedRNNTensorDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDataDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyRNNDataDescriptor(desc_)); } inline cudnnRNNDataDescriptor_t descriptor( @@ -292,15 +300,15 @@ class ScopedRNNTensorDescriptor { layout = CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED; } - PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetRNNDataDescriptor(desc_, - cudnn_type, - layout, - max_seq_length, - batch_size, - input_size, - seq_length.data(), - static_cast(&padding_fill))); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDataDescriptor( + desc_, + cudnn_type, + layout, + max_seq_length, + batch_size, + input_size, + seq_length.data(), + static_cast(&padding_fill))); return desc_; } @@ -331,14 +339,16 @@ class ScopedRNNTensorDescriptor { class ScopedDropoutDescriptor { public: ScopedDropoutDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateDropoutDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateDropoutDescriptor(&desc_)); } ~ScopedDropoutDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyDropoutDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyDropoutDescriptor(desc_)); } inline cudnnDropoutDescriptor_t descriptor(const cudnnHandle_t& handle, - const platform::Place& place, + const phi::Place& place, bool initialized, float dropout_prob_, phi::DenseTensor* dropout_state_, @@ -346,22 +356,22 @@ class ScopedDropoutDescriptor { size_t state_size) { if (dropout_state_ == nullptr) { // for no dropout or test PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetDropoutDescriptor(desc_, - handle, - 0 /* dropout */, - nullptr, - 0 /* state_size */, - 0 /* seed */)); + phi::dynload::cudnnSetDropoutDescriptor(desc_, + handle, + 0 /* dropout */, + nullptr, + 0 /* state_size */, + 0 /* seed */)); return desc_; } auto* dropout_state_data = dropout_state_->data(); if (!initialized) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetDropoutDescriptor( + 
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetDropoutDescriptor( desc_, handle, dropout_prob_, dropout_state_data, state_size, seed)); } else { auto dropout_state_dims = dropout_state_->dims(); state_size = dropout_state_dims[0]; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnRestoreDropoutDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRestoreDropoutDescriptor( desc_, handle, dropout_prob_, dropout_state_data, state_size, 0)); } return desc_; @@ -376,10 +386,10 @@ class ScopedDropoutDescriptor { class ScopedRNNDescriptor { public: ScopedRNNDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnCreateRNNDescriptor(&desc_)); } ~ScopedRNNDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDestroyRNNDescriptor(desc_)); } inline cudnnRNNDescriptor_t desc() { return desc_; } @@ -392,10 +402,12 @@ class ScopedRNNDescriptor { class ScopedFilterDescriptor { public: ScopedFilterDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateFilterDescriptor(&desc_)); } ~ScopedFilterDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyFilterDescriptor(desc_)); } inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format, @@ -412,11 +424,11 @@ class ScopedFilterDescriptor { // NOTE: input filter(C) of the filter is already asserted to be C/groups. } PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetFilterNdDescriptor(desc_, - type, - format, - kernel_with_group.size(), - kernel_with_group.data())); + phi::dynload::cudnnSetFilterNdDescriptor(desc_, + type, + format, + kernel_with_group.size(), + kernel_with_group.data())); return desc_; } @@ -439,11 +451,11 @@ class ScopedConvolutionDescriptor { public: ScopedConvolutionDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateConvolutionDescriptor(&desc_)); + phi::dynload::cudnnCreateConvolutionDescriptor(&desc_)); } ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroyConvolutionDescriptor(desc_)); + phi::dynload::cudnnDestroyConvolutionDescriptor(desc_)); } inline cudnnConvolutionDescriptor_t descriptor( @@ -453,7 +465,7 @@ class ScopedConvolutionDescriptor { const std::vector& dilations) { PADDLE_ENFORCE_EQ(pads.size(), strides.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of pads and strides should be equal. But " "received size of pads is %d, size of strides is %d.", pads.size(), @@ -461,7 +473,7 @@ class ScopedConvolutionDescriptor { PADDLE_ENFORCE_EQ( pads.size(), dilations.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of pads and dilations should be equal. But received size " "of pads is %d, size of dilations is %d.", pads.size(), @@ -470,13 +482,13 @@ class ScopedConvolutionDescriptor { cudnnDataType_t compute_type = (type == CUDNN_DATA_DOUBLE) ? 
CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetConvolutionNdDescriptor(desc_, - pads.size(), - pads.data(), - strides.data(), - dilations.data(), - CUDNN_CROSS_CORRELATION, - compute_type)); + phi::dynload::cudnnSetConvolutionNdDescriptor(desc_, + pads.size(), + pads.data(), + strides.data(), + dilations.data(), + CUDNN_CROSS_CORRELATION, + compute_type)); return desc_; } @@ -496,10 +508,12 @@ class ScopedConvolutionDescriptor { class ScopedPoolingDescriptor { public: ScopedPoolingDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreatePoolingDescriptor(&desc_)); } ~ScopedPoolingDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyPoolingDescriptor(desc_)); } inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode, @@ -508,7 +522,7 @@ class ScopedPoolingDescriptor { const std::vector& strides) { PADDLE_ENFORCE_EQ(kernel.size(), pads.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of kernel and pads should be equal. But " "received size of kernel is %d, size of pads is %d.", kernel.size(), @@ -516,12 +530,12 @@ class ScopedPoolingDescriptor { PADDLE_ENFORCE_EQ( kernel.size(), strides.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of kernel and strides should be equal. But " "received size of kernel is %d, size of strides is %d.", kernel.size(), strides.size())); - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetPoolingNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetPoolingNdDescriptor( desc_, (GetPoolingMode(mode)), CUDNN_PROPAGATE_NAN, // Always propagate nans. 
@@ -541,18 +555,23 @@ class ScopedSpatialTransformerDescriptor { public: ScopedSpatialTransformerDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateSpatialTransformerDescriptor(&desc_)); + phi::dynload::cudnnCreateSpatialTransformerDescriptor(&desc_)); } ~ScopedSpatialTransformerDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroySpatialTransformerDescriptor(desc_)); + phi::dynload::cudnnDestroySpatialTransformerDescriptor(desc_)); } template inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims, const int dimA[]) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor( - desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType::type, nbDims, dimA)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnSetSpatialTransformerNdDescriptor( + desc_, + CUDNN_SAMPLER_BILINEAR, + CudnnDataType::type, + nbDims, + dimA)); return desc_; } @@ -565,11 +584,11 @@ class ScopedActivationDescriptor { public: ScopedActivationDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnCreateActivationDescriptor(&desc_)); + phi::dynload::cudnnCreateActivationDescriptor(&desc_)); } ~ScopedActivationDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnDestroyActivationDescriptor(desc_)); + phi::dynload::cudnnDestroyActivationDescriptor(desc_)); } template @@ -602,11 +621,11 @@ class ScopedActivationDescriptor { mode = CUDNN_ACTIVATION_TANH; break; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unrecognized CUDNN activation mode: %d.", static_cast(activation_mode))); } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor( desc_, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling)); return desc_; } @@ -620,16 +639,18 @@ class ScopedActivationDescriptor { class ScopedCTCLossDescriptor { public: ScopedCTCLossDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnCreateCTCLossDescriptor(&desc_)); } ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::cudnnDestroyCTCLossDescriptor(desc_)); } template inline cudnnCTCLossDescriptor_t descriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType::type)); + phi::dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType::type)); return desc_; } @@ -639,5 +660,6 @@ class ScopedCTCLossDescriptor { }; #endif -} // namespace platform -} // namespace paddle +} // namespace gpu +} // namespace backends +} // namespace phi diff --git a/paddle/phi/backends/gpu/gpu_dnn.h b/paddle/phi/backends/gpu/gpu_dnn.h new file mode 100644 index 0000000000000000000000000000000000000000..f37afa3deeb746e29cec378f075246dc7350a9a0 --- /dev/null +++ b/paddle/phi/backends/gpu/gpu_dnn.h @@ -0,0 +1,27 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + +#ifdef PADDLE_WITH_HIP +#include "paddle/phi/backends/gpu/rocm/miopen_desc.h" +#include "paddle/phi/backends/gpu/rocm/miopen_helper.h" +#else // CUDA +#include "paddle/phi/backends/gpu/cuda/cudnn_desc.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h" +#endif + +#endif diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h b/paddle/phi/backends/gpu/rocm/miopen_desc.h similarity index 76% rename from paddle/fluid/platform/device/gpu/rocm/miopen_desc.h rename to paddle/phi/backends/gpu/rocm/miopen_desc.h index 1ce4df05be64e0bbbddf84563a8fd0730e44341b..ae0e274ca650efbed885f755fbab90e1c453534a 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h +++ b/paddle/phi/backends/gpu/rocm/miopen_desc.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -23,22 +23,12 @@ #include #include -#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h" -#include "paddle/fluid/platform/device_context.h" +#include "paddle/phi/backends/gpu/rocm/miopen_helper.h" #include "paddle/phi/core/utils/data_type.h" namespace phi { -class DenseTensor; -} // namespace phi - -namespace paddle { -namespace platform { - -template -inline miopenDataType_t ToCudnnDataType(const T& t) { - auto type = framework::ToDataType(t); - return ToCudnnDataType(phi::TransToPhiDataType(type)); -} +namespace backends { +namespace gpu { inline std::vector TransformDimOrder(const std::vector& dims) { std::vector transformed_dims(dims.begin(), dims.end()); @@ -63,7 +53,6 @@ inline std::vector TransformDimOrder(const std::vector& dims) { return transformed_dims; } -template <> inline miopenDataType_t ToCudnnDataType(const phi::DataType& t) { miopenDataType_t type = miopenFloat; switch (t) { @@ -86,7 +75,7 @@ class ActivationDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenDestroyActivationDescriptor(t)); + phi::dynload::miopenDestroyActivationDescriptor(t)); t = nullptr; } } @@ -94,12 +83,12 @@ class ActivationDescriptor { ActivationDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenCreateActivationDescriptor(&raw_ptr)); + phi::dynload::miopenCreateActivationDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } template void set(miopenActivationMode_t mode, const T& coef) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor( desc_.get(), mode, static_cast(coef), 0.0, 0.0)); } @@ -116,14 +105,16 @@ class TensorDescriptor { struct Deleter { void operator()(T* t) { if (t != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(t)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyTensorDescriptor(t)); t = nullptr; } } }; TensorDescriptor() { T* raw_ptr; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&raw_ptr)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateTensorDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -140,7 +131,7 @@ class TensorDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - 
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.dtype()), static_cast(dims_with_group.size()), @@ -150,10 +141,10 @@ class TensorDescriptor { void set(const phi::DenseTensor& tensor, const miopenTensorFormat_t format) { const int groups = 1; - PADDLE_ENFORCE_EQ(format, - MIOPEN_TENSOR_NCHW, - platform::errors::InvalidArgument( - "format should ONLY be NCHW in MIOPEN.")); + PADDLE_ENFORCE_EQ( + format, + MIOPEN_TENSOR_NCHW, + phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN.")); auto dims = phi::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; @@ -164,7 +155,7 @@ class TensorDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.dtype()), static_cast(dims_with_group.size()), @@ -182,14 +173,16 @@ class FilterDescriptor { struct Deleter { void operator()(T* t) { if (t != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(t)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyTensorDescriptor(t)); t = nullptr; } } }; FilterDescriptor() { T* raw_ptr; - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&raw_ptr)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateTensorDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -198,10 +191,10 @@ class FilterDescriptor { void set(const phi::DenseTensor& tensor, const miopenTensorFormat_t format, const int groups = 1) { - PADDLE_ENFORCE_EQ(format, - MIOPEN_TENSOR_NCHW, - platform::errors::InvalidArgument( - "format should ONLY be NCHW in MIOPEN.")); + PADDLE_ENFORCE_EQ( + format, + MIOPEN_TENSOR_NCHW, + phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN.")); auto dims = phi::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; @@ -212,7 +205,7 @@ class FilterDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.dtype()), static_cast(dims_with_group.size()), @@ -231,7 +224,7 @@ class ConvolutionDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenDestroyConvolutionDescriptor(t)); + phi::dynload::miopenDestroyConvolutionDescriptor(t)); t = nullptr; } } @@ -239,7 +232,7 @@ class ConvolutionDescriptor { ConvolutionDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenCreateConvolutionDescriptor(&raw_ptr)); + phi::dynload::miopenCreateConvolutionDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -251,21 +244,21 @@ class ConvolutionDescriptor { const std::vector& dilations, bool allow_tf32, const int groups = 1) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenInitConvolutionNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor( (miopenConvolutionDescriptor_t)desc_.get(), static_cast(pads.size()), const_cast(pads.data()), const_cast(strides.data()), const_cast(dilations.data()), miopenConvolution)); - PADDLE_ENFORCE_GPU_SUCCESS( - 
platform::dynload::miopenSetConvolutionGroupCount( - (miopenConvolutionDescriptor_t)desc_.get(), groups)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetConvolutionGroupCount( + (miopenConvolutionDescriptor_t)desc_.get(), groups)); } private: std::unique_ptr desc_; }; -} // namespace platform -} // namespace paddle +} // namespace gpu +} // namespace backends +} // namespace phi diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h b/paddle/phi/backends/gpu/rocm/miopen_helper.h similarity index 75% rename from paddle/fluid/platform/device/gpu/rocm/miopen_helper.h rename to paddle/phi/backends/gpu/rocm/miopen_helper.h index fb2392e657523a22085c82d4d0d03164ac9e0241..fc602d90fc647123a71044fefa01682bcf948907 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h +++ b/paddle/phi/backends/gpu/rocm/miopen_helper.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,20 +17,24 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/platform/device/gpu/gpu_types.h" -#include "paddle/fluid/platform/dynload/miopen.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/fluid/platform/macros.h" +#include "paddle/phi/backends/dynload/miopen.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/enforce.h" +#include "paddle/phi/core/errors.h" +#include "paddle/phi/core/macros.h" // MIOPEN do not have epslion definition #define CUDNN_BN_MIN_EPSILON 1e-05 DECLARE_bool(cudnn_deterministic); -namespace paddle { -namespace platform { +namespace phi { +namespace backends { +namespace gpu { + inline const char* miopenGetErrorString(miopenStatus_t status) { switch (status) { case miopenStatusSuccess: @@ -95,7 +99,7 @@ inline miopenPoolingMode_t GetPoolingMode(const PoolingMode& mode) { return miopenPoolingMax; default: PADDLE_THROW( - platform::errors::Unimplemented("Unexpected MIOPEN pooling mode.")); + phi::errors::Unimplemented("Unexpected MIOPEN pooling mode.")); } } @@ -115,7 +119,7 @@ inline ActivationMode StringToActivationMode(const std::string& str) { } else if (str == "bandpass") { return ActivationMode::kBandPass; } else { - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unknown MIOPEN activation string: %s.", str)); } } @@ -124,7 +128,7 @@ template class CudnnDataType; template <> -class CudnnDataType { +class CudnnDataType { public: static const miopenDataType_t type = miopenHalf; // The scaling param type is float for HALF and FLOAT tensors @@ -141,7 +145,7 @@ class CudnnDataType { }; template <> -class CudnnDataType { +class CudnnDataType { public: static const miopenDataType_t type = miopenBFloat16; // The scaling param type is float for HALF and FLOAT tensors @@ -184,7 +188,7 @@ inline miopenTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) { case DataLayout::kNDHWC: return MIOPEN_TENSOR_NHWC; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "MIOPEN has no equivalent dataLayout for input order.")); } return MIOPEN_TENSOR_NCHW; @@ -193,10 +197,12 @@ inline miopenTensorFormat_t 
GetCudnnTensorFormat(const DataLayout& order) { class ScopedTensorDescriptor { public: ScopedTensorDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateTensorDescriptor(&desc_)); } ~ScopedTensorDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyTensorDescriptor(desc_)); } inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format, @@ -217,19 +223,19 @@ class ScopedTensorDescriptor { } // MIOPEN ONLY support data layout of NCHW - PADDLE_ENFORCE_EQ(format, - MIOPEN_TENSOR_NCHW, - platform::errors::InvalidArgument( - "format should ONLY be NCHW in MIOPEN.")); + PADDLE_ENFORCE_EQ( + format, + MIOPEN_TENSOR_NCHW, + phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN.")); if (dims.size() == 4) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( desc_, type, dims_with_group.size(), const_cast(dims_with_group.data()), const_cast(strides.data()))); } else if (dims.size() == 5) { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( desc_, type, dims_with_group.size(), @@ -250,12 +256,12 @@ class ScopedTensorDescriptor { inline miopenTensorDescriptor_t descriptor(const miopenDataType_t miopen_type, const std::vector& dim, const std::vector& stride) { - PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenSetTensorDescriptor(desc_, - miopen_type, - dim.size(), - const_cast(dim.data()), - const_cast(stride.data()))); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( + desc_, + miopen_type, + dim.size(), + const_cast(dim.data()), + const_cast(stride.data()))); return desc_; } @@ -275,14 +281,16 @@ class ScopedTensorDescriptor { class ScopedDropoutDescriptor { public: ScopedDropoutDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateDropoutDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateDropoutDescriptor(&desc_)); } ~ScopedDropoutDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyDropoutDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyDropoutDescriptor(desc_)); } inline miopenDropoutDescriptor_t descriptor(const miopenHandle_t& handle, - const platform::Place& place, + const phi::Place& place, bool initialized, float dropout_prob_, phi::DenseTensor* dropout_state_, @@ -290,42 +298,42 @@ class ScopedDropoutDescriptor { size_t state_size) { if (dropout_state_ == nullptr) { // for no dropout or test PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenSetDropoutDescriptor(desc_, - handle, - 0 /* dropout */, - nullptr, - 0 /* state_size */, - 0 /* seed */, - false, - false, - MIOPEN_RNG_PSEUDO_XORWOW)); + phi::dynload::miopenSetDropoutDescriptor(desc_, + handle, + 0 /* dropout */, + nullptr, + 0 /* state_size */, + 0 /* seed */, + false, + false, + MIOPEN_RNG_PSEUDO_XORWOW)); return desc_; } auto* dropout_state_data = dropout_state_->data(); if (!initialized) { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenSetDropoutDescriptor(desc_, - handle, - dropout_prob_, - dropout_state_data, - state_size, - seed, - false, - false, - MIOPEN_RNG_PSEUDO_XORWOW)); + phi::dynload::miopenSetDropoutDescriptor(desc_, + handle, + dropout_prob_, + dropout_state_data, + state_size, + seed, + false, + false, + 
MIOPEN_RNG_PSEUDO_XORWOW)); } else { auto dropout_state_dims = dropout_state_->dims(); state_size = dropout_state_dims[0]; - PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenRestoreDropoutDescriptor(desc_, - handle, - dropout_prob_, - dropout_state_data, - state_size, - 0, - false, - false, - MIOPEN_RNG_PSEUDO_XORWOW)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRestoreDropoutDescriptor( + desc_, + handle, + dropout_prob_, + dropout_state_data, + state_size, + 0, + false, + false, + MIOPEN_RNG_PSEUDO_XORWOW)); } return desc_; } @@ -339,10 +347,10 @@ class ScopedDropoutDescriptor { class ScopedRNNDescriptor { public: ScopedRNNDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateRNNDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenCreateRNNDescriptor(&desc_)); } ~ScopedRNNDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyRNNDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenDestroyRNNDescriptor(desc_)); } inline miopenRNNDescriptor_t desc() { return desc_; } @@ -355,10 +363,12 @@ class ScopedRNNDescriptor { class ScopedFilterDescriptor { public: ScopedFilterDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateTensorDescriptor(&desc_)); } ~ScopedFilterDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyTensorDescriptor(desc_)); } inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format, @@ -379,7 +389,7 @@ class ScopedFilterDescriptor { for (int k = kernel_with_group.size() - 2; k >= 0; k--) { stride_dim[k] = stride_dim[k + 1] * kernel_with_group[k + 1]; } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( desc_, type, kernel_with_group.size(), @@ -407,11 +417,11 @@ class ScopedConvolutionDescriptor { public: ScopedConvolutionDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenCreateConvolutionDescriptor(&desc_)); + phi::dynload::miopenCreateConvolutionDescriptor(&desc_)); } ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenDestroyConvolutionDescriptor(desc_)); + phi::dynload::miopenDestroyConvolutionDescriptor(desc_)); } inline miopenConvolutionDescriptor_t descriptor( @@ -421,7 +431,7 @@ class ScopedConvolutionDescriptor { const std::vector& dilations) { PADDLE_ENFORCE_EQ(pads.size(), strides.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of pads and strides should be equal. But " "received size of pads is %d, size of strides is %d.", pads.size(), @@ -429,12 +439,12 @@ class ScopedConvolutionDescriptor { PADDLE_ENFORCE_EQ( pads.size(), dilations.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of pads and dilations should be equal. 
But received size " "of pads is %d, size of dilations is %d.", pads.size(), dilations.size())); - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenInitConvolutionNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor( desc_, pads.size(), const_cast(pads.data()), @@ -460,10 +470,12 @@ class ScopedConvolutionDescriptor { class ScopedPoolingDescriptor { public: ScopedPoolingDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreatePoolingDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreatePoolingDescriptor(&desc_)); } ~ScopedPoolingDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyPoolingDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyPoolingDescriptor(desc_)); } inline miopenPoolingDescriptor_t descriptor(const PoolingMode& mode, @@ -472,7 +484,7 @@ class ScopedPoolingDescriptor { const std::vector& strides) { PADDLE_ENFORCE_EQ(kernel.size(), pads.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of kernel and pads should be equal. But " "received size of kernel is %d, size of pads is %d.", kernel.size(), @@ -480,12 +492,12 @@ class ScopedPoolingDescriptor { PADDLE_ENFORCE_EQ( kernel.size(), strides.size(), - platform::errors::InvalidArgument( + phi::errors::InvalidArgument( "The size of kernel and strides should be equal. But " "received size of kernel is %d, size of strides is %d.", kernel.size(), strides.size())); - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetNdPoolingDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetNdPoolingDescriptor( desc_, GetPoolingMode(mode), kernel.size(), @@ -504,11 +516,11 @@ class ScopedActivationDescriptor { public: ScopedActivationDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenCreateActivationDescriptor(&desc_)); + phi::dynload::miopenCreateActivationDescriptor(&desc_)); } ~ScopedActivationDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - dynload::miopenDestroyActivationDescriptor(desc_)); + phi::dynload::miopenDestroyActivationDescriptor(desc_)); } template @@ -539,11 +551,11 @@ class ScopedActivationDescriptor { mode = miopenActivationTANH; break; default: - PADDLE_THROW(platform::errors::Unimplemented( + PADDLE_THROW(phi::errors::Unimplemented( "Unrecognized MIOPEN activation mode: %d.", static_cast(activation_mode))); } - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor( desc_, mode, relu_ceiling, 0.0, 0.0)); return desc_; } @@ -556,15 +568,17 @@ class ScopedActivationDescriptor { class ScopedCTCLossDescriptor { public: ScopedCTCLossDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateCTCLossDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenCreateCTCLossDescriptor(&desc_)); } ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyCTCLossDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + phi::dynload::miopenDestroyCTCLossDescriptor(desc_)); } template inline miopenCTCLossDescriptor_t descriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetCTCLossDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetCTCLossDescriptor( desc_, CudnnDataType::type, 0, false)); return desc_; } @@ -574,5 +588,6 @@ class ScopedCTCLossDescriptor { DISABLE_COPY_AND_ASSIGN(ScopedCTCLossDescriptor); }; -} // namespace platform -} // namespace paddle +} // namespace gpu +} // namespace backends +} // 
namespace phi diff --git a/paddle/phi/kernels/funcs/cross_entropy.cu b/paddle/phi/kernels/funcs/cross_entropy.cu index 9f08214ef5a072cb0c3163d47ff302c21d3224da..add838106bfe8d584dd7ddeb7e8cc5bf15a3d8d8 100644 --- a/paddle/phi/kernels/funcs/cross_entropy.cu +++ b/paddle/phi/kernels/funcs/cross_entropy.cu @@ -14,9 +14,9 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/cross_entropy.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math.h" diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu index afa73f0a5719b06de3f6b92c790ca6568c373108..0f4f39629e879313c74d4e9ca80b67769f4e95e1 100644 --- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/fluid/operators/layout_utils.h" #include "paddle/fluid/operators/norm_utils.cu.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/flags.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" @@ -37,7 +37,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace phi { template -using CudnnDataType = paddle::platform::CudnnDataType; +using CudnnDataType = phi::backends::gpu::CudnnDataType; template using BatchNormParamType = typename CudnnDataType::BatchNormParamType; @@ -644,7 +644,7 @@ void BatchNormGradRawKernel(const Context &ctx, C, scale.dims()[0])); - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; #ifdef PADDLE_WITH_HIP auto compute_format = data_layout == DataLayout::kNHWC ? 
DataLayout::kNHWC : DataLayout::kNCHW; diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 126b5c81ab3e2eac492107fee7b31bf40c78432b..cd1665ba9032e6b9c19c4629268ab7dfce5a5615 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -22,9 +22,9 @@ namespace cub = hipcub; #include "paddle/fluid/operators/layout_utils.h" #include "paddle/fluid/operators/norm_utils.cu.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/flags.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" @@ -45,7 +45,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace phi { template -using CudnnDataType = paddle::platform::CudnnDataType; +using CudnnDataType = phi::backends::gpu::CudnnDataType; template using BatchNormParamType = typename CudnnDataType::BatchNormParamType; @@ -603,7 +603,7 @@ void BatchNormKernel(const Context &ctx, int N, C, H, W, D; phi::funcs::ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D); - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; #ifdef PADDLE_WITH_HIP auto compute_format = diff --git a/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu b/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu index 7bafa03aba5ff70ea9c9ab692ebba3a9373fdb08..934b0fe152bd8d7acff441af1d51602e64859a64 100644 --- a/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/cross_entropy_grad_kernel.cu @@ -23,8 +23,8 @@ namespace cub = hipcub; #endif #include "paddle/fluid/operators/math/softmax.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" diff --git a/paddle/phi/kernels/gpu/cross_entropy_kernel.cu b/paddle/phi/kernels/gpu/cross_entropy_kernel.cu index b9a2b07e6961ffd52a8798bf18c305088608a1d9..94d2d7a744c21d4ce075ada306fd62839b606eac 100644 --- a/paddle/phi/kernels/gpu/cross_entropy_kernel.cu +++ b/paddle/phi/kernels/gpu/cross_entropy_kernel.cu @@ -23,8 +23,8 @@ namespace cub = hipcub; #endif #include "paddle/fluid/operators/math/softmax.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" @@ -772,10 +772,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx, : MIOPEN_SOFTMAX_MODE_CHANNEL; PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( handle, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), descp, logits_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), descp, softmax_data, MIOPEN_SOFTMAX_LOG, @@ -787,10 +787,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx, handle, CUDNN_SOFTMAX_LOG, mode, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), descp, logits_data, - paddle::platform::CudnnDataType::kZero(), + 
phi::backends::gpu::CudnnDataType::kZero(), descp, softmax_data)); #endif @@ -1206,10 +1206,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx, : MIOPEN_SOFTMAX_MODE_CHANNEL; PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( handle, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), descp, logits_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), descp, softmax_data, MIOPEN_SOFTMAX_LOG, @@ -1221,10 +1221,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx, handle, CUDNN_SOFTMAX_LOG, mode, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), descp, logits_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), descp, softmax_data)); #endif diff --git a/paddle/phi/kernels/gpu/instance_norm_utils.h b/paddle/phi/kernels/gpu/instance_norm_utils.h index 50dfe4ad222c0c3ddbe4ba08c2a7e0082e7e4d84..e52fe868c39ec5eeafa0e2493c3d85313f2d0898 100644 --- a/paddle/phi/kernels/gpu/instance_norm_utils.h +++ b/paddle/phi/kernels/gpu/instance_norm_utils.h @@ -26,12 +26,12 @@ namespace cub = hipcub; #endif -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" namespace phi { template -using CudnnDataType = paddle::platform::CudnnDataType; +using CudnnDataType = phi::backends::gpu::CudnnDataType; template using BatchNormParamType = typename CudnnDataType::BatchNormParamType; diff --git a/paddle/phi/kernels/gpu/rnn_functor.h b/paddle/phi/kernels/gpu/rnn_functor.h index 14778dc1847f274ac6552e6d9dc984574d447a06..59c5988986360849c8158d704a9b74cb64e17952 100644 --- a/paddle/phi/kernels/gpu/rnn_functor.h +++ b/paddle/phi/kernels/gpu/rnn_functor.h @@ -15,7 +15,7 @@ #pragma once #include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" @@ -64,7 +64,7 @@ class RNNDescriptors { size_t *reserve_size, DenseTensor *dropout_state) { int numDirections = is_bidirec_ ? 
2 : 1; - gpuDnnDataType_t cudnn_type = paddle::platform::CudnnDataType::type; + gpuDnnDataType_t cudnn_type = phi::backends::gpu::CudnnDataType::type; // ------------------- cudnn x, y descriptors --------------------- std::vector dims_x = {batch_size_, input_size_, 1}; std::vector strides_x = {input_size_, 1, 1}; @@ -179,7 +179,7 @@ class RNNDescriptors { phi::errors::InvalidArgument( "The cudnn rnn and setting weight size should be same.")); // ------------------- cudnn weight descriptors --------------------- - auto layout = paddle::platform::DataLayout::kNCHW; + auto layout = phi::backends::gpu::DataLayout::kNCHW; int dim_tmp = weights_size_ / sizeof(T); std::vector dim_w = {dim_tmp, 1, 1}; weight_desc_.descriptor(layout, dim_w); @@ -250,19 +250,19 @@ class RNNDescriptors { std::vector y_descs_; #endif - paddle::platform::ScopedTensorDescriptor x_desc_; - paddle::platform::ScopedTensorDescriptor y_desc_; + phi::backends::gpu::ScopedTensorDescriptor x_desc_; + phi::backends::gpu::ScopedTensorDescriptor y_desc_; #if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201 - paddle::platform::ScopedRNNTensorDescriptor x_seq_desc_; - paddle::platform::ScopedRNNTensorDescriptor y_seq_desc_; + phi::backends::gpu::ScopedRNNTensorDescriptor x_seq_desc_; + phi::backends::gpu::ScopedRNNTensorDescriptor y_seq_desc_; #endif - paddle::platform::ScopedTensorDescriptor init_h_desc_; - paddle::platform::ScopedTensorDescriptor init_c_desc_; - paddle::platform::ScopedTensorDescriptor last_h_desc_; - paddle::platform::ScopedTensorDescriptor last_c_desc_; - paddle::platform::ScopedDropoutDescriptor dropout_desc_; - paddle::platform::ScopedFilterDescriptor weight_desc_; - paddle::platform::ScopedRNNDescriptor rnn_desc_; + phi::backends::gpu::ScopedTensorDescriptor init_h_desc_; + phi::backends::gpu::ScopedTensorDescriptor init_c_desc_; + phi::backends::gpu::ScopedTensorDescriptor last_h_desc_; + phi::backends::gpu::ScopedTensorDescriptor last_c_desc_; + phi::backends::gpu::ScopedDropoutDescriptor dropout_desc_; + phi::backends::gpu::ScopedFilterDescriptor weight_desc_; + phi::backends::gpu::ScopedRNNDescriptor rnn_desc_; }; template diff --git a/paddle/phi/kernels/gpu/sync_batch_norm_utils.h b/paddle/phi/kernels/gpu/sync_batch_norm_utils.h index 726273451625322b4ddd28a7a7122d68e0ce7c4a..cfb2758e62def45b88f634614ed04a63105c3208 100644 --- a/paddle/phi/kernels/gpu/sync_batch_norm_utils.h +++ b/paddle/phi/kernels/gpu/sync_batch_norm_utils.h @@ -31,15 +31,15 @@ namespace cub = hipcub; #include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" #endif #include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/kernels/funcs/norm_utils.h" namespace phi { template -using CudnnDataType = paddle::platform::CudnnDataType; +using CudnnDataType = phi::backends::gpu::CudnnDataType; template using BatchNormParamType = typename CudnnDataType::BatchNormParamType; diff --git a/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu b/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu index d1cc738e2b01bd7094f1d167ed75d11fd11e9dba..6bcfd328aacacfd2b9f5f86a847a6c36a7cff262 100644 --- a/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu @@ -15,11 +15,11 @@ #ifndef PADDLE_WITH_HIP #include "paddle/phi/kernels/affine_grid_grad_kernel.h" -#include 
"paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/core/kernel_registry.h" @@ -27,7 +27,7 @@ namespace phi { using ScopedSpatialTransformerDescriptor = - paddle::platform::ScopedSpatialTransformerDescriptor; + phi::backends::gpu::ScopedSpatialTransformerDescriptor; template void AffineGridGradCudnnKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu b/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu index 6c5d305abbff2f85c93c15cc8181d0e0eb06cacf..2f1c4de3716e2de4cedab2d9b1982011f87016ac 100644 --- a/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu +++ b/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu @@ -15,11 +15,11 @@ #ifndef PADDLE_WITH_HIP #include "paddle/phi/kernels/affine_grid_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/core/kernel_registry.h" @@ -27,7 +27,7 @@ namespace phi { using ScopedSpatialTransformerDescriptor = - paddle::platform::ScopedSpatialTransformerDescriptor; + phi::backends::gpu::ScopedSpatialTransformerDescriptor; template void AffineGridCudnnKernel(const Context& dev_ctx, diff --git a/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h b/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h index 368b5585b291e3ae075b352ac8c8e05c273bc232..c5fe47a2431df496ea6799bef6662bd79fe2f0af 100644 --- a/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h +++ b/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h @@ -17,8 +17,8 @@ limitations under the License. 
*/ #include -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h" #include "paddle/phi/backends/dynload/cudnn_frontend.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_desc.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/utils/data_type.h" @@ -86,7 +86,8 @@ class CudnnFrontendConvHelper { cudnnTensorFormat_t layout_format) { auto transformed_dims = phi::vectorize(tensor->dims()); if (layout_format == CUDNN_TENSOR_NHWC) { - transformed_dims = paddle::platform::TransformDimOrder(transformed_dims); + transformed_dims = + phi::backends::gpu::TransformDimOrder(transformed_dims); } std::vector strides = GenerateStrides(transformed_dims, layout_format); @@ -95,7 +96,7 @@ class CudnnFrontendConvHelper { .setStrides(strides.size(), strides.data()) .setId(id) .setAlignment(GetAlignment(tensor)) - .setDataType(paddle::platform::ToCudnnDataType(tensor->dtype())) + .setDataType(phi::backends::gpu::ToCudnnDataType(tensor->dtype())) .build(); } diff --git a/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h b/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h index e7d912a4825991c9f509e787e2567833fdf2cd9f..cb03efc42bccbdfa4059ba71f88d232b1c53f7b2 100644 --- a/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h +++ b/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h @@ -559,7 +559,7 @@ struct SearchAlgorithmBase { size_t workspace_size_limit = CalcWorkspaceLimitInBytes(UseFixedWorkspace()); auto workspace_handle = ctx.cudnn_workspace_handle(); - if (paddle::platform::CudnnDataType::type != CUDNN_DATA_HALF) { + if (phi::backends::gpu::CudnnDataType::type != CUDNN_DATA_HALF) { size_t max_workspace_size = GetMaxWorkspaceSize(args, workspace_size_limit); VLOG(3) << "max_workspace_size=" << ToMegaBytes(max_workspace_size) @@ -674,7 +674,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase { bool enable_autotune = true) { SearchResult result; bool use_autotune = false; - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; SetConvMathType(ctx, dtype, args.cdesc); if (deterministic) { @@ -734,7 +734,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase { static void SetConvMathType( const phi::GPUContext& ctx, cudnnDataType_t dtype, - const paddle::platform::ConvolutionDescriptor& cdesc) { + const phi::backends::gpu::ConvolutionDescriptor& cdesc) { #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) if (ctx.GetComputeCapability() >= 70 && dtype == CUDNN_DATA_HALF) { PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( diff --git a/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h b/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h index 4353c6789de3738f0880cf3c026b99d537eed803..ea0969544b58cf7d2113f22ea344b4c7ef6acac2 100644 --- a/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h +++ b/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h @@ -20,6 +20,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/memory/memory.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/kernels/autotune/cache.h" #include "paddle/phi/kernels/funcs/eigen/common.h" @@ -28,11 +29,11 @@ limitations under the License. 
*/ namespace phi { -using GPUDNNDataLayout = paddle::platform::DataLayout; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; template using ScalingParamType = - typename paddle::platform::CudnnDataType::ScalingParamType; + typename phi::backends::gpu::CudnnDataType::ScalingParamType; enum class ConvKind { kForward = 1, kBackwardData = 2, kBackwardFilter = 3 }; @@ -96,10 +97,10 @@ static std::ostream& operator<<(std::ostream& out, const std::vector& v) { template struct ConvArgsBase { HandleT handle; - paddle::platform::TensorDescriptor idesc; - paddle::platform::TensorDescriptor odesc; - paddle::platform::FilterDescriptor wdesc; - paddle::platform::ConvolutionDescriptor cdesc; + phi::backends::gpu::TensorDescriptor idesc; + phi::backends::gpu::TensorDescriptor odesc; + phi::backends::gpu::FilterDescriptor wdesc; + phi::backends::gpu::ConvolutionDescriptor cdesc; const phi::DenseTensor* x = nullptr; const phi::DenseTensor* w = nullptr; diff --git a/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h b/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h index e52c2210c82c42f1f42294507a5f8a1c66758f73..9b7b35f4357cd41fc50ab9e7e7d244260d7efa66 100644 --- a/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h +++ b/paddle/phi/kernels/gpudnn/conv_gpudnn_info.h @@ -18,7 +18,7 @@ limitations under the License. */ #include #include -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" DECLARE_int64(conv_workspace_size_limit); DECLARE_bool(cudnn_exhaustive_search); diff --git a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu index 5d1a92a3119bc427ef4d13d516125b6715b3e6fc..f1114e59bad75454e63e1a8d4aba1d7a40482f2a 100644 --- a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu @@ -52,8 +52,8 @@ void ConvCudnnGradKernelImplV7( const std::vector& strides, const std::vector& padding_common, const std::vector& dilations, - paddle::platform::DataLayout compute_format, - paddle::platform::DataLayout layout, + phi::backends::gpu::DataLayout compute_format, + phi::backends::gpu::DataLayout layout, bool use_addto, bool exhaustive_search, bool deterministic, @@ -70,8 +70,8 @@ void ConvCudnnGradKernelImplV7( auto handle = ctx.cudnn_handle(); auto workspace_handle = ctx.cudnn_workspace_handle(); - auto dtype = paddle::platform::CudnnDataType::type; - auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout); + auto dtype = phi::backends::gpu::CudnnDataType::type; + auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout); ConvArgs args1{handle, transformed_input_grad, @@ -96,16 +96,16 @@ void ConvCudnnGradKernelImplV7( int i_n, i_c, i_d, i_h, i_w; int o_n, o_c, o_d, o_h, o_w; - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { GetNCDHW(transformed_input->dims(), - paddle::platform::DataLayout::kNHWC, + phi::backends::gpu::DataLayout::kNHWC, &i_n, &i_c, &i_d, &i_h, &i_w); GetNCDHW(transformed_output_grad_channel->dims(), - paddle::platform::DataLayout::kNHWC, + phi::backends::gpu::DataLayout::kNHWC, &o_n, &o_c, &o_d, @@ -113,14 +113,14 @@ void ConvCudnnGradKernelImplV7( &o_w); } else { GetNCDHW(transformed_input->dims(), - paddle::platform::DataLayout::kNCHW, + phi::backends::gpu::DataLayout::kNCHW, &i_n, &i_c, &i_d, &i_h, &i_w); GetNCDHW(transformed_output_grad_channel->dims(), - paddle::platform::DataLayout::kNCHW, + phi::backends::gpu::DataLayout::kNCHW, &o_n, &o_c, &o_d, @@ -347,7 +347,7 @@ 
void ConvCudnnGradKernelImplV8( const std::vector& strides, const std::vector& padding_common, const std::vector& dilations, - paddle::platform::DataLayout layout, + phi::backends::gpu::DataLayout layout, bool use_addto, bool exhaustive_search, bool deterministic, @@ -363,8 +363,8 @@ void ConvCudnnGradKernelImplV8( cudnnHandle_t handle = const_cast(ctx.cudnn_handle()); auto workspace_handle = ctx.cudnn_workspace_handle(); - auto dtype = paddle::platform::CudnnDataType::type; - auto layout_format = paddle::platform::GetCudnnTensorFormat(layout); + auto dtype = phi::backends::gpu::CudnnDataType::type; + auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout); if (input_grad) { CudnnConvBwdDataV8(transformed_output_grad_channel, @@ -449,11 +449,11 @@ void ConvCudnnGradKernel(const Context& ctx, const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; #ifdef PADDLE_WITH_HIP // HIP MIOPEN ONLY SUPPORT NCHW format - auto compute_format = paddle::platform::DataLayout::kNCHW; + auto compute_format = phi::backends::gpu::DataLayout::kNCHW; #else #if CUDNN_VERSION_MIN(8, 1, 0) const bool compute_in_nhwc = @@ -463,13 +463,13 @@ void ConvCudnnGradKernel(const Context& ctx, const bool compute_in_nhwc = dtype == CUDNN_DATA_HALF && IsVoltaOrLater(ctx); #endif auto compute_format = compute_in_nhwc && channel_last - ? paddle::platform::DataLayout::kNHWC - : paddle::platform::DataLayout::kNCHW; + ? phi::backends::gpu::DataLayout::kNHWC + : phi::backends::gpu::DataLayout::kNCHW; #endif VLOG(3) << "Compute ConvGradOp with cuDNN:" << " data_format=" << data_format << " compute_format=" - << (compute_format == paddle::platform::DataLayout::kNHWC ? "NHWC" - : "NCHW"); + << (compute_format == phi::backends::gpu::DataLayout::kNHWC ? 
"NHWC" + : "NCHW"); // transform Tensor DenseTensor transformed_input_channel(input.type()); @@ -478,7 +478,7 @@ void ConvCudnnGradKernel(const Context& ctx, DenseTensor transformed_filter_channel(filter.type()); DenseTensor transformed_filter_grad_channel(filter.type()); - if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { + if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) { VLOG(3) << "Transform input, output_grad, input_grad and tensor from " "NHWC to NCHW."; ResizeToChannelFirst(ctx, &input, &transformed_input_channel); @@ -507,7 +507,7 @@ void ConvCudnnGradKernel(const Context& ctx, } } - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { VLOG(3) << "Transform filter and filter_grad tensor from NCHW to NHWC."; ResizeToChannelLast(ctx, &filter, &transformed_filter_channel); TransToChannelLast(ctx, &filter, &transformed_filter_channel); @@ -528,7 +528,7 @@ void ConvCudnnGradKernel(const Context& ctx, auto filter_dims = transformed_filter_channel.dims(); DDim in_data_dims; DDim filter_data_dims; - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { in_data_dims = slice_ddim(in_dims, 2, in_dims.size()); filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); } else { @@ -553,7 +553,7 @@ void ConvCudnnGradKernel(const Context& ctx, std::vector padding_diff(data_dim); std::vector new_input_shape_vec(data_dim + 2); new_input_shape_vec[0] = transformed_input_channel.dims()[0]; - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { new_input_shape_vec[1] = transformed_input_channel.dims()[1]; } else { new_input_shape_vec[data_dim + 1] = @@ -563,14 +563,14 @@ void ConvCudnnGradKernel(const Context& ctx, for (size_t i = 0; i < data_dim; ++i) { padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]); padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]); - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { new_input_shape_vec[i + 2] = transformed_input_channel.dims()[i + 2] + padding_diff[i]; } else { new_input_shape_vec[i + 1] = transformed_input_channel.dims()[i + 1] + padding_diff[i]; } - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i]; input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i]; } else { @@ -624,15 +624,15 @@ void ConvCudnnGradKernel(const Context& ctx, } } } - paddle::platform::DataLayout layout = - compute_format == paddle::platform::DataLayout::kNHWC - ? paddle::platform::DataLayout::kNHWC - : paddle::platform::DataLayout::kNCHW; + phi::backends::gpu::DataLayout layout = + compute_format == phi::backends::gpu::DataLayout::kNHWC + ? phi::backends::gpu::DataLayout::kNHWC + : phi::backends::gpu::DataLayout::kNCHW; // TODO(phlrain): replace paddle::platform::DataLaytout to phi::DataLayout if (transformed_input.dims().size() == 5) { - layout = compute_format == paddle::platform::DataLayout::kNHWC - ? paddle::platform::DataLayout::kNDHWC - : paddle::platform::DataLayout::kNCDHW; + layout = compute_format == phi::backends::gpu::DataLayout::kNHWC + ? 
phi::backends::gpu::DataLayout::kNDHWC + : phi::backends::gpu::DataLayout::kNCDHW; } #ifdef PADDLE_WITH_CUDNN_FRONTEND @@ -717,14 +717,15 @@ void ConvCudnnGradKernel(const Context& ctx, } } - if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { + if (channel_last && + compute_format == phi::backends::gpu::DataLayout::kNCHW) { TransToChannelLast( ctx, &transformed_input_grad_channel, input_grad); } } if (filter_grad) { - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { TransToChannelFirst( ctx, &transformed_filter_grad_channel, filter_grad); } @@ -1008,11 +1009,11 @@ void ConvCudnnGradGradKernel( c_group = groups; groups = 1; #endif - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; auto handle = ctx.cudnn_handle(); - auto layout = paddle::platform::GetCudnnTensorFormat( - paddle::platform::DataLayout::kNCHW); + auto layout = phi::backends::gpu::GetCudnnTensorFormat( + phi::backends::gpu::DataLayout::kNCHW); ConvArgs args1{handle, &transformed_ddX, @@ -1023,7 +1024,7 @@ void ConvCudnnGradGradKernel( dilations, dtype, groups, - paddle::platform::DataLayout::kNCHW}; + phi::backends::gpu::DataLayout::kNCHW}; ConvArgs args2{handle, &transformed_X, ddW, @@ -1033,7 +1034,7 @@ void ConvCudnnGradGradKernel( dilations, dtype, groups, - paddle::platform::DataLayout::kNCHW}; + phi::backends::gpu::DataLayout::kNCHW}; ConvArgs args3{handle, &transformed_ddX, dW, @@ -1043,7 +1044,7 @@ void ConvCudnnGradGradKernel( dilations, dtype, groups, - paddle::platform::DataLayout::kNCHW}; + phi::backends::gpu::DataLayout::kNCHW}; ConvArgs args4{handle, &transformed_dX, ddW, @@ -1053,7 +1054,7 @@ void ConvCudnnGradGradKernel( dilations, dtype, groups, - paddle::platform::DataLayout::kNCHW}; + phi::backends::gpu::DataLayout::kNCHW}; #ifdef PADDLE_WITH_HIP SearchResult fwd_result1; diff --git a/paddle/phi/kernels/gpudnn/conv_kernel.cu b/paddle/phi/kernels/gpudnn/conv_kernel.cu index 404405665316255ea48663ca7d7817e4e5b1bef7..04ac84d606effd425df0aaf645bc7bce1d0003d0 100644 --- a/paddle/phi/kernels/gpudnn/conv_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_kernel.cu @@ -50,8 +50,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, const std::vector& strides, const std::vector& padding_common, const std::vector& dilations, - paddle::platform::DataLayout compute_format, - paddle::platform::DataLayout layout, + phi::backends::gpu::DataLayout compute_format, + phi::backends::gpu::DataLayout layout, bool exhaustive_search, bool deterministic, int groups, @@ -63,8 +63,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, auto handle = ctx.cudnn_handle(); auto workspace_handle = ctx.cudnn_workspace_handle(); - auto layout_format = paddle::platform::GetCudnnTensorFormat(layout); - auto dtype = paddle::platform::CudnnDataType::type; + auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout); + auto dtype = phi::backends::gpu::CudnnDataType::type; // ------------------- cudnn descriptors --------------------- ConvArgs args{handle, @@ -113,16 +113,16 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, int i_n, i_c, i_d, i_h, i_w; int o_n, o_c, o_d, o_h, o_w; - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { GetNCDHW(transformed_input->dims(), - paddle::platform::DataLayout::kNHWC, + phi::backends::gpu::DataLayout::kNHWC, &i_n, &i_c, 
&i_d, &i_h, &i_w); GetNCDHW(transformed_output->dims(), - paddle::platform::DataLayout::kNHWC, + phi::backends::gpu::DataLayout::kNHWC, &o_n, &o_c, &o_d, @@ -130,14 +130,14 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, &o_w); } else { GetNCDHW(transformed_input->dims(), - paddle::platform::DataLayout::kNCHW, + phi::backends::gpu::DataLayout::kNCHW, &i_n, &i_c, &i_d, &i_h, &i_w); GetNCDHW(transformed_output->dims(), - paddle::platform::DataLayout::kNCHW, + phi::backends::gpu::DataLayout::kNCHW, &o_n, &o_c, &o_d, @@ -227,7 +227,7 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor, const std::vector& strides, const std::vector& padding_common, const std::vector& dilations, - paddle::platform::DataLayout layout, + phi::backends::gpu::DataLayout layout, bool exhaustive_search, bool deterministic, int groups, @@ -247,8 +247,8 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor, cudnnHandle_t handle = const_cast(ctx.cudnn_handle()); auto workspace_handle = ctx.cudnn_workspace_handle(); - auto layout_format = paddle::platform::GetCudnnTensorFormat(layout); - auto dtype = paddle::platform::CudnnDataType::type; + auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout); + auto dtype = phi::backends::gpu::CudnnDataType::type; float alpha = 1.0f; float beta = 0.0f; @@ -368,11 +368,11 @@ void ConvCudnnKernel(const Context& ctx, "FLAGS_cudnn_deterministic True at same time.")); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; #ifdef PADDLE_WITH_HIP // HIP MIOPEN ONLY SUPPORT NCHW format - auto compute_format = paddle::platform::DataLayout::kNCHW; + auto compute_format = phi::backends::gpu::DataLayout::kNCHW; #else #if CUDNN_VERSION_MIN(8, 1, 0) // Tensor Core introduced from Volta GPUs supports more faster conv op @@ -388,20 +388,20 @@ void ConvCudnnKernel(const Context& ctx, // We will only do data format conversion from NHWC to NCHW. // cudnn will convert NCHW to NHWC automatically on Tensor Core. auto compute_format = compute_in_nhwc && channel_last - ? paddle::platform::DataLayout::kNHWC - : paddle::platform::DataLayout::kNCHW; + ? phi::backends::gpu::DataLayout::kNHWC + : phi::backends::gpu::DataLayout::kNCHW; #endif VLOG(3) << "Compute ConvOp with cuDNN:" << " data_format=" << data_format << " compute_format=" - << (compute_format == paddle::platform::DataLayout::kNHWC ? "NHWC" - : "NCHW"); + << (compute_format == phi::backends::gpu::DataLayout::kNHWC ? 
"NHWC" + : "NCHW"); // ------------ transformed tensor ----------- DenseTensor transformed_input_channel(input.type()); DenseTensor transformed_output(output->type()); DenseTensor transformed_filter_channel(filter.type()); - if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { + if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) { VLOG(3) << "Transform input tensor from NHWC to NCHW."; ResizeToChannelFirst(ctx, &input, &transformed_input_channel); TransToChannelFirst(ctx, &input, &transformed_input_channel); @@ -412,7 +412,7 @@ void ConvCudnnKernel(const Context& ctx, transformed_input_channel.ShareDataWith(input); transformed_output.ShareDataWith(*output); } - if (compute_format == paddle::platform::DataLayout::kNHWC) { + if (compute_format == phi::backends::gpu::DataLayout::kNHWC) { VLOG(3) << "Transform filter tensor from NCHW to NHWC."; ResizeToChannelLast(ctx, &filter, &transformed_filter_channel); TransToChannelLast(ctx, &filter, &transformed_filter_channel); @@ -426,7 +426,7 @@ void ConvCudnnKernel(const Context& ctx, DDim in_data_dims; DDim filter_data_dims; - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { in_data_dims = slice_ddim(in_dims, 2, in_dims.size()); filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); } else { @@ -448,7 +448,7 @@ void ConvCudnnKernel(const Context& ctx, std::vector new_input_shape_vec(data_dim + 2); new_input_shape_vec[0] = transformed_input_channel.dims()[0]; - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { new_input_shape_vec[1] = transformed_input_channel.dims()[1]; } else { new_input_shape_vec[data_dim + 1] = @@ -459,14 +459,14 @@ void ConvCudnnKernel(const Context& ctx, for (size_t i = 0; i < data_dim; ++i) { padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]); padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]); - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { new_input_shape_vec[i + 2] = transformed_input_channel.dims()[i + 2] + padding_diff[i]; } else { new_input_shape_vec[i + 1] = transformed_input_channel.dims()[i + 1] + padding_diff[i]; } - if (compute_format == paddle::platform::DataLayout::kNCHW) { + if (compute_format == phi::backends::gpu::DataLayout::kNCHW) { input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i]; input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i]; } else { @@ -513,14 +513,14 @@ void ConvCudnnKernel(const Context& ctx, } } - paddle::platform::DataLayout layout = - compute_format == paddle::platform::DataLayout::kNHWC - ? paddle::platform::DataLayout::kNHWC - : paddle::platform::DataLayout::kNCHW; + phi::backends::gpu::DataLayout layout = + compute_format == phi::backends::gpu::DataLayout::kNHWC + ? phi::backends::gpu::DataLayout::kNHWC + : phi::backends::gpu::DataLayout::kNCHW; if (transformed_input.dims().size() == 5) { - layout = compute_format == paddle::platform::DataLayout::kNHWC - ? paddle::platform::DataLayout::kNDHWC - : paddle::platform::DataLayout::kNCDHW; + layout = compute_format == phi::backends::gpu::DataLayout::kNHWC + ? 
phi::backends::gpu::DataLayout::kNDHWC + : phi::backends::gpu::DataLayout::kNCDHW; } #ifdef PADDLE_WITH_CUDNN_FRONTEND @@ -564,7 +564,7 @@ void ConvCudnnKernel(const Context& ctx, &transformed_output); #endif - if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { + if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) { TransToChannelLast(ctx, &transformed_output, output); } } diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu index 78961c86b070157bcc60907e3d601ae7ff4f3f71..ca980fd6f69cebd35f321ae2f47ed16816e2ca88 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu @@ -28,16 +28,16 @@ limitations under the License. */ #include "paddle/phi/kernels/transpose_kernel.h" #ifdef PADDLE_WITH_HIP -#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h" +#include "paddle/phi/backends/gpu/rocm/miopen_helper.h" #include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h" #else -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h" #include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h" #endif namespace phi { -using GPUDNNDataLayout = paddle::platform::DataLayout; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; template void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, @@ -171,7 +171,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, groups = 1; #endif - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; auto handle = ctx.cudnn_handle(); ConvArgs args1{handle, @@ -203,7 +203,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, SearchResult filter_result; #endif - auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout); + auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout); size_t workspace_size = 0; bool deterministic = FLAGS_cudnn_deterministic; T* dx_data = nullptr; @@ -616,10 +616,11 @@ void Conv2dTransposeDoubleGradGPUDNNKernel( c_group = groups; groups = 1; #endif - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; auto handle = ctx.cudnn_handle(); - auto layout = paddle::platform::GetCudnnTensorFormat(GPUDNNDataLayout::kNCHW); + auto layout = + phi::backends::gpu::GetCudnnTensorFormat(GPUDNNDataLayout::kNCHW); ConvArgs args1{handle, &transformed_ddout_channel, diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu index 593114ac65f7a3898b963c5b186dd5c137a26ea4..f45cf9bb1796dd00f3205e6f7a92d99330edc111 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu @@ -26,16 +26,16 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/transpose_kernel.h" #ifdef PADDLE_WITH_HIP -#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h" +#include "paddle/phi/backends/gpu/rocm/miopen_helper.h" #include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h" #else -#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" +#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h" #include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h" #endif namespace phi { -using GPUDNNDataLayout = paddle::platform::DataLayout; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; template void ConvTransposeRawGPUDNNKernel(const Context& ctx, @@ -194,10 +194,10 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx, #endif // ------------------- cudnn conv algorithm --------------------- auto handle = ctx.cudnn_handle(); - auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout); + auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout); bool deterministic = FLAGS_cudnn_deterministic; - auto dtype = paddle::platform::CudnnDataType::type; + auto dtype = phi::backends::gpu::CudnnDataType::type; // ------------------- cudnn descriptors --------------------- ConvArgs args{handle, &transformed_out, diff --git a/paddle/phi/kernels/gpudnn/pool_gpudnn.h b/paddle/phi/kernels/gpudnn/pool_gpudnn.h index 69fd51b7f0ddce481f85fb172b501e8c47361f21..d830aad6b4f4f36c065c248f8a823290d23720ce 100644 --- a/paddle/phi/kernels/gpudnn/pool_gpudnn.h +++ b/paddle/phi/kernels/gpudnn/pool_gpudnn.h @@ -16,18 +16,18 @@ limitations under the License. */ #include -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" namespace phi { -using GPUDNNDataLayout = paddle::platform::DataLayout; -using PoolingMode = paddle::platform::PoolingMode; -using ScopedPoolingDescriptor = paddle::platform::ScopedPoolingDescriptor; -using ScopedTensorDescriptor = paddle::platform::ScopedTensorDescriptor; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; +using PoolingMode = phi::backends::gpu::PoolingMode; +using ScopedPoolingDescriptor = phi::backends::gpu::ScopedPoolingDescriptor; +using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor; template using ScalingParamType = - typename paddle::platform::CudnnDataType::ScalingParamType; + typename phi::backends::gpu::CudnnDataType::ScalingParamType; inline GPUDNNDataLayout GetLayoutFromStr(std::string data_format) { if (data_format == "NHWC") { diff --git a/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu b/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu index 0798eab5e181b6be3cc865296a851447dc3f685d..2b2b61c5a376bab866e429d10eacc9eef13aec26 100644 --- a/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu @@ -14,8 +14,8 @@ limitations under the License. */ #include "paddle/phi/kernels/pool_grad_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/pooling.h" diff --git a/paddle/phi/kernels/gpudnn/pool_kernel.cu b/paddle/phi/kernels/gpudnn/pool_kernel.cu index 16139d48b23789718e16987798966dfaea5cd131..7e0545551b0454ac1d9b65bb7d2268e568ec8d0b 100644 --- a/paddle/phi/kernels/gpudnn/pool_kernel.cu +++ b/paddle/phi/kernels/gpudnn/pool_kernel.cu @@ -14,7 +14,7 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/pool_kernel.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/pooling.h" diff --git a/paddle/phi/kernels/gpudnn/softmax_gpudnn.h b/paddle/phi/kernels/gpudnn/softmax_gpudnn.h index a81357e99b58df46dbfd0c7b837830009fc1253d..8ba329301c9160cd6daf70f524484315a6cb0eb8 100644 --- a/paddle/phi/kernels/gpudnn/softmax_gpudnn.h +++ b/paddle/phi/kernels/gpudnn/softmax_gpudnn.h @@ -24,8 +24,8 @@ limitations under the License. */ #include "paddle/phi/kernels/primitive/kernel_primitives.h" // See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" +#include "paddle/phi/backends/gpu/gpu_dnn.h" #define MATRIX_SOFTMAX_ALIGN_BYTES 16 #define MATRIX_SOFTMAX_THREAHOLD 100000 @@ -55,8 +55,8 @@ limitations under the License. */ namespace phi { -using ScopedTensorDescriptor = paddle::platform::ScopedTensorDescriptor; -using GPUDNNDataLayout = paddle::platform::DataLayout; +using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor; +using GPUDNNDataLayout = phi::backends::gpu::DataLayout; // Vectorization trait 4 * sizeof(T) template @@ -1065,10 +1065,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx, auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE; PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( handle, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), desc, x_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), desc, out_data, algo, @@ -1082,10 +1082,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx, handle, algo, mode, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), desc, x_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), desc, out_data)); #endif @@ -1137,12 +1137,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx, auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE; PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxBackward_V2( handle, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), desc, out_data, desc, dout_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), desc, dx_data, algo, @@ -1156,12 +1156,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx, handle, algo, mode, - paddle::platform::CudnnDataType::kOne(), + phi::backends::gpu::CudnnDataType::kOne(), desc, out_data, desc, dout_data, - paddle::platform::CudnnDataType::kZero(), + phi::backends::gpu::CudnnDataType::kZero(), desc, dx_data)); #endif