Unverified commit fd9c91c3, authored by H huangjiyi, committed by GitHub

[PHI decoupling] move several header files from fluid to phi (#48415)

* decouple cudnn_desc.h from fluid

* move cudnn_desc.h from fluid to phi

* fix bugs

* decouple cudnn_helper.h from fluid

* fix bugs

* move cudnn_helper.h from fluid to phi

* add fluid cudnn_helper.h

* move miopen_desc.h from fluid to phi

* move miopen_helper.h from fluid to phi

* fix bugs

* move gpu_dnn.h from fluid to phi

* fix bugs

* update copyright year

* simplify gpu_dnn.h in fluid

* fix bugs

* fix xpu build bug

* fix compile bug

* fix bug
Parent 30a31a53
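All of the hunks below follow one migration pattern: includes move from paddle/fluid/platform/device/gpu/... to paddle/phi/backends/gpu/..., the wrapper types move from the paddle::platform namespace to phi::backends::gpu, and dynamically loaded cuDNN/MIOpen symbols are reached through phi::dynload instead of paddle::platform::dynload. A minimal sketch of the call-site change, mirroring the updated unit tests in this diff (the shapes and the function name are illustrative, not code from the PR):

// Sketch only: what a migrated call site looks like after this PR.
// Old spelling: paddle::platform::ScopedTensorDescriptor and
// paddle::platform::dynload::cudnnGetTensorNdDescriptor(...).
#include <vector>
#include "paddle/phi/backends/gpu/gpu_dnn.h"

void DescribeTensor() {
  using phi::backends::gpu::DataLayout;
  using phi::backends::gpu::ScopedTensorDescriptor;

  ScopedTensorDescriptor tensor_desc;
  std::vector<int> shape = {2, 4, 6, 6};
  // descriptor<T>(layout, dims) fills and returns the underlying
  // cudnnTensorDescriptor_t, as the updated ScopedTensorDescriptor test does.
  auto desc = tensor_desc.descriptor<float>(DataLayout::kNCHW, shape);
  (void)desc;
}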
@@ -14,6 +14,7 @@
 #pragma once
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/tensor.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
...
@@ -54,8 +54,8 @@ struct BNStatsFinalizeArgs {
   cudnnDataType_t param_dtype;
   cudnnTensorFormat_t format;
-  platform::TensorDescriptor in_desc;
-  platform::TensorDescriptor out_desc;
+  phi::backends::gpu::TensorDescriptor in_desc;
+  phi::backends::gpu::TensorDescriptor out_desc;
 };
 template <typename T>
...
@@ -163,11 +163,11 @@ struct NormConvolutionArgs {
   std::vector<int> paddings;
   std::vector<int> dilations;
-  platform::TensorDescriptor in_desc;
-  platform::FilterDescriptor filter_desc;
-  platform::TensorDescriptor out_desc;
-  platform::TensorDescriptor out_stats_desc;
-  platform::ConvolutionDescriptor conv_desc;
+  phi::backends::gpu::TensorDescriptor in_desc;
+  phi::backends::gpu::FilterDescriptor filter_desc;
+  phi::backends::gpu::TensorDescriptor out_desc;
+  phi::backends::gpu::TensorDescriptor out_stats_desc;
+  phi::backends::gpu::ConvolutionDescriptor conv_desc;
   bool is_support;
 };
...
@@ -89,12 +89,12 @@ struct ScaleBiasAddReluArgs {
   cudnnDataType_t param_dtype;
   cudnnTensorFormat_t format;
-  platform::TensorDescriptor in_desc;
-  platform::TensorDescriptor out_desc;
-  platform::TensorDescriptor equiv_scale_bias_desc;
-  platform::TensorDescriptor scale_bias_mean_var_desc;
-  platform::TensorDescriptor bitmask_desc;
-  platform::ActivationDescriptor activation_desc;
+  phi::backends::gpu::TensorDescriptor in_desc;
+  phi::backends::gpu::TensorDescriptor out_desc;
+  phi::backends::gpu::TensorDescriptor equiv_scale_bias_desc;
+  phi::backends::gpu::TensorDescriptor scale_bias_mean_var_desc;
+  phi::backends::gpu::TensorDescriptor bitmask_desc;
+  phi::backends::gpu::ActivationDescriptor activation_desc;
 };
 template <typename T>
...
@@ -20,8 +20,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 TEST(CudnnHelper, ScopedTensorDescriptor) {
-  using paddle::platform::DataLayout;
-  using paddle::platform::ScopedTensorDescriptor;
+  using phi::backends::gpu::DataLayout;
+  using phi::backends::gpu::ScopedTensorDescriptor;
   ScopedTensorDescriptor tensor_desc;
   std::vector<int> shape = {2, 4, 6, 6};
@@ -31,7 +31,7 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
   int nd;
   std::vector<int> dims(4);
   std::vector<int> strides(4);
-  paddle::platform::dynload::cudnnGetTensorNdDescriptor(
+  phi::dynload::cudnnGetTensorNdDescriptor(
       desc, 4, &type, &nd, dims.data(), strides.data());
   EXPECT_EQ(nd, 4);
@@ -50,7 +50,7 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
   std::vector<int> dims_5d(5);
   std::vector<int> strides_5d(5);
-  paddle::platform::dynload::cudnnGetTensorNdDescriptor(
+  phi::dynload::cudnnGetTensorNdDescriptor(
      desc_5d, 5, &type, &nd, dims_5d.data(), strides_5d.data());
   EXPECT_EQ(nd, 5);
@@ -65,8 +65,8 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
 }
 TEST(CudnnHelper, ScopedFilterDescriptor) {
-  using paddle::platform::DataLayout;
-  using paddle::platform::ScopedFilterDescriptor;
+  using phi::backends::gpu::DataLayout;
+  using phi::backends::gpu::ScopedFilterDescriptor;
   ScopedFilterDescriptor filter_desc;
   std::vector<int> shape = {2, 3, 3};
@@ -76,7 +76,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
   int nd;
   cudnnTensorFormat_t format;
   std::vector<int> kernel(3);
-  paddle::platform::dynload::cudnnGetFilterNdDescriptor(
+  phi::dynload::cudnnGetFilterNdDescriptor(
      desc, 3, &type, &format, &nd, kernel.data());
   EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format);
@@ -90,7 +90,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
   auto desc_4d = filter_desc.descriptor<float>(DataLayout::kNCDHW, shape_4d);
   std::vector<int> kernel_4d(4);
-  paddle::platform::dynload::cudnnGetFilterNdDescriptor(
+  phi::dynload::cudnnGetFilterNdDescriptor(
      desc_4d, 4, &type, &format, &nd, kernel_4d.data());
   EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format);
@@ -101,7 +101,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
 }
 TEST(CudnnHelper, ScopedConvolutionDescriptor) {
-  using paddle::platform::ScopedConvolutionDescriptor;
+  using phi::backends::gpu::ScopedConvolutionDescriptor;
   ScopedConvolutionDescriptor conv_desc;
   std::vector<int> src_pads = {2, 2, 2};
@@ -115,14 +115,14 @@ TEST(CudnnHelper, ScopedConvolutionDescriptor) {
   std::vector<int> pads(3);
   std::vector<int> strides(3);
   std::vector<int> dilations(3);
-  paddle::platform::dynload::cudnnGetConvolutionNdDescriptor(desc,
+  phi::dynload::cudnnGetConvolutionNdDescriptor(desc,
                                                 3,
                                                 &nd,
                                                 pads.data(),
                                                 strides.data(),
                                                 dilations.data(),
                                                 &mode,
                                                 &type);
   EXPECT_EQ(nd, 3);
   for (size_t i = 0; i < src_pads.size(); ++i) {
@@ -134,8 +134,8 @@ TEST(CudnnHelper, ScopedConvolutionDescriptor) {
 }
 TEST(CudnnHelper, ScopedPoolingDescriptor) {
-  using paddle::platform::PoolingMode;
-  using paddle::platform::ScopedPoolingDescriptor;
+  using phi::backends::gpu::PoolingMode;
+  using phi::backends::gpu::ScopedPoolingDescriptor;
   ScopedPoolingDescriptor pool_desc;
   std::vector<int> src_kernel = {2, 2, 5};
@@ -150,7 +150,7 @@ TEST(CudnnHelper, ScopedPoolingDescriptor) {
   std::vector<int> kernel(3);
   std::vector<int> pads(3);
   std::vector<int> strides(3);
-  paddle::platform::dynload::cudnnGetPoolingNdDescriptor(
+  phi::dynload::cudnnGetPoolingNdDescriptor(
      desc, 3, &mode, &nan_t, &nd, kernel.data(), pads.data(), strides.data());
   EXPECT_EQ(nd, 3);
...
@@ -20,20 +20,22 @@ namespace paddle {
 namespace platform {
 TEST(TensorDescriptor, Empty) {
-  ActivationDescriptor a;
-  TensorDescriptor t;
-  TensorDescriptor t1;
-  TensorDescriptor *t11 = new TensorDescriptor();
+  phi::backends::gpu::ActivationDescriptor a;
+  phi::backends::gpu::TensorDescriptor t;
+  phi::backends::gpu::TensorDescriptor t1;
+  phi::backends::gpu::TensorDescriptor *t11 =
+      new phi::backends::gpu::TensorDescriptor();
   delete t11;
-  std::unique_ptr<TensorDescriptor> tt(new TensorDescriptor());
+  std::unique_ptr<phi::backends::gpu::TensorDescriptor> tt(
+      new phi::backends::gpu::TensorDescriptor());
 }
 TEST(TensorDescriptor, Normal) {
   phi::DenseTensor tt;
   tt.Resize({2, 3, 4});
-  tt.mutable_data<float>(platform::CPUPlace());
-  TensorDescriptor desc;
+  tt.mutable_data<float>(phi::CPUPlace());
+  phi::backends::gpu::TensorDescriptor desc;
   desc.set(tt);
   EXPECT_TRUE(desc.desc() != nullptr);
 }
...
@@ -14,14 +14,34 @@
 #pragma once
+#include "paddle/phi/backends/gpu/gpu_dnn.h"
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#ifdef PADDLE_WITH_HIP
-#include "paddle/fluid/platform/device/gpu/rocm/miopen_desc.h"
-#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h"
-#else  // CUDA
-#include "paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h"
-#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h"
-#endif
+namespace paddle {
+namespace platform {
+using DataLayout = phi::backends::gpu::DataLayout;
+using PoolingMode = phi::backends::gpu::PoolingMode;
+template <typename T>
+using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
+using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor;
+using ScopedDropoutDescriptor = phi::backends::gpu::ScopedDropoutDescriptor;
+using ScopedRNNDescriptor = phi::backends::gpu::ScopedRNNDescriptor;
+using ScopedFilterDescriptor = phi::backends::gpu::ScopedFilterDescriptor;
+using ScopedConvolutionDescriptor =
+    phi::backends::gpu::ScopedConvolutionDescriptor;
+using ScopedPoolingDescriptor = phi::backends::gpu::ScopedPoolingDescriptor;
+using ScopedActivationDescriptor =
+    phi::backends::gpu::ScopedActivationDescriptor;
+#if defined(PADDLE_WITH_CUDA)
+using ScopedRNNTensorDescriptor = phi::backends::gpu::ScopedRNNTensorDescriptor;
+using ScopedSpatialTransformerDescriptor =
+    phi::backends::gpu::ScopedSpatialTransformerDescriptor;
+#endif
+}  // namespace platform
+}  // namespace paddle
 #endif
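With this change the fluid header above becomes a thin compatibility shim: it includes the new phi umbrella header and re-exports the wrapper types into paddle::platform through using-aliases, so fluid code that has not been migrated keeps compiling unchanged. A small sketch of that effect (the function name is hypothetical, not part of the PR):

// Sketch: an unmigrated fluid call site still builds, because the old names
// now resolve to the phi implementations via the alias layer shown above.
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"

void LegacyFluidCallSite() {
  // Alias of phi::backends::gpu::ScopedTensorDescriptor.
  paddle::platform::ScopedTensorDescriptor desc;
  // Alias of phi::backends::gpu::DataLayout.
  auto layout = paddle::platform::DataLayout::kNCHW;
  (void)desc;
  (void)layout;
}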
@@ -20,8 +20,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
 TEST(MIOpenHelper, ScopedTensorDescriptor) {
-  using paddle::platform::DataLayout;
-  using paddle::platform::ScopedTensorDescriptor;
+  using phi::backends::gpu::DataLayout;
+  using phi::backends::gpu::ScopedTensorDescriptor;
   ScopedTensorDescriptor tensor_desc;
   std::vector<int> shape = {2, 4, 6, 6};
@@ -31,9 +31,9 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) {
   int nd;
   std::vector<int> dims(4);
   std::vector<int> strides(4);
-  paddle::platform::dynload::miopenGetTensorDescriptor(
+  phi::dynload::miopenGetTensorDescriptor(
      desc, &type, dims.data(), strides.data());
-  paddle::platform::dynload::miopenGetTensorDescriptorSize(desc, &nd);
+  phi::dynload::miopenGetTensorDescriptorSize(desc, &nd);
   EXPECT_EQ(nd, 4);
   for (size_t i = 0; i < dims.size(); ++i) {
@@ -51,9 +51,9 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) {
   std::vector<int> dims_5d(5);
   std::vector<int> strides_5d(5);
-  paddle::platform::dynload::miopenGetTensorDescriptor(
+  phi::dynload::miopenGetTensorDescriptor(
      desc_5d, &type, dims_5d.data(), strides_5d.data());
-  paddle::platform::dynload::miopenGetTensorDescriptorSize(desc_5d, &nd);
+  phi::dynload::miopenGetTensorDescriptorSize(desc_5d, &nd);
   EXPECT_EQ(nd, 5);
   for (size_t i = 0; i < dims_5d.size(); ++i) {
@@ -67,7 +67,7 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) {
 }
 TEST(MIOpenHelper, ScopedConvolutionDescriptor) {
-  using paddle::platform::ScopedConvolutionDescriptor;
+  using phi::backends::gpu::ScopedConvolutionDescriptor;
   ScopedConvolutionDescriptor conv_desc;
   std::vector<int> src_pads = {2, 2, 2};
@@ -80,7 +80,7 @@ TEST(MIOpenHelper, ScopedConvolutionDescriptor) {
   std::vector<int> pads(3);
   std::vector<int> strides(3);
   std::vector<int> dilations(3);
-  paddle::platform::dynload::miopenGetConvolutionNdDescriptor(
+  phi::dynload::miopenGetConvolutionNdDescriptor(
      desc, 3, &nd, pads.data(), strides.data(), dilations.data(), &mode);
   EXPECT_EQ(nd, 3);
...
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,22 +23,12 @@
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h"
 #include "paddle/phi/core/utils/data_type.h"
 namespace phi {
-class DenseTensor;
-}  // namespace phi
-namespace paddle {
-namespace platform {
-template <typename T>
-inline cudnnDataType_t ToCudnnDataType(const T& t) {
-  auto type = framework::ToDataType(t);
-  return ToCudnnDataType(phi::TransToPhiDataType(type));
-}
+namespace backends {
+namespace gpu {
 template <typename T>
 inline std::vector<T> TransformDimOrder(const std::vector<T>& dims) {
@@ -67,7 +57,6 @@ inline std::vector<T> TransformDimOrder(const std::vector<T>& dims) {
   return transformed_dims;
 }
-template <>
 inline cudnnDataType_t ToCudnnDataType(const phi::DataType& t) {
   cudnnDataType_t type = CUDNN_DATA_FLOAT;
   switch (t) {
@@ -98,7 +87,7 @@ class ActivationDescriptor {
     void operator()(T* t) {
       if (t != nullptr) {
         PADDLE_ENFORCE_GPU_SUCCESS(
-            dynload::cudnnDestroyActivationDescriptor(t));
+            phi::dynload::cudnnDestroyActivationDescriptor(t));
         t = nullptr;
       }
     }
@@ -106,12 +95,12 @@ class ActivationDescriptor {
   ActivationDescriptor() {
     T* raw_ptr;
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnCreateActivationDescriptor(&raw_ptr));
+        phi::dynload::cudnnCreateActivationDescriptor(&raw_ptr));
     desc_.reset(raw_ptr);
   }
   template <typename T>
   void set(cudnnActivationMode_t mode, const T& coef) {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor(
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor(
         desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, static_cast<double>(coef)));
   }
@@ -128,14 +117,16 @@ class TensorDescriptor {
   struct Deleter {
     void operator()(T* t) {
       if (t != nullptr) {
-        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(t));
+        PADDLE_ENFORCE_GPU_SUCCESS(
+            phi::dynload::cudnnDestroyTensorDescriptor(t));
         t = nullptr;
       }
     }
   };
   TensorDescriptor() {
     T* raw_ptr;
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&raw_ptr));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnCreateTensorDescriptor(&raw_ptr));
     desc_.reset(raw_ptr);
   }
   T* desc() { return desc_.get(); }
@@ -151,12 +142,12 @@ class TensorDescriptor {
     if (groups > 1) {
       dims_with_group[1] = dims_with_group[1] / groups;
     }
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnSetTensorNdDescriptor(desc_.get(),
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+        desc_.get(),
         ToCudnnDataType(tensor.dtype()),
         dims_with_group.size(),
         dims_with_group.data(),
         strides.data()));
   }
   void set(const std::vector<int>& dims,
@@ -169,11 +160,11 @@ class TensorDescriptor {
       transformed_dims = dims;
     }
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnSetTensorNdDescriptorEx(desc_.get(),
+        phi::dynload::cudnnSetTensorNdDescriptorEx(desc_.get(),
         format,
         dtype,
         transformed_dims.size(),
         transformed_dims.data()));
   }
   void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) {
@@ -192,14 +183,16 @@ class FilterDescriptor {
   struct Deleter {
     void operator()(T* t) {
       if (t != nullptr) {
-        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(t));
+        PADDLE_ENFORCE_GPU_SUCCESS(
+            phi::dynload::cudnnDestroyFilterDescriptor(t));
         t = nullptr;
       }
     }
   };
   FilterDescriptor() {
     T* raw_ptr;
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&raw_ptr));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnCreateFilterDescriptor(&raw_ptr));
     desc_.reset(raw_ptr);
   }
   T* desc() { return desc_.get(); }
@@ -219,11 +212,11 @@ class FilterDescriptor {
       transformed_dims[1] = transformed_dims[1] / groups;
     }
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnSetFilterNdDescriptor(desc_.get(),
+        phi::dynload::cudnnSetFilterNdDescriptor(desc_.get(),
         dtype,
         format,
         transformed_dims.size(),
         transformed_dims.data()));
   }
   void set(const phi::DenseTensor& tensor,
@@ -245,7 +238,7 @@ class ConvolutionDescriptor {
     void operator()(T* t) {
       if (t != nullptr) {
         PADDLE_ENFORCE_GPU_SUCCESS(
-            dynload::cudnnDestroyConvolutionDescriptor(t));
+            phi::dynload::cudnnDestroyConvolutionDescriptor(t));
         t = nullptr;
       }
     }
@@ -253,7 +246,7 @@ class ConvolutionDescriptor {
   ConvolutionDescriptor() {
     T* raw_ptr;
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnCreateConvolutionDescriptor(&raw_ptr));
+        phi::dynload::cudnnCreateConvolutionDescriptor(&raw_ptr));
     desc_.reset(raw_ptr);
   }
   T* desc() { return desc_.get(); }
@@ -270,31 +263,31 @@ class ConvolutionDescriptor {
         (dtype == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
     T* desc = desc_.get();
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnSetConvolutionNdDescriptor(desc,
+        phi::dynload::cudnnSetConvolutionNdDescriptor(desc,
         pads.size(),
         pads.data(),
         strides.data(),
         dilations.data(),
         CUDNN_CROSS_CORRELATION,
         compute_type));
 #if CUDNN_VERSION_MIN(7, 0, 1)
     PADDLE_ENFORCE_GPU_SUCCESS(
-        platform::dynload::cudnnSetConvolutionGroupCount(desc, groups));
+        phi::dynload::cudnnSetConvolutionGroupCount(desc, groups));
 #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType(
-        desc, CUDNN_DEFAULT_MATH));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_DEFAULT_MATH));
     if (dtype == CUDNN_DATA_HALF) {
-      PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType(
+      PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType(
          desc, CUDNN_TENSOR_OP_MATH));
 #if CUDA_VERSION >= 11000
 #if CUDNN_VERSION_MIN(8, 1, 0)
     } else if (dtype == CUDNN_DATA_BFLOAT16) {
-      PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType(
+      PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType(
          desc, CUDNN_TENSOR_OP_MATH));
 #endif  // CUDNN_VERSION_MIN(8,1,0)
     } else if (dtype == CUDNN_DATA_FLOAT && !allow_tf32) {
       PADDLE_ENFORCE_GPU_SUCCESS(
-          platform::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH));
+          phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH));
 #endif  // CUDA_VERSION >= 11000
     }
 #endif
@@ -307,5 +300,6 @@ class ConvolutionDescriptor {
   std::unique_ptr<T, Deleter> desc_;
 };
-}  // namespace platform
-}  // namespace paddle
+}  // namespace gpu
+}  // namespace backends
+}  // namespace phi
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,16 +17,20 @@ limitations under the License. */
 #include <string>
 #include <vector>
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/platform/dynload/cudnn.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/float16.h"
-#include "paddle/fluid/platform/macros.h"
+#include "paddle/phi/backends/dynload/cudnn.h"
+#include "paddle/phi/common/bfloat16.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/errors.h"
+#include "paddle/phi/core/macros.h"
 DECLARE_bool(cudnn_deterministic);
-namespace paddle {
-namespace platform {
+namespace phi {
+namespace backends {
+namespace gpu {
 #define CUDNN_VERSION_MIN(major, minor, patch) \
   (CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch)))
@@ -68,7 +72,7 @@ inline cudnnPoolingMode_t GetPoolingMode(const PoolingMode& mode) {
       return CUDNN_POOLING_MAX;
     default:
       PADDLE_THROW(
-          platform::errors::Unimplemented("Unexpected CUDNN pooling mode."));
+          phi::errors::Unimplemented("Unexpected CUDNN pooling mode."));
   }
 }
@@ -88,7 +92,7 @@ inline ActivationMode StringToActivationMode(const std::string& str) {
   } else if (str == "bandpass") {
     return ActivationMode::kBandPass;
   } else {
-    PADDLE_THROW(platform::errors::Unimplemented(
+    PADDLE_THROW(phi::errors::Unimplemented(
         "Unknown CUDNN activation string: %s.", str));
   }
 }
@@ -99,7 +103,7 @@ class CudnnDataType;
 // CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1
 #if CUDNN_VERSION_MIN(8, 1, 0)
 template <>
-class CudnnDataType<bfloat16> {
+class CudnnDataType<phi::dtype::bfloat16> {
  public:
   static const cudnnDataType_t type = CUDNN_DATA_BFLOAT16;
   using ScalingParamType = const float;
@@ -116,7 +120,7 @@ class CudnnDataType<bfloat16> {
 #endif
 template <>
-class CudnnDataType<float16> {
+class CudnnDataType<phi::dtype::float16> {
  public:
   static const cudnnDataType_t type = CUDNN_DATA_HALF;
   // The scaling param type is float for HALF and FLOAT tensors
@@ -176,7 +180,7 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat(
     case DataLayout::kNDHWC:
       return CUDNN_TENSOR_NHWC;  // add, liyamei
     default:
-      PADDLE_THROW(platform::errors::Unimplemented(
+      PADDLE_THROW(phi::errors::Unimplemented(
           "CUDNN has no equivalent dataLayout for input order."));
   }
   return CUDNN_TENSOR_NCHW;
@@ -185,10 +189,12 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat(
 class ScopedTensorDescriptor {
  public:
   ScopedTensorDescriptor() {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnCreateTensorDescriptor(&desc_));
   }
   ~ScopedTensorDescriptor() PADDLE_MAY_THROW {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnDestroyTensorDescriptor(desc_));
   }
   inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
@@ -211,25 +217,25 @@ class ScopedTensorDescriptor {
     if (dims.size() == 4) {
       if (format == CUDNN_TENSOR_NCHW) {
         PADDLE_ENFORCE_GPU_SUCCESS(
-            dynload::cudnnSetTensorNdDescriptor(desc_,
+            phi::dynload::cudnnSetTensorNdDescriptor(desc_,
             type,
             dims_with_group.size(),
             dims_with_group.data(),
             strides.data()));
       } else {  // CUDNN_TENSOR_NHWC
-        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensor4dDescriptor(
+        PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensor4dDescriptor(
            desc_, format, type, dims[0], dims[3], dims[1], dims[2]));
       }
     } else if (dims.size() == 5) {
       if (format == CUDNN_TENSOR_NCHW) {
         PADDLE_ENFORCE_GPU_SUCCESS(
-            dynload::cudnnSetTensorNdDescriptor(desc_,
+            phi::dynload::cudnnSetTensorNdDescriptor(desc_,
             type,
             dims_with_group.size(),
             dims_with_group.data(),
             strides.data()));
       } else {  // CUDNN_TENSOR_NHWC
-        PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx(
+        PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptorEx(
            desc_, format, type, dims.size(), dims.data()));
       }
     }
@@ -247,7 +253,7 @@ class ScopedTensorDescriptor {
   inline cudnnTensorDescriptor_t descriptor(const cudnnDataType_t cudnn_type,
                                             const std::vector<int>& dim,
                                             const std::vector<int>& stride) {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
         desc_, cudnn_type, dim.size(), dim.data(), stride.data()));
     return desc_;
   }
@@ -269,11 +275,13 @@ class ScopedTensorDescriptor {
 class ScopedRNNTensorDescriptor {
  public:
   ScopedRNNTensorDescriptor() {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDataDescriptor(&desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnCreateRNNDataDescriptor(&desc_));
   }
   ~ScopedRNNTensorDescriptor() PADDLE_MAY_THROW {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDataDescriptor(desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnDestroyRNNDataDescriptor(desc_));
   }
   inline cudnnRNNDataDescriptor_t descriptor(
@@ -292,15 +300,15 @@ class ScopedRNNTensorDescriptor {
       layout = CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED;
     }
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnSetRNNDataDescriptor(desc_,
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDataDescriptor(
+        desc_,
         cudnn_type,
         layout,
         max_seq_length,
         batch_size,
         input_size,
         seq_length.data(),
         static_cast<void*>(&padding_fill)));
     return desc_;
   }
@@ -331,14 +339,16 @@ class ScopedRNNTensorDescriptor {
 class ScopedDropoutDescriptor {
  public:
   ScopedDropoutDescriptor() {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateDropoutDescriptor(&desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnCreateDropoutDescriptor(&desc_));
   }
   ~ScopedDropoutDescriptor() PADDLE_MAY_THROW {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyDropoutDescriptor(desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnDestroyDropoutDescriptor(desc_));
   }
   inline cudnnDropoutDescriptor_t descriptor(const cudnnHandle_t& handle,
-                                             const platform::Place& place,
+                                             const phi::Place& place,
                                              bool initialized,
                                              float dropout_prob_,
                                              phi::DenseTensor* dropout_state_,
@@ -346,22 +356,22 @@ class ScopedDropoutDescriptor {
                                              size_t state_size) {
     if (dropout_state_ == nullptr) {  // for no dropout or test
       PADDLE_ENFORCE_GPU_SUCCESS(
-          dynload::cudnnSetDropoutDescriptor(desc_,
+          phi::dynload::cudnnSetDropoutDescriptor(desc_,
           handle,
           0 /* dropout */,
           nullptr,
          0 /* state_size */,
          0 /* seed */));
       return desc_;
     }
     auto* dropout_state_data = dropout_state_->data<uint8_t>();
     if (!initialized) {
-      PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetDropoutDescriptor(
+      PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetDropoutDescriptor(
          desc_, handle, dropout_prob_, dropout_state_data, state_size, seed));
     } else {
       auto dropout_state_dims = dropout_state_->dims();
       state_size = dropout_state_dims[0];
-      PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnRestoreDropoutDescriptor(
+      PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRestoreDropoutDescriptor(
          desc_, handle, dropout_prob_, dropout_state_data, state_size, 0));
     }
     return desc_;
@@ -376,10 +386,10 @@ class ScopedDropoutDescriptor {
 class ScopedRNNDescriptor {
  public:
   ScopedRNNDescriptor() {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDescriptor(&desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnCreateRNNDescriptor(&desc_));
   }
   ~ScopedRNNDescriptor() PADDLE_MAY_THROW {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDestroyRNNDescriptor(desc_));
   }
   inline cudnnRNNDescriptor_t desc() { return desc_; }
@@ -392,10 +402,12 @@ class ScopedRNNDescriptor {
 class ScopedFilterDescriptor {
  public:
   ScopedFilterDescriptor() {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnCreateFilterDescriptor(&desc_));
   }
   ~ScopedFilterDescriptor() PADDLE_MAY_THROW {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnDestroyFilterDescriptor(desc_));
   }
   inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
@@ -412,11 +424,11 @@ class ScopedFilterDescriptor {
       // NOTE: input filter(C) of the filter is already asserted to be C/groups.
     }
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnSetFilterNdDescriptor(desc_,
+        phi::dynload::cudnnSetFilterNdDescriptor(desc_,
         type,
         format,
         kernel_with_group.size(),
         kernel_with_group.data()));
     return desc_;
   }
@@ -439,11 +451,11 @@ class ScopedConvolutionDescriptor {
  public:
   ScopedConvolutionDescriptor() {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnCreateConvolutionDescriptor(&desc_));
+        phi::dynload::cudnnCreateConvolutionDescriptor(&desc_));
   }
   ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnDestroyConvolutionDescriptor(desc_));
+        phi::dynload::cudnnDestroyConvolutionDescriptor(desc_));
   }
   inline cudnnConvolutionDescriptor_t descriptor(
@@ -453,7 +465,7 @@ class ScopedConvolutionDescriptor {
       const std::vector<int>& dilations) {
     PADDLE_ENFORCE_EQ(pads.size(),
                       strides.size(),
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The size of pads and strides should be equal. But "
                           "received size of pads is %d, size of strides is %d.",
                           pads.size(),
@@ -461,7 +473,7 @@ class ScopedConvolutionDescriptor {
     PADDLE_ENFORCE_EQ(
         pads.size(),
         dilations.size(),
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "The size of pads and dilations should be equal. But received size "
            "of pads is %d, size of dilations is %d.",
            pads.size(),
@@ -470,13 +482,13 @@ class ScopedConvolutionDescriptor {
     cudnnDataType_t compute_type =
         (type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnSetConvolutionNdDescriptor(desc_,
+        phi::dynload::cudnnSetConvolutionNdDescriptor(desc_,
         pads.size(),
        pads.data(),
        strides.data(),
        dilations.data(),
        CUDNN_CROSS_CORRELATION,
        compute_type));
     return desc_;
   }
@@ -496,10 +508,12 @@ class ScopedConvolutionDescriptor {
 class ScopedPoolingDescriptor {
  public:
   ScopedPoolingDescriptor() {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnCreatePoolingDescriptor(&desc_));
   }
   ~ScopedPoolingDescriptor() PADDLE_MAY_THROW {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnDestroyPoolingDescriptor(desc_));
   }
   inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
@@ -508,7 +522,7 @@ class ScopedPoolingDescriptor {
                                              const std::vector<int>& strides) {
     PADDLE_ENFORCE_EQ(kernel.size(),
                       pads.size(),
-                      platform::errors::InvalidArgument(
+                      phi::errors::InvalidArgument(
                           "The size of kernel and pads should be equal. But "
                           "received size of kernel is %d, size of pads is %d.",
                           kernel.size(),
@@ -516,12 +530,12 @@ class ScopedPoolingDescriptor {
     PADDLE_ENFORCE_EQ(
         kernel.size(),
         strides.size(),
-        platform::errors::InvalidArgument(
+        phi::errors::InvalidArgument(
            "The size of kernel and strides should be equal. But "
            "received size of kernel is %d, size of strides is %d.",
            kernel.size(),
            strides.size()));
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetPoolingNdDescriptor(
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetPoolingNdDescriptor(
        desc_,
        (GetPoolingMode(mode)),
        CUDNN_PROPAGATE_NAN,  // Always propagate nans.
@@ -541,18 +555,23 @@ class ScopedSpatialTransformerDescriptor {
  public:
   ScopedSpatialTransformerDescriptor() {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
+        phi::dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
   }
   ~ScopedSpatialTransformerDescriptor() PADDLE_MAY_THROW {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
+        phi::dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
   }
   template <typename T>
   inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
                                                         const int dimA[]) {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor(
-        desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnSetSpatialTransformerNdDescriptor(
+            desc_,
+            CUDNN_SAMPLER_BILINEAR,
+            CudnnDataType<T>::type,
+            nbDims,
+            dimA));
     return desc_;
   }
@@ -565,11 +584,11 @@ class ScopedActivationDescriptor {
  public:
   ScopedActivationDescriptor() {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnCreateActivationDescriptor(&desc_));
+        phi::dynload::cudnnCreateActivationDescriptor(&desc_));
   }
   ~ScopedActivationDescriptor() PADDLE_MAY_THROW {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnDestroyActivationDescriptor(desc_));
+        phi::dynload::cudnnDestroyActivationDescriptor(desc_));
   }
   template <typename T>
@@ -602,11 +621,11 @@ class ScopedActivationDescriptor {
         mode = CUDNN_ACTIVATION_TANH;
         break;
       default:
-        PADDLE_THROW(platform::errors::Unimplemented(
+        PADDLE_THROW(phi::errors::Unimplemented(
            "Unrecognized CUDNN activation mode: %d.",
            static_cast<int>(activation_mode)));
     }
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor(
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor(
        desc_, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling));
     return desc_;
   }
@@ -620,16 +639,18 @@ class ScopedActivationDescriptor {
 class ScopedCTCLossDescriptor {
  public:
   ScopedCTCLossDescriptor() {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnCreateCTCLossDescriptor(&desc_));
   }
   ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::cudnnDestroyCTCLossDescriptor(desc_));
   }
   template <typename T>
   inline cudnnCTCLossDescriptor_t descriptor() {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
+        phi::dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
     return desc_;
   }
@@ -639,5 +660,6 @@ class ScopedCTCLossDescriptor {
 };
 #endif
-}  // namespace platform
-}  // namespace paddle
+}  // namespace gpu
+}  // namespace backends
+}  // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#ifdef PADDLE_WITH_HIP
#include "paddle/phi/backends/gpu/rocm/miopen_desc.h"
#include "paddle/phi/backends/gpu/rocm/miopen_helper.h"
#else // CUDA
#include "paddle/phi/backends/gpu/cuda/cudnn_desc.h"
#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h"
#endif
#endif
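The new umbrella header above mirrors the one fluid already had: a phi GPU kernel includes a single gpu_dnn.h and gets the cuDNN wrappers on CUDA builds or the MIOpen wrappers on ROCm builds, with the backend selection kept inside the header. A hedged sketch of the intended include pattern (the helper function below is hypothetical, not part of this PR):

// Hypothetical phi kernel source file: one include covers both GPU backends.
#include "paddle/phi/backends/gpu/gpu_dnn.h"

namespace phi {

// Builds a cuDNN/MIOpen tensor descriptor from a DenseTensor using the moved
// TensorDescriptor wrapper, which exists under phi::backends::gpu on either
// backend (single-argument set() assumed, as in the updated descriptor test).
void PrepareDnnDescriptor(const phi::DenseTensor& x) {
  backends::gpu::TensorDescriptor x_desc;
  x_desc.set(x);
}

}  // namespace phi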
-// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,22 +23,12 @@
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/gpu/rocm/miopen_helper.h"
 #include "paddle/phi/core/utils/data_type.h"
 namespace phi {
-class DenseTensor;
-}  // namespace phi
-namespace paddle {
-namespace platform {
-template <typename T>
-inline miopenDataType_t ToCudnnDataType(const T& t) {
-  auto type = framework::ToDataType(t);
-  return ToCudnnDataType(phi::TransToPhiDataType(type));
-}
+namespace backends {
+namespace gpu {
 inline std::vector<int> TransformDimOrder(const std::vector<int>& dims) {
   std::vector<int> transformed_dims(dims.begin(), dims.end());
@@ -63,7 +53,6 @@ inline std::vector<int> TransformDimOrder(const std::vector<int>& dims) {
   return transformed_dims;
 }
-template <>
 inline miopenDataType_t ToCudnnDataType(const phi::DataType& t) {
   miopenDataType_t type = miopenFloat;
   switch (t) {
@@ -86,7 +75,7 @@ class ActivationDescriptor {
     void operator()(T* t) {
       if (t != nullptr) {
         PADDLE_ENFORCE_GPU_SUCCESS(
-            dynload::miopenDestroyActivationDescriptor(t));
+            phi::dynload::miopenDestroyActivationDescriptor(t));
         t = nullptr;
       }
     }
@@ -94,12 +83,12 @@ class ActivationDescriptor {
   ActivationDescriptor() {
     T* raw_ptr;
     PADDLE_ENFORCE_GPU_SUCCESS(
-        dynload::miopenCreateActivationDescriptor(&raw_ptr));
+        phi::dynload::miopenCreateActivationDescriptor(&raw_ptr));
     desc_.reset(raw_ptr);
   }
   template <typename T>
   void set(miopenActivationMode_t mode, const T& coef) {
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetActivationDescriptor(
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor(
         desc_.get(), mode, static_cast<double>(coef), 0.0, 0.0));
   }
@@ -116,14 +105,16 @@ class TensorDescriptor {
   struct Deleter {
     void operator()(T* t) {
       if (t != nullptr) {
-        PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(t));
+        PADDLE_ENFORCE_GPU_SUCCESS(
+            phi::dynload::miopenDestroyTensorDescriptor(t));
         t = nullptr;
       }
     }
   };
   TensorDescriptor() {
     T* raw_ptr;
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&raw_ptr));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        phi::dynload::miopenCreateTensorDescriptor(&raw_ptr));
     desc_.reset(raw_ptr);
   }
   T* desc() { return desc_.get(); }
@@ -140,7 +131,7 @@ class TensorDescriptor {
     if (groups > 1) {
       dims_with_group[1] = dims_with_group[1] / groups;
     }
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
        (miopenTensorDescriptor_t)(desc_.get()),
        ToCudnnDataType(tensor.dtype()),
        static_cast<int>(dims_with_group.size()),
@@ -150,10 +141,10 @@ class TensorDescriptor {
   void set(const phi::DenseTensor& tensor, const miopenTensorFormat_t format) {
     const int groups = 1;
-    PADDLE_ENFORCE_EQ(format,
-                      MIOPEN_TENSOR_NCHW,
-                      platform::errors::InvalidArgument(
-                          "format should ONLY be NCHW in MIOPEN."));
+    PADDLE_ENFORCE_EQ(
+        format,
+        MIOPEN_TENSOR_NCHW,
+        phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN."));
     auto dims = phi::vectorize<int>(tensor.dims());
     std::vector<int> strides(dims.size());
     strides[dims.size() - 1] = 1;
@@ -164,7 +155,7 @@ class TensorDescriptor {
     if (groups > 1) {
       dims_with_group[1] = dims_with_group[1] / groups;
     }
-    PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
        (miopenTensorDescriptor_t)(desc_.get()),
        ToCudnnDataType(tensor.dtype()),
        static_cast<int>(dims_with_group.size()),
...@@ -182,14 +173,16 @@ class FilterDescriptor { ...@@ -182,14 +173,16 @@ class FilterDescriptor {
struct Deleter { struct Deleter {
void operator()(T* t) { void operator()(T* t) {
if (t != nullptr) { if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(t)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyTensorDescriptor(t));
t = nullptr; t = nullptr;
} }
} }
}; };
FilterDescriptor() { FilterDescriptor() {
T* raw_ptr; T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&raw_ptr)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateTensorDescriptor(&raw_ptr));
desc_.reset(raw_ptr); desc_.reset(raw_ptr);
} }
T* desc() { return desc_.get(); } T* desc() { return desc_.get(); }
...@@ -198,10 +191,10 @@ class FilterDescriptor { ...@@ -198,10 +191,10 @@ class FilterDescriptor {
void set(const phi::DenseTensor& tensor, void set(const phi::DenseTensor& tensor,
const miopenTensorFormat_t format, const miopenTensorFormat_t format,
const int groups = 1) { const int groups = 1) {
PADDLE_ENFORCE_EQ(format, PADDLE_ENFORCE_EQ(
MIOPEN_TENSOR_NCHW, format,
platform::errors::InvalidArgument( MIOPEN_TENSOR_NCHW,
"format should ONLY be NCHW in MIOPEN.")); phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN."));
auto dims = phi::vectorize<int>(tensor.dims()); auto dims = phi::vectorize<int>(tensor.dims());
std::vector<int> strides(dims.size()); std::vector<int> strides(dims.size());
strides[dims.size() - 1] = 1; strides[dims.size() - 1] = 1;
...@@ -212,7 +205,7 @@ class FilterDescriptor { ...@@ -212,7 +205,7 @@ class FilterDescriptor {
if (groups > 1) { if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups; dims_with_group[1] = dims_with_group[1] / groups;
} }
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
(miopenTensorDescriptor_t)(desc_.get()), (miopenTensorDescriptor_t)(desc_.get()),
ToCudnnDataType(tensor.dtype()), ToCudnnDataType(tensor.dtype()),
static_cast<int>(dims_with_group.size()), static_cast<int>(dims_with_group.size()),
...@@ -231,7 +224,7 @@ class ConvolutionDescriptor { ...@@ -231,7 +224,7 @@ class ConvolutionDescriptor {
void operator()(T* t) { void operator()(T* t) {
if (t != nullptr) { if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenDestroyConvolutionDescriptor(t)); phi::dynload::miopenDestroyConvolutionDescriptor(t));
t = nullptr; t = nullptr;
} }
} }
...@@ -239,7 +232,7 @@ class ConvolutionDescriptor { ...@@ -239,7 +232,7 @@ class ConvolutionDescriptor {
ConvolutionDescriptor() { ConvolutionDescriptor() {
T* raw_ptr; T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenCreateConvolutionDescriptor(&raw_ptr)); phi::dynload::miopenCreateConvolutionDescriptor(&raw_ptr));
desc_.reset(raw_ptr); desc_.reset(raw_ptr);
} }
T* desc() { return desc_.get(); } T* desc() { return desc_.get(); }
...@@ -251,21 +244,21 @@ class ConvolutionDescriptor { ...@@ -251,21 +244,21 @@ class ConvolutionDescriptor {
const std::vector<int>& dilations, const std::vector<int>& dilations,
bool allow_tf32, bool allow_tf32,
const int groups = 1) { const int groups = 1) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenInitConvolutionNdDescriptor( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor(
(miopenConvolutionDescriptor_t)desc_.get(), (miopenConvolutionDescriptor_t)desc_.get(),
static_cast<int>(pads.size()), static_cast<int>(pads.size()),
const_cast<int*>(pads.data()), const_cast<int*>(pads.data()),
const_cast<int*>(strides.data()), const_cast<int*>(strides.data()),
const_cast<int*>(dilations.data()), const_cast<int*>(dilations.data()),
miopenConvolution)); miopenConvolution));
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetConvolutionGroupCount(
platform::dynload::miopenSetConvolutionGroupCount( (miopenConvolutionDescriptor_t)desc_.get(), groups));
(miopenConvolutionDescriptor_t)desc_.get(), groups));
} }
private: private:
std::unique_ptr<T, Deleter> desc_; std::unique_ptr<T, Deleter> desc_;
}; };
} // namespace platform } // namespace gpu
} // namespace paddle } // namespace backends
} // namespace phi
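For code outside this header, the change is a pure namespace move; below is a minimal sketch of an updated MIOPEN filter-descriptor call site. The include path paddle/phi/backends/gpu/rocm/miopen_desc.h and the tensor argument are assumptions for illustration, not taken from this diff.

#include "paddle/phi/backends/gpu/rocm/miopen_desc.h"  // assumed new location of this header

// Sketch only: sets an NCHW filter descriptor through the moved helper,
// using the set(tensor, format, groups) overload shown above.
void SetFilterDesc(const phi::DenseTensor& filter) {
  phi::backends::gpu::FilterDescriptor filter_desc;  // was paddle::platform::FilterDescriptor
  filter_desc.set(filter, MIOPEN_TENSOR_NCHW, /*groups=*/1);
}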
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
...@@ -17,20 +17,24 @@ limitations under the License. */ ...@@ -17,20 +17,24 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/operator.h" #include "paddle/phi/backends/dynload/miopen.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h" #include "paddle/phi/common/bfloat16.h"
#include "paddle/fluid/platform/dynload/miopen.h" #include "paddle/phi/common/float16.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/common/place.h"
#include "paddle/fluid/platform/float16.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/fluid/platform/macros.h" #include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/macros.h"
// MIOPEN does not have an epsilon definition // MIOPEN does not have an epsilon definition
#define CUDNN_BN_MIN_EPSILON 1e-05 #define CUDNN_BN_MIN_EPSILON 1e-05
DECLARE_bool(cudnn_deterministic); DECLARE_bool(cudnn_deterministic);
namespace paddle { namespace phi {
namespace platform { namespace backends {
namespace gpu {
inline const char* miopenGetErrorString(miopenStatus_t status) { inline const char* miopenGetErrorString(miopenStatus_t status) {
switch (status) { switch (status) {
case miopenStatusSuccess: case miopenStatusSuccess:
...@@ -95,7 +99,7 @@ inline miopenPoolingMode_t GetPoolingMode(const PoolingMode& mode) { ...@@ -95,7 +99,7 @@ inline miopenPoolingMode_t GetPoolingMode(const PoolingMode& mode) {
return miopenPoolingMax; return miopenPoolingMax;
default: default:
PADDLE_THROW( PADDLE_THROW(
platform::errors::Unimplemented("Unexpected MIOPEN pooling mode.")); phi::errors::Unimplemented("Unexpected MIOPEN pooling mode."));
} }
} }
...@@ -115,7 +119,7 @@ inline ActivationMode StringToActivationMode(const std::string& str) { ...@@ -115,7 +119,7 @@ inline ActivationMode StringToActivationMode(const std::string& str) {
} else if (str == "bandpass") { } else if (str == "bandpass") {
return ActivationMode::kBandPass; return ActivationMode::kBandPass;
} else { } else {
PADDLE_THROW(platform::errors::Unimplemented( PADDLE_THROW(phi::errors::Unimplemented(
"Unknown MIOPEN activation string: %s.", str)); "Unknown MIOPEN activation string: %s.", str));
} }
} }
...@@ -124,7 +128,7 @@ template <typename T> ...@@ -124,7 +128,7 @@ template <typename T>
class CudnnDataType; class CudnnDataType;
template <> template <>
class CudnnDataType<float16> { class CudnnDataType<phi::dtype::float16> {
public: public:
static const miopenDataType_t type = miopenHalf; static const miopenDataType_t type = miopenHalf;
// The scaling param type is float for HALF and FLOAT tensors // The scaling param type is float for HALF and FLOAT tensors
...@@ -141,7 +145,7 @@ class CudnnDataType<float16> { ...@@ -141,7 +145,7 @@ class CudnnDataType<float16> {
}; };
template <> template <>
class CudnnDataType<bfloat16> { class CudnnDataType<phi::dtype::bfloat16> {
public: public:
static const miopenDataType_t type = miopenBFloat16; static const miopenDataType_t type = miopenBFloat16;
// The scaling param type is float for HALF and FLOAT tensors // The scaling param type is float for HALF and FLOAT tensors
...@@ -184,7 +188,7 @@ inline miopenTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) { ...@@ -184,7 +188,7 @@ inline miopenTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) {
case DataLayout::kNDHWC: case DataLayout::kNDHWC:
return MIOPEN_TENSOR_NHWC; return MIOPEN_TENSOR_NHWC;
default: default:
PADDLE_THROW(platform::errors::Unimplemented( PADDLE_THROW(phi::errors::Unimplemented(
"MIOPEN has no equivalent dataLayout for input order.")); "MIOPEN has no equivalent dataLayout for input order."));
} }
return MIOPEN_TENSOR_NCHW; return MIOPEN_TENSOR_NCHW;
...@@ -193,10 +197,12 @@ inline miopenTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) { ...@@ -193,10 +197,12 @@ inline miopenTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) {
class ScopedTensorDescriptor { class ScopedTensorDescriptor {
public: public:
ScopedTensorDescriptor() { ScopedTensorDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateTensorDescriptor(&desc_));
} }
~ScopedTensorDescriptor() PADDLE_MAY_THROW { ~ScopedTensorDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyTensorDescriptor(desc_));
} }
inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format, inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format,
...@@ -217,19 +223,19 @@ class ScopedTensorDescriptor { ...@@ -217,19 +223,19 @@ class ScopedTensorDescriptor {
} }
// MIOPEN ONLY supports the NCHW data layout // MIOPEN ONLY supports the NCHW data layout
PADDLE_ENFORCE_EQ(format, PADDLE_ENFORCE_EQ(
MIOPEN_TENSOR_NCHW, format,
platform::errors::InvalidArgument( MIOPEN_TENSOR_NCHW,
"format should ONLY be NCHW in MIOPEN.")); phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN."));
if (dims.size() == 4) { if (dims.size() == 4) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
desc_, desc_,
type, type,
dims_with_group.size(), dims_with_group.size(),
const_cast<int*>(dims_with_group.data()), const_cast<int*>(dims_with_group.data()),
const_cast<int*>(strides.data()))); const_cast<int*>(strides.data())));
} else if (dims.size() == 5) { } else if (dims.size() == 5) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
desc_, desc_,
type, type,
dims_with_group.size(), dims_with_group.size(),
...@@ -250,12 +256,12 @@ class ScopedTensorDescriptor { ...@@ -250,12 +256,12 @@ class ScopedTensorDescriptor {
inline miopenTensorDescriptor_t descriptor(const miopenDataType_t miopen_type, inline miopenTensorDescriptor_t descriptor(const miopenDataType_t miopen_type,
const std::vector<int>& dim, const std::vector<int>& dim,
const std::vector<int>& stride) { const std::vector<int>& stride) {
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
dynload::miopenSetTensorDescriptor(desc_, desc_,
miopen_type, miopen_type,
dim.size(), dim.size(),
const_cast<int*>(dim.data()), const_cast<int*>(dim.data()),
const_cast<int*>(stride.data()))); const_cast<int*>(stride.data())));
return desc_; return desc_;
} }
...@@ -275,14 +281,16 @@ class ScopedTensorDescriptor { ...@@ -275,14 +281,16 @@ class ScopedTensorDescriptor {
class ScopedDropoutDescriptor { class ScopedDropoutDescriptor {
public: public:
ScopedDropoutDescriptor() { ScopedDropoutDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateDropoutDescriptor(&desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateDropoutDescriptor(&desc_));
} }
~ScopedDropoutDescriptor() PADDLE_MAY_THROW { ~ScopedDropoutDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyDropoutDescriptor(desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyDropoutDescriptor(desc_));
} }
inline miopenDropoutDescriptor_t descriptor(const miopenHandle_t& handle, inline miopenDropoutDescriptor_t descriptor(const miopenHandle_t& handle,
const platform::Place& place, const phi::Place& place,
bool initialized, bool initialized,
float dropout_prob_, float dropout_prob_,
phi::DenseTensor* dropout_state_, phi::DenseTensor* dropout_state_,
...@@ -290,42 +298,42 @@ class ScopedDropoutDescriptor { ...@@ -290,42 +298,42 @@ class ScopedDropoutDescriptor {
size_t state_size) { size_t state_size) {
if (dropout_state_ == nullptr) { // for no dropout or test if (dropout_state_ == nullptr) { // for no dropout or test
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenSetDropoutDescriptor(desc_, phi::dynload::miopenSetDropoutDescriptor(desc_,
handle, handle,
0 /* dropout */, 0 /* dropout */,
nullptr, nullptr,
0 /* state_size */, 0 /* state_size */,
0 /* seed */, 0 /* seed */,
false, false,
false, false,
MIOPEN_RNG_PSEUDO_XORWOW)); MIOPEN_RNG_PSEUDO_XORWOW));
return desc_; return desc_;
} }
auto* dropout_state_data = dropout_state_->data<uint8_t>(); auto* dropout_state_data = dropout_state_->data<uint8_t>();
if (!initialized) { if (!initialized) {
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenSetDropoutDescriptor(desc_, phi::dynload::miopenSetDropoutDescriptor(desc_,
handle, handle,
dropout_prob_, dropout_prob_,
dropout_state_data, dropout_state_data,
state_size, state_size,
seed, seed,
false, false,
false, false,
MIOPEN_RNG_PSEUDO_XORWOW)); MIOPEN_RNG_PSEUDO_XORWOW));
} else { } else {
auto dropout_state_dims = dropout_state_->dims(); auto dropout_state_dims = dropout_state_->dims();
state_size = dropout_state_dims[0]; state_size = dropout_state_dims[0];
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRestoreDropoutDescriptor(
dynload::miopenRestoreDropoutDescriptor(desc_, desc_,
handle, handle,
dropout_prob_, dropout_prob_,
dropout_state_data, dropout_state_data,
state_size, state_size,
0, 0,
false, false,
false, false,
MIOPEN_RNG_PSEUDO_XORWOW)); MIOPEN_RNG_PSEUDO_XORWOW));
} }
return desc_; return desc_;
} }
...@@ -339,10 +347,10 @@ class ScopedDropoutDescriptor { ...@@ -339,10 +347,10 @@ class ScopedDropoutDescriptor {
class ScopedRNNDescriptor { class ScopedRNNDescriptor {
public: public:
ScopedRNNDescriptor() { ScopedRNNDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateRNNDescriptor(&desc_)); PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenCreateRNNDescriptor(&desc_));
} }
~ScopedRNNDescriptor() PADDLE_MAY_THROW { ~ScopedRNNDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyRNNDescriptor(desc_)); PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenDestroyRNNDescriptor(desc_));
} }
inline miopenRNNDescriptor_t desc() { return desc_; } inline miopenRNNDescriptor_t desc() { return desc_; }
...@@ -355,10 +363,12 @@ class ScopedRNNDescriptor { ...@@ -355,10 +363,12 @@ class ScopedRNNDescriptor {
class ScopedFilterDescriptor { class ScopedFilterDescriptor {
public: public:
ScopedFilterDescriptor() { ScopedFilterDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateTensorDescriptor(&desc_));
} }
~ScopedFilterDescriptor() PADDLE_MAY_THROW { ~ScopedFilterDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyTensorDescriptor(desc_));
} }
inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format, inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format,
...@@ -379,7 +389,7 @@ class ScopedFilterDescriptor { ...@@ -379,7 +389,7 @@ class ScopedFilterDescriptor {
for (int k = kernel_with_group.size() - 2; k >= 0; k--) { for (int k = kernel_with_group.size() - 2; k >= 0; k--) {
stride_dim[k] = stride_dim[k + 1] * kernel_with_group[k + 1]; stride_dim[k] = stride_dim[k + 1] * kernel_with_group[k + 1];
} }
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
desc_, desc_,
type, type,
kernel_with_group.size(), kernel_with_group.size(),
...@@ -407,11 +417,11 @@ class ScopedConvolutionDescriptor { ...@@ -407,11 +417,11 @@ class ScopedConvolutionDescriptor {
public: public:
ScopedConvolutionDescriptor() { ScopedConvolutionDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenCreateConvolutionDescriptor(&desc_)); phi::dynload::miopenCreateConvolutionDescriptor(&desc_));
} }
~ScopedConvolutionDescriptor() PADDLE_MAY_THROW { ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenDestroyConvolutionDescriptor(desc_)); phi::dynload::miopenDestroyConvolutionDescriptor(desc_));
} }
inline miopenConvolutionDescriptor_t descriptor( inline miopenConvolutionDescriptor_t descriptor(
...@@ -421,7 +431,7 @@ class ScopedConvolutionDescriptor { ...@@ -421,7 +431,7 @@ class ScopedConvolutionDescriptor {
const std::vector<int>& dilations) { const std::vector<int>& dilations) {
PADDLE_ENFORCE_EQ(pads.size(), PADDLE_ENFORCE_EQ(pads.size(),
strides.size(), strides.size(),
platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of pads and strides should be equal. But " "The size of pads and strides should be equal. But "
"received size of pads is %d, size of strides is %d.", "received size of pads is %d, size of strides is %d.",
pads.size(), pads.size(),
...@@ -429,12 +439,12 @@ class ScopedConvolutionDescriptor { ...@@ -429,12 +439,12 @@ class ScopedConvolutionDescriptor {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
pads.size(), pads.size(),
dilations.size(), dilations.size(),
platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of pads and dilations should be equal. But received size " "The size of pads and dilations should be equal. But received size "
"of pads is %d, size of dilations is %d.", "of pads is %d, size of dilations is %d.",
pads.size(), pads.size(),
dilations.size())); dilations.size()));
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenInitConvolutionNdDescriptor( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor(
desc_, desc_,
pads.size(), pads.size(),
const_cast<int*>(pads.data()), const_cast<int*>(pads.data()),
...@@ -460,10 +470,12 @@ class ScopedConvolutionDescriptor { ...@@ -460,10 +470,12 @@ class ScopedConvolutionDescriptor {
class ScopedPoolingDescriptor { class ScopedPoolingDescriptor {
public: public:
ScopedPoolingDescriptor() { ScopedPoolingDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreatePoolingDescriptor(&desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreatePoolingDescriptor(&desc_));
} }
~ScopedPoolingDescriptor() PADDLE_MAY_THROW { ~ScopedPoolingDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyPoolingDescriptor(desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyPoolingDescriptor(desc_));
} }
inline miopenPoolingDescriptor_t descriptor(const PoolingMode& mode, inline miopenPoolingDescriptor_t descriptor(const PoolingMode& mode,
...@@ -472,7 +484,7 @@ class ScopedPoolingDescriptor { ...@@ -472,7 +484,7 @@ class ScopedPoolingDescriptor {
const std::vector<int>& strides) { const std::vector<int>& strides) {
PADDLE_ENFORCE_EQ(kernel.size(), PADDLE_ENFORCE_EQ(kernel.size(),
pads.size(), pads.size(),
platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of kernel and pads should be equal. But " "The size of kernel and pads should be equal. But "
"received size of kernel is %d, size of pads is %d.", "received size of kernel is %d, size of pads is %d.",
kernel.size(), kernel.size(),
...@@ -480,12 +492,12 @@ class ScopedPoolingDescriptor { ...@@ -480,12 +492,12 @@ class ScopedPoolingDescriptor {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
kernel.size(), kernel.size(),
strides.size(), strides.size(),
platform::errors::InvalidArgument( phi::errors::InvalidArgument(
"The size of kernel and strides should be equal. But " "The size of kernel and strides should be equal. But "
"received size of kernel is %d, size of strides is %d.", "received size of kernel is %d, size of strides is %d.",
kernel.size(), kernel.size(),
strides.size())); strides.size()));
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetNdPoolingDescriptor( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetNdPoolingDescriptor(
desc_, desc_,
GetPoolingMode(mode), GetPoolingMode(mode),
kernel.size(), kernel.size(),
...@@ -504,11 +516,11 @@ class ScopedActivationDescriptor { ...@@ -504,11 +516,11 @@ class ScopedActivationDescriptor {
public: public:
ScopedActivationDescriptor() { ScopedActivationDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenCreateActivationDescriptor(&desc_)); phi::dynload::miopenCreateActivationDescriptor(&desc_));
} }
~ScopedActivationDescriptor() PADDLE_MAY_THROW { ~ScopedActivationDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS( PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenDestroyActivationDescriptor(desc_)); phi::dynload::miopenDestroyActivationDescriptor(desc_));
} }
template <typename T> template <typename T>
...@@ -539,11 +551,11 @@ class ScopedActivationDescriptor { ...@@ -539,11 +551,11 @@ class ScopedActivationDescriptor {
mode = miopenActivationTANH; mode = miopenActivationTANH;
break; break;
default: default:
PADDLE_THROW(platform::errors::Unimplemented( PADDLE_THROW(phi::errors::Unimplemented(
"Unrecognized MIOPEN activation mode: %d.", "Unrecognized MIOPEN activation mode: %d.",
static_cast<int>(activation_mode))); static_cast<int>(activation_mode)));
} }
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetActivationDescriptor( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor(
desc_, mode, relu_ceiling, 0.0, 0.0)); desc_, mode, relu_ceiling, 0.0, 0.0));
return desc_; return desc_;
} }
...@@ -556,15 +568,17 @@ class ScopedActivationDescriptor { ...@@ -556,15 +568,17 @@ class ScopedActivationDescriptor {
class ScopedCTCLossDescriptor { class ScopedCTCLossDescriptor {
public: public:
ScopedCTCLossDescriptor() { ScopedCTCLossDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateCTCLossDescriptor(&desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateCTCLossDescriptor(&desc_));
} }
~ScopedCTCLossDescriptor() PADDLE_MAY_THROW { ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyCTCLossDescriptor(desc_)); PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyCTCLossDescriptor(desc_));
} }
template <typename T> template <typename T>
inline miopenCTCLossDescriptor_t descriptor() { inline miopenCTCLossDescriptor_t descriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetCTCLossDescriptor( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetCTCLossDescriptor(
desc_, CudnnDataType<T>::type, 0, false)); desc_, CudnnDataType<T>::type, 0, false));
return desc_; return desc_;
} }
...@@ -574,5 +588,6 @@ class ScopedCTCLossDescriptor { ...@@ -574,5 +588,6 @@ class ScopedCTCLossDescriptor {
DISABLE_COPY_AND_ASSIGN(ScopedCTCLossDescriptor); DISABLE_COPY_AND_ASSIGN(ScopedCTCLossDescriptor);
}; };
} // namespace platform } // namespace gpu
} // namespace paddle } // namespace backends
} // namespace phi
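The scoped-descriptor wrappers keep their interfaces; only the namespace changes. A minimal usage sketch of the (datatype, dims, strides) overload visible above follows; the dims/strides are made up and the include path paddle/phi/backends/gpu/rocm/miopen_helper.h is an assumed new location.

#include <vector>
#include "paddle/phi/backends/gpu/rocm/miopen_helper.h"  // assumed new location of this header

// Sketch only: builds a 4-D NCHW float descriptor via the moved ScopedTensorDescriptor.
miopenTensorDescriptor_t BuildDesc() {
  static phi::backends::gpu::ScopedTensorDescriptor x_desc;  // was paddle::platform::ScopedTensorDescriptor
  std::vector<int> dims = {8, 3, 32, 32};
  std::vector<int> strides = {3 * 32 * 32, 32 * 32, 32, 1};
  return x_desc.descriptor(miopenFloat, dims, strides);
}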
...@@ -14,9 +14,9 @@ limitations under the License. */ ...@@ -14,9 +14,9 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/cross_entropy.h" #include "paddle/phi/kernels/funcs/cross_entropy.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/math.h" #include "paddle/phi/kernels/funcs/math.h"
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
#include "paddle/fluid/operators/layout_utils.h" #include "paddle/fluid/operators/layout_utils.h"
#include "paddle/fluid/operators/norm_utils.cu.h" #include "paddle/fluid/operators/norm_utils.cu.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/flags.h" #include "paddle/fluid/platform/flags.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/layout.h" #include "paddle/phi/common/layout.h"
#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
...@@ -37,7 +37,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); ...@@ -37,7 +37,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent);
namespace phi { namespace phi {
template <typename T> template <typename T>
using CudnnDataType = paddle::platform::CudnnDataType<T>; using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
template <typename T> template <typename T>
using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType; using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;
...@@ -644,7 +644,7 @@ void BatchNormGradRawKernel(const Context &ctx, ...@@ -644,7 +644,7 @@ void BatchNormGradRawKernel(const Context &ctx,
C, C,
scale.dims()[0])); scale.dims()[0]));
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
auto compute_format = auto compute_format =
data_layout == DataLayout::kNHWC ? DataLayout::kNHWC : DataLayout::kNCHW; data_layout == DataLayout::kNHWC ? DataLayout::kNHWC : DataLayout::kNCHW;
......
...@@ -22,9 +22,9 @@ namespace cub = hipcub; ...@@ -22,9 +22,9 @@ namespace cub = hipcub;
#include "paddle/fluid/operators/layout_utils.h" #include "paddle/fluid/operators/layout_utils.h"
#include "paddle/fluid/operators/norm_utils.cu.h" #include "paddle/fluid/operators/norm_utils.cu.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/flags.h" #include "paddle/fluid/platform/flags.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/layout.h" #include "paddle/phi/common/layout.h"
#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
...@@ -45,7 +45,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); ...@@ -45,7 +45,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent);
namespace phi { namespace phi {
template <typename T> template <typename T>
using CudnnDataType = paddle::platform::CudnnDataType<T>; using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
template <typename T> template <typename T>
using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType; using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;
...@@ -603,7 +603,7 @@ void BatchNormKernel(const Context &ctx, ...@@ -603,7 +603,7 @@ void BatchNormKernel(const Context &ctx,
int N, C, H, W, D; int N, C, H, W, D;
phi::funcs::ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D); phi::funcs::ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
auto compute_format = auto compute_format =
......
...@@ -23,8 +23,8 @@ namespace cub = hipcub; ...@@ -23,8 +23,8 @@ namespace cub = hipcub;
#endif #endif
#include "paddle/fluid/operators/math/softmax.h" #include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/tensor_utils.h"
......
...@@ -23,8 +23,8 @@ namespace cub = hipcub; ...@@ -23,8 +23,8 @@ namespace cub = hipcub;
#endif #endif
#include "paddle/fluid/operators/math/softmax.h" #include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/tensor_utils.h"
...@@ -772,10 +772,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx, ...@@ -772,10 +772,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
: MIOPEN_SOFTMAX_MODE_CHANNEL; : MIOPEN_SOFTMAX_MODE_CHANNEL;
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
handle, handle,
paddle::platform::CudnnDataType<T>::kOne(), phi::backends::gpu::CudnnDataType<T>::kOne(),
descp, descp,
logits_data, logits_data,
paddle::platform::CudnnDataType<T>::kZero(), phi::backends::gpu::CudnnDataType<T>::kZero(),
descp, descp,
softmax_data, softmax_data,
MIOPEN_SOFTMAX_LOG, MIOPEN_SOFTMAX_LOG,
...@@ -787,10 +787,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx, ...@@ -787,10 +787,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
handle, handle,
CUDNN_SOFTMAX_LOG, CUDNN_SOFTMAX_LOG,
mode, mode,
paddle::platform::CudnnDataType<T>::kOne(), phi::backends::gpu::CudnnDataType<T>::kOne(),
descp, descp,
logits_data, logits_data,
paddle::platform::CudnnDataType<T>::kZero(), phi::backends::gpu::CudnnDataType<T>::kZero(),
descp, descp,
softmax_data)); softmax_data));
#endif #endif
...@@ -1206,10 +1206,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx, ...@@ -1206,10 +1206,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
: MIOPEN_SOFTMAX_MODE_CHANNEL; : MIOPEN_SOFTMAX_MODE_CHANNEL;
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
handle, handle,
paddle::platform::CudnnDataType<T>::kOne(), phi::backends::gpu::CudnnDataType<T>::kOne(),
descp, descp,
logits_data, logits_data,
paddle::platform::CudnnDataType<T>::kZero(), phi::backends::gpu::CudnnDataType<T>::kZero(),
descp, descp,
softmax_data, softmax_data,
MIOPEN_SOFTMAX_LOG, MIOPEN_SOFTMAX_LOG,
...@@ -1221,10 +1221,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx, ...@@ -1221,10 +1221,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
handle, handle,
CUDNN_SOFTMAX_LOG, CUDNN_SOFTMAX_LOG,
mode, mode,
paddle::platform::CudnnDataType<T>::kOne(), phi::backends::gpu::CudnnDataType<T>::kOne(),
descp, descp,
logits_data, logits_data,
paddle::platform::CudnnDataType<T>::kZero(), phi::backends::gpu::CudnnDataType<T>::kZero(),
descp, descp,
softmax_data)); softmax_data));
#endif #endif
......
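The MIOPEN/cuDNN softmax calls above only change where the alpha/beta scaling constants come from. A sketch of that pattern, under the assumption (consistent with the calls above) that kOne()/kZero() return pointers usable as the alpha/beta arguments; the helper function name is hypothetical.

#include "paddle/phi/backends/gpu/gpu_dnn.h"

// Sketch only: fetch the softmax scaling constants from the phi helper.
template <typename T>
void GetSoftmaxScaling(const void** alpha, const void** beta) {
  *alpha = phi::backends::gpu::CudnnDataType<T>::kOne();   // was paddle::platform::CudnnDataType<T>::kOne()
  *beta = phi::backends::gpu::CudnnDataType<T>::kZero();   // was paddle::platform::CudnnDataType<T>::kZero()
}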
...@@ -26,12 +26,12 @@ ...@@ -26,12 +26,12 @@
namespace cub = hipcub; namespace cub = hipcub;
#endif #endif
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_dnn.h"
namespace phi { namespace phi {
template <typename T> template <typename T>
using CudnnDataType = paddle::platform::CudnnDataType<T>; using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
template <typename T> template <typename T>
using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType; using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#pragma once #pragma once
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/place.h" #include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
...@@ -64,7 +64,7 @@ class RNNDescriptors { ...@@ -64,7 +64,7 @@ class RNNDescriptors {
size_t *reserve_size, size_t *reserve_size,
DenseTensor *dropout_state) { DenseTensor *dropout_state) {
int numDirections = is_bidirec_ ? 2 : 1; int numDirections = is_bidirec_ ? 2 : 1;
gpuDnnDataType_t cudnn_type = paddle::platform::CudnnDataType<T>::type; gpuDnnDataType_t cudnn_type = phi::backends::gpu::CudnnDataType<T>::type;
// ------------------- cudnn x, y descriptors --------------------- // ------------------- cudnn x, y descriptors ---------------------
std::vector<int> dims_x = {batch_size_, input_size_, 1}; std::vector<int> dims_x = {batch_size_, input_size_, 1};
std::vector<int> strides_x = {input_size_, 1, 1}; std::vector<int> strides_x = {input_size_, 1, 1};
...@@ -179,7 +179,7 @@ class RNNDescriptors { ...@@ -179,7 +179,7 @@ class RNNDescriptors {
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"The cudnn rnn and setting weight size should be same.")); "The cudnn rnn and setting weight size should be same."));
// ------------------- cudnn weight descriptors --------------------- // ------------------- cudnn weight descriptors ---------------------
auto layout = paddle::platform::DataLayout::kNCHW; auto layout = phi::backends::gpu::DataLayout::kNCHW;
int dim_tmp = weights_size_ / sizeof(T); int dim_tmp = weights_size_ / sizeof(T);
std::vector<int> dim_w = {dim_tmp, 1, 1}; std::vector<int> dim_w = {dim_tmp, 1, 1};
weight_desc_.descriptor<T>(layout, dim_w); weight_desc_.descriptor<T>(layout, dim_w);
...@@ -250,19 +250,19 @@ class RNNDescriptors { ...@@ -250,19 +250,19 @@ class RNNDescriptors {
std::vector<cudnnTensorDescriptor_t> y_descs_; std::vector<cudnnTensorDescriptor_t> y_descs_;
#endif #endif
paddle::platform::ScopedTensorDescriptor x_desc_; phi::backends::gpu::ScopedTensorDescriptor x_desc_;
paddle::platform::ScopedTensorDescriptor y_desc_; phi::backends::gpu::ScopedTensorDescriptor y_desc_;
#if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201 #if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201
paddle::platform::ScopedRNNTensorDescriptor x_seq_desc_; phi::backends::gpu::ScopedRNNTensorDescriptor x_seq_desc_;
paddle::platform::ScopedRNNTensorDescriptor y_seq_desc_; phi::backends::gpu::ScopedRNNTensorDescriptor y_seq_desc_;
#endif #endif
paddle::platform::ScopedTensorDescriptor init_h_desc_; phi::backends::gpu::ScopedTensorDescriptor init_h_desc_;
paddle::platform::ScopedTensorDescriptor init_c_desc_; phi::backends::gpu::ScopedTensorDescriptor init_c_desc_;
paddle::platform::ScopedTensorDescriptor last_h_desc_; phi::backends::gpu::ScopedTensorDescriptor last_h_desc_;
paddle::platform::ScopedTensorDescriptor last_c_desc_; phi::backends::gpu::ScopedTensorDescriptor last_c_desc_;
paddle::platform::ScopedDropoutDescriptor dropout_desc_; phi::backends::gpu::ScopedDropoutDescriptor dropout_desc_;
paddle::platform::ScopedFilterDescriptor weight_desc_; phi::backends::gpu::ScopedFilterDescriptor weight_desc_;
paddle::platform::ScopedRNNDescriptor rnn_desc_; phi::backends::gpu::ScopedRNNDescriptor rnn_desc_;
}; };
template <typename T, typename Type> template <typename T, typename Type>
......
...@@ -31,15 +31,15 @@ namespace cub = hipcub; ...@@ -31,15 +31,15 @@ namespace cub = hipcub;
#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" #include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
#endif #endif
#include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/layout.h" #include "paddle/phi/common/layout.h"
#include "paddle/phi/kernels/funcs/norm_utils.h" #include "paddle/phi/kernels/funcs/norm_utils.h"
namespace phi { namespace phi {
template <typename T> template <typename T>
using CudnnDataType = paddle::platform::CudnnDataType<T>; using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
template <typename T> template <typename T>
using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType; using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;
......
...@@ -15,11 +15,11 @@ ...@@ -15,11 +15,11 @@
#ifndef PADDLE_WITH_HIP #ifndef PADDLE_WITH_HIP
#include "paddle/phi/kernels/affine_grid_grad_kernel.h" #include "paddle/phi/kernels/affine_grid_grad_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/common/int_array.h" #include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
namespace phi { namespace phi {
using ScopedSpatialTransformerDescriptor = using ScopedSpatialTransformerDescriptor =
paddle::platform::ScopedSpatialTransformerDescriptor; phi::backends::gpu::ScopedSpatialTransformerDescriptor;
template <typename T, typename Context> template <typename T, typename Context>
void AffineGridGradCudnnKernel(const Context& dev_ctx, void AffineGridGradCudnnKernel(const Context& dev_ctx,
......
...@@ -15,11 +15,11 @@ ...@@ -15,11 +15,11 @@
#ifndef PADDLE_WITH_HIP #ifndef PADDLE_WITH_HIP
#include "paddle/phi/kernels/affine_grid_kernel.h" #include "paddle/phi/kernels/affine_grid_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/common/int_array.h" #include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
namespace phi { namespace phi {
using ScopedSpatialTransformerDescriptor = using ScopedSpatialTransformerDescriptor =
paddle::platform::ScopedSpatialTransformerDescriptor; phi::backends::gpu::ScopedSpatialTransformerDescriptor;
template <typename T, typename Context> template <typename T, typename Context>
void AffineGridCudnnKernel(const Context& dev_ctx, void AffineGridCudnnKernel(const Context& dev_ctx,
......
...@@ -17,8 +17,8 @@ limitations under the License. */ ...@@ -17,8 +17,8 @@ limitations under the License. */
#include <vector> #include <vector>
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h"
#include "paddle/phi/backends/dynload/cudnn_frontend.h" #include "paddle/phi/backends/dynload/cudnn_frontend.h"
#include "paddle/phi/backends/gpu/cuda/cudnn_desc.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/core/utils/data_type.h"
...@@ -86,7 +86,8 @@ class CudnnFrontendConvHelper { ...@@ -86,7 +86,8 @@ class CudnnFrontendConvHelper {
cudnnTensorFormat_t layout_format) { cudnnTensorFormat_t layout_format) {
auto transformed_dims = phi::vectorize<int64_t>(tensor->dims()); auto transformed_dims = phi::vectorize<int64_t>(tensor->dims());
if (layout_format == CUDNN_TENSOR_NHWC) { if (layout_format == CUDNN_TENSOR_NHWC) {
transformed_dims = paddle::platform::TransformDimOrder(transformed_dims); transformed_dims =
phi::backends::gpu::TransformDimOrder(transformed_dims);
} }
std::vector<int64_t> strides = std::vector<int64_t> strides =
GenerateStrides(transformed_dims, layout_format); GenerateStrides(transformed_dims, layout_format);
...@@ -95,7 +96,7 @@ class CudnnFrontendConvHelper { ...@@ -95,7 +96,7 @@ class CudnnFrontendConvHelper {
.setStrides(strides.size(), strides.data()) .setStrides(strides.size(), strides.data())
.setId(id) .setId(id)
.setAlignment(GetAlignment(tensor)) .setAlignment(GetAlignment(tensor))
.setDataType(paddle::platform::ToCudnnDataType(tensor->dtype())) .setDataType(phi::backends::gpu::ToCudnnDataType(tensor->dtype()))
.build(); .build();
} }
......
...@@ -559,7 +559,7 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> { ...@@ -559,7 +559,7 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
size_t workspace_size_limit = size_t workspace_size_limit =
CalcWorkspaceLimitInBytes(UseFixedWorkspace()); CalcWorkspaceLimitInBytes(UseFixedWorkspace());
auto workspace_handle = ctx.cudnn_workspace_handle(); auto workspace_handle = ctx.cudnn_workspace_handle();
if (paddle::platform::CudnnDataType<T>::type != CUDNN_DATA_HALF) { if (phi::backends::gpu::CudnnDataType<T>::type != CUDNN_DATA_HALF) {
size_t max_workspace_size = size_t max_workspace_size =
GetMaxWorkspaceSize(args, workspace_size_limit); GetMaxWorkspaceSize(args, workspace_size_limit);
VLOG(3) << "max_workspace_size=" << ToMegaBytes(max_workspace_size) VLOG(3) << "max_workspace_size=" << ToMegaBytes(max_workspace_size)
...@@ -674,7 +674,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> { ...@@ -674,7 +674,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> {
bool enable_autotune = true) { bool enable_autotune = true) {
SearchResult<AlgoT> result; SearchResult<AlgoT> result;
bool use_autotune = false; bool use_autotune = false;
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
SetConvMathType(ctx, dtype, args.cdesc); SetConvMathType(ctx, dtype, args.cdesc);
if (deterministic) { if (deterministic) {
...@@ -734,7 +734,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> { ...@@ -734,7 +734,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> {
static void SetConvMathType( static void SetConvMathType(
const phi::GPUContext& ctx, const phi::GPUContext& ctx,
cudnnDataType_t dtype, cudnnDataType_t dtype,
const paddle::platform::ConvolutionDescriptor& cdesc) { const phi::backends::gpu::ConvolutionDescriptor& cdesc) {
#if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
if (ctx.GetComputeCapability() >= 70 && dtype == CUDNN_DATA_HALF) { if (ctx.GetComputeCapability() >= 70 && dtype == CUDNN_DATA_HALF) {
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType(
......
...@@ -20,6 +20,7 @@ limitations under the License. */ ...@@ -20,6 +20,7 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/memory/memory.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/kernels/autotune/cache.h" #include "paddle/phi/kernels/autotune/cache.h"
#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/common.h"
...@@ -28,11 +29,11 @@ limitations under the License. */ ...@@ -28,11 +29,11 @@ limitations under the License. */
namespace phi { namespace phi {
using GPUDNNDataLayout = paddle::platform::DataLayout; using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
template <typename T> template <typename T>
using ScalingParamType = using ScalingParamType =
typename paddle::platform::CudnnDataType<T>::ScalingParamType; typename phi::backends::gpu::CudnnDataType<T>::ScalingParamType;
enum class ConvKind { kForward = 1, kBackwardData = 2, kBackwardFilter = 3 }; enum class ConvKind { kForward = 1, kBackwardData = 2, kBackwardFilter = 3 };
...@@ -96,10 +97,10 @@ static std::ostream& operator<<(std::ostream& out, const std::vector<T>& v) { ...@@ -96,10 +97,10 @@ static std::ostream& operator<<(std::ostream& out, const std::vector<T>& v) {
template <typename HandleT, typename DataT> template <typename HandleT, typename DataT>
struct ConvArgsBase { struct ConvArgsBase {
HandleT handle; HandleT handle;
paddle::platform::TensorDescriptor idesc; phi::backends::gpu::TensorDescriptor idesc;
paddle::platform::TensorDescriptor odesc; phi::backends::gpu::TensorDescriptor odesc;
paddle::platform::FilterDescriptor wdesc; phi::backends::gpu::FilterDescriptor wdesc;
paddle::platform::ConvolutionDescriptor cdesc; phi::backends::gpu::ConvolutionDescriptor cdesc;
const phi::DenseTensor* x = nullptr; const phi::DenseTensor* x = nullptr;
const phi::DenseTensor* w = nullptr; const phi::DenseTensor* w = nullptr;
......
...@@ -18,7 +18,7 @@ limitations under the License. */ ...@@ -18,7 +18,7 @@ limitations under the License. */
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_dnn.h"
DECLARE_int64(conv_workspace_size_limit); DECLARE_int64(conv_workspace_size_limit);
DECLARE_bool(cudnn_exhaustive_search); DECLARE_bool(cudnn_exhaustive_search);
......
...@@ -52,8 +52,8 @@ void ConvCudnnGradKernelImplV7( ...@@ -52,8 +52,8 @@ void ConvCudnnGradKernelImplV7(
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& padding_common, const std::vector<int>& padding_common,
const std::vector<int>& dilations, const std::vector<int>& dilations,
paddle::platform::DataLayout compute_format, phi::backends::gpu::DataLayout compute_format,
paddle::platform::DataLayout layout, phi::backends::gpu::DataLayout layout,
bool use_addto, bool use_addto,
bool exhaustive_search, bool exhaustive_search,
bool deterministic, bool deterministic,
...@@ -70,8 +70,8 @@ void ConvCudnnGradKernelImplV7( ...@@ -70,8 +70,8 @@ void ConvCudnnGradKernelImplV7(
auto handle = ctx.cudnn_handle(); auto handle = ctx.cudnn_handle();
auto workspace_handle = ctx.cudnn_workspace_handle(); auto workspace_handle = ctx.cudnn_workspace_handle();
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout); auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout);
ConvArgs args1{handle, ConvArgs args1{handle,
transformed_input_grad, transformed_input_grad,
...@@ -96,16 +96,16 @@ void ConvCudnnGradKernelImplV7( ...@@ -96,16 +96,16 @@ void ConvCudnnGradKernelImplV7(
int i_n, i_c, i_d, i_h, i_w; int i_n, i_c, i_d, i_h, i_w;
int o_n, o_c, o_d, o_h, o_w; int o_n, o_c, o_d, o_h, o_w;
if (compute_format == paddle::platform::DataLayout::kNHWC) { if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
GetNCDHW(transformed_input->dims(), GetNCDHW(transformed_input->dims(),
paddle::platform::DataLayout::kNHWC, phi::backends::gpu::DataLayout::kNHWC,
&i_n, &i_n,
&i_c, &i_c,
&i_d, &i_d,
&i_h, &i_h,
&i_w); &i_w);
GetNCDHW(transformed_output_grad_channel->dims(), GetNCDHW(transformed_output_grad_channel->dims(),
paddle::platform::DataLayout::kNHWC, phi::backends::gpu::DataLayout::kNHWC,
&o_n, &o_n,
&o_c, &o_c,
&o_d, &o_d,
...@@ -113,14 +113,14 @@ void ConvCudnnGradKernelImplV7( ...@@ -113,14 +113,14 @@ void ConvCudnnGradKernelImplV7(
&o_w); &o_w);
} else { } else {
GetNCDHW(transformed_input->dims(), GetNCDHW(transformed_input->dims(),
paddle::platform::DataLayout::kNCHW, phi::backends::gpu::DataLayout::kNCHW,
&i_n, &i_n,
&i_c, &i_c,
&i_d, &i_d,
&i_h, &i_h,
&i_w); &i_w);
GetNCDHW(transformed_output_grad_channel->dims(), GetNCDHW(transformed_output_grad_channel->dims(),
paddle::platform::DataLayout::kNCHW, phi::backends::gpu::DataLayout::kNCHW,
&o_n, &o_n,
&o_c, &o_c,
&o_d, &o_d,
...@@ -347,7 +347,7 @@ void ConvCudnnGradKernelImplV8( ...@@ -347,7 +347,7 @@ void ConvCudnnGradKernelImplV8(
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& padding_common, const std::vector<int>& padding_common,
const std::vector<int>& dilations, const std::vector<int>& dilations,
paddle::platform::DataLayout layout, phi::backends::gpu::DataLayout layout,
bool use_addto, bool use_addto,
bool exhaustive_search, bool exhaustive_search,
bool deterministic, bool deterministic,
...@@ -363,8 +363,8 @@ void ConvCudnnGradKernelImplV8( ...@@ -363,8 +363,8 @@ void ConvCudnnGradKernelImplV8(
cudnnHandle_t handle = const_cast<cudnnHandle_t>(ctx.cudnn_handle()); cudnnHandle_t handle = const_cast<cudnnHandle_t>(ctx.cudnn_handle());
auto workspace_handle = ctx.cudnn_workspace_handle(); auto workspace_handle = ctx.cudnn_workspace_handle();
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto layout_format = paddle::platform::GetCudnnTensorFormat(layout); auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout);
if (input_grad) { if (input_grad) {
CudnnConvBwdDataV8<T>(transformed_output_grad_channel, CudnnConvBwdDataV8<T>(transformed_output_grad_channel,
...@@ -449,11 +449,11 @@ void ConvCudnnGradKernel(const Context& ctx, ...@@ -449,11 +449,11 @@ void ConvCudnnGradKernel(const Context& ctx,
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
// HIP MIOPEN ONLY SUPPORTS NCHW format // HIP MIOPEN ONLY SUPPORTS NCHW format
auto compute_format = paddle::platform::DataLayout::kNCHW; auto compute_format = phi::backends::gpu::DataLayout::kNCHW;
#else #else
#if CUDNN_VERSION_MIN(8, 1, 0) #if CUDNN_VERSION_MIN(8, 1, 0)
const bool compute_in_nhwc = const bool compute_in_nhwc =
...@@ -463,13 +463,13 @@ void ConvCudnnGradKernel(const Context& ctx, ...@@ -463,13 +463,13 @@ void ConvCudnnGradKernel(const Context& ctx,
const bool compute_in_nhwc = dtype == CUDNN_DATA_HALF && IsVoltaOrLater(ctx); const bool compute_in_nhwc = dtype == CUDNN_DATA_HALF && IsVoltaOrLater(ctx);
#endif #endif
auto compute_format = compute_in_nhwc && channel_last auto compute_format = compute_in_nhwc && channel_last
? paddle::platform::DataLayout::kNHWC ? phi::backends::gpu::DataLayout::kNHWC
: paddle::platform::DataLayout::kNCHW; : phi::backends::gpu::DataLayout::kNCHW;
#endif #endif
VLOG(3) << "Compute ConvGradOp with cuDNN:" VLOG(3) << "Compute ConvGradOp with cuDNN:"
<< " data_format=" << data_format << " compute_format=" << " data_format=" << data_format << " compute_format="
<< (compute_format == paddle::platform::DataLayout::kNHWC ? "NHWC" << (compute_format == phi::backends::gpu::DataLayout::kNHWC ? "NHWC"
: "NCHW"); : "NCHW");
// transform Tensor // transform Tensor
DenseTensor transformed_input_channel(input.type()); DenseTensor transformed_input_channel(input.type());
...@@ -478,7 +478,7 @@ void ConvCudnnGradKernel(const Context& ctx, ...@@ -478,7 +478,7 @@ void ConvCudnnGradKernel(const Context& ctx,
DenseTensor transformed_filter_channel(filter.type()); DenseTensor transformed_filter_channel(filter.type());
DenseTensor transformed_filter_grad_channel(filter.type()); DenseTensor transformed_filter_grad_channel(filter.type());
if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) {
VLOG(3) << "Transform input, output_grad, input_grad and tensor from " VLOG(3) << "Transform input, output_grad, input_grad and tensor from "
"NHWC to NCHW."; "NHWC to NCHW.";
ResizeToChannelFirst<Context, T>(ctx, &input, &transformed_input_channel); ResizeToChannelFirst<Context, T>(ctx, &input, &transformed_input_channel);
...@@ -507,7 +507,7 @@ void ConvCudnnGradKernel(const Context& ctx, ...@@ -507,7 +507,7 @@ void ConvCudnnGradKernel(const Context& ctx,
} }
} }
if (compute_format == paddle::platform::DataLayout::kNHWC) { if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
VLOG(3) << "Transform filter and filter_grad tensor from NCHW to NHWC."; VLOG(3) << "Transform filter and filter_grad tensor from NCHW to NHWC.";
ResizeToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel); ResizeToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel);
TransToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel); TransToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel);
...@@ -528,7 +528,7 @@ void ConvCudnnGradKernel(const Context& ctx, ...@@ -528,7 +528,7 @@ void ConvCudnnGradKernel(const Context& ctx,
auto filter_dims = transformed_filter_channel.dims(); auto filter_dims = transformed_filter_channel.dims();
DDim in_data_dims; DDim in_data_dims;
DDim filter_data_dims; DDim filter_data_dims;
if (compute_format == paddle::platform::DataLayout::kNCHW) { if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
in_data_dims = slice_ddim(in_dims, 2, in_dims.size()); in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
} else { } else {
...@@ -553,7 +553,7 @@ void ConvCudnnGradKernel(const Context& ctx, ...@@ -553,7 +553,7 @@ void ConvCudnnGradKernel(const Context& ctx,
std::vector<int> padding_diff(data_dim); std::vector<int> padding_diff(data_dim);
std::vector<int> new_input_shape_vec(data_dim + 2); std::vector<int> new_input_shape_vec(data_dim + 2);
new_input_shape_vec[0] = transformed_input_channel.dims()[0]; new_input_shape_vec[0] = transformed_input_channel.dims()[0];
if (compute_format == paddle::platform::DataLayout::kNCHW) { if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
new_input_shape_vec[1] = transformed_input_channel.dims()[1]; new_input_shape_vec[1] = transformed_input_channel.dims()[1];
} else { } else {
new_input_shape_vec[data_dim + 1] = new_input_shape_vec[data_dim + 1] =
...@@ -563,14 +563,14 @@ void ConvCudnnGradKernel(const Context& ctx, ...@@ -563,14 +563,14 @@ void ConvCudnnGradKernel(const Context& ctx,
for (size_t i = 0; i < data_dim; ++i) { for (size_t i = 0; i < data_dim; ++i) {
padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]); padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]); padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
if (compute_format == paddle::platform::DataLayout::kNCHW) { if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
new_input_shape_vec[i + 2] = new_input_shape_vec[i + 2] =
transformed_input_channel.dims()[i + 2] + padding_diff[i]; transformed_input_channel.dims()[i + 2] + padding_diff[i];
} else { } else {
new_input_shape_vec[i + 1] = new_input_shape_vec[i + 1] =
transformed_input_channel.dims()[i + 1] + padding_diff[i]; transformed_input_channel.dims()[i + 1] + padding_diff[i];
} }
if (compute_format == paddle::platform::DataLayout::kNCHW) { if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i]; input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i]; input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
} else { } else {
...@@ -624,15 +624,15 @@ void ConvCudnnGradKernel(const Context& ctx, ...@@ -624,15 +624,15 @@ void ConvCudnnGradKernel(const Context& ctx,
} }
} }
} }
paddle::platform::DataLayout layout = phi::backends::gpu::DataLayout layout =
compute_format == paddle::platform::DataLayout::kNHWC compute_format == phi::backends::gpu::DataLayout::kNHWC
? paddle::platform::DataLayout::kNHWC ? phi::backends::gpu::DataLayout::kNHWC
: paddle::platform::DataLayout::kNCHW; : phi::backends::gpu::DataLayout::kNCHW;
// TODO(phlrain): replace paddle::platform::DataLayout with phi::DataLayout // TODO(phlrain): replace paddle::platform::DataLayout with phi::DataLayout
if (transformed_input.dims().size() == 5) { if (transformed_input.dims().size() == 5) {
layout = compute_format == paddle::platform::DataLayout::kNHWC layout = compute_format == phi::backends::gpu::DataLayout::kNHWC
? paddle::platform::DataLayout::kNDHWC ? phi::backends::gpu::DataLayout::kNDHWC
: paddle::platform::DataLayout::kNCDHW; : phi::backends::gpu::DataLayout::kNCDHW;
} }
#ifdef PADDLE_WITH_CUDNN_FRONTEND #ifdef PADDLE_WITH_CUDNN_FRONTEND
...@@ -717,14 +717,15 @@ void ConvCudnnGradKernel(const Context& ctx, ...@@ -717,14 +717,15 @@ void ConvCudnnGradKernel(const Context& ctx,
} }
} }
if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { if (channel_last &&
compute_format == phi::backends::gpu::DataLayout::kNCHW) {
TransToChannelLast<Context, T>( TransToChannelLast<Context, T>(
ctx, &transformed_input_grad_channel, input_grad); ctx, &transformed_input_grad_channel, input_grad);
} }
} }
if (filter_grad) { if (filter_grad) {
if (compute_format == paddle::platform::DataLayout::kNHWC) { if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
TransToChannelFirst<Context, T>( TransToChannelFirst<Context, T>(
ctx, &transformed_filter_grad_channel, filter_grad); ctx, &transformed_filter_grad_channel, filter_grad);
} }
...@@ -1008,11 +1009,11 @@ void ConvCudnnGradGradKernel( ...@@ -1008,11 +1009,11 @@ void ConvCudnnGradGradKernel(
c_group = groups; c_group = groups;
groups = 1; groups = 1;
#endif #endif
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto handle = ctx.cudnn_handle(); auto handle = ctx.cudnn_handle();
auto layout = paddle::platform::GetCudnnTensorFormat( auto layout = phi::backends::gpu::GetCudnnTensorFormat(
paddle::platform::DataLayout::kNCHW); phi::backends::gpu::DataLayout::kNCHW);
ConvArgs args1{handle, ConvArgs args1{handle,
&transformed_ddX, &transformed_ddX,
...@@ -1023,7 +1024,7 @@ void ConvCudnnGradGradKernel( ...@@ -1023,7 +1024,7 @@ void ConvCudnnGradGradKernel(
dilations, dilations,
dtype, dtype,
groups, groups,
paddle::platform::DataLayout::kNCHW}; phi::backends::gpu::DataLayout::kNCHW};
ConvArgs args2{handle, ConvArgs args2{handle,
&transformed_X, &transformed_X,
ddW, ddW,
...@@ -1033,7 +1034,7 @@ void ConvCudnnGradGradKernel( ...@@ -1033,7 +1034,7 @@ void ConvCudnnGradGradKernel(
dilations, dilations,
dtype, dtype,
groups, groups,
paddle::platform::DataLayout::kNCHW}; phi::backends::gpu::DataLayout::kNCHW};
ConvArgs args3{handle, ConvArgs args3{handle,
&transformed_ddX, &transformed_ddX,
dW, dW,
...@@ -1043,7 +1044,7 @@ void ConvCudnnGradGradKernel( ...@@ -1043,7 +1044,7 @@ void ConvCudnnGradGradKernel(
dilations, dilations,
dtype, dtype,
groups, groups,
paddle::platform::DataLayout::kNCHW}; phi::backends::gpu::DataLayout::kNCHW};
ConvArgs args4{handle, ConvArgs args4{handle,
&transformed_dX, &transformed_dX,
ddW, ddW,
...@@ -1053,7 +1054,7 @@ void ConvCudnnGradGradKernel( ...@@ -1053,7 +1054,7 @@ void ConvCudnnGradGradKernel(
dilations, dilations,
dtype, dtype,
groups, groups,
paddle::platform::DataLayout::kNCHW}; phi::backends::gpu::DataLayout::kNCHW};
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
SearchResult<miopenConvFwdAlgorithm_t> fwd_result1; SearchResult<miopenConvFwdAlgorithm_t> fwd_result1;
......
...@@ -50,8 +50,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, ...@@ -50,8 +50,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& padding_common, const std::vector<int>& padding_common,
const std::vector<int>& dilations, const std::vector<int>& dilations,
paddle::platform::DataLayout compute_format, phi::backends::gpu::DataLayout compute_format,
paddle::platform::DataLayout layout, phi::backends::gpu::DataLayout layout,
bool exhaustive_search, bool exhaustive_search,
bool deterministic, bool deterministic,
int groups, int groups,
...@@ -63,8 +63,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, ...@@ -63,8 +63,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input,
auto handle = ctx.cudnn_handle(); auto handle = ctx.cudnn_handle();
auto workspace_handle = ctx.cudnn_workspace_handle(); auto workspace_handle = ctx.cudnn_workspace_handle();
auto layout_format = paddle::platform::GetCudnnTensorFormat(layout); auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout);
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
// ------------------- cudnn descriptors --------------------- // ------------------- cudnn descriptors ---------------------
ConvArgs args{handle, ConvArgs args{handle,
...@@ -113,16 +113,16 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, ...@@ -113,16 +113,16 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input,
int i_n, i_c, i_d, i_h, i_w; int i_n, i_c, i_d, i_h, i_w;
int o_n, o_c, o_d, o_h, o_w; int o_n, o_c, o_d, o_h, o_w;
if (compute_format == paddle::platform::DataLayout::kNHWC) { if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
GetNCDHW(transformed_input->dims(), GetNCDHW(transformed_input->dims(),
paddle::platform::DataLayout::kNHWC, phi::backends::gpu::DataLayout::kNHWC,
&i_n, &i_n,
&i_c, &i_c,
&i_d, &i_d,
&i_h, &i_h,
&i_w); &i_w);
GetNCDHW(transformed_output->dims(), GetNCDHW(transformed_output->dims(),
paddle::platform::DataLayout::kNHWC, phi::backends::gpu::DataLayout::kNHWC,
&o_n, &o_n,
&o_c, &o_c,
&o_d, &o_d,
...@@ -130,14 +130,14 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, ...@@ -130,14 +130,14 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input,
&o_w); &o_w);
} else { } else {
GetNCDHW(transformed_input->dims(), GetNCDHW(transformed_input->dims(),
paddle::platform::DataLayout::kNCHW, phi::backends::gpu::DataLayout::kNCHW,
&i_n, &i_n,
&i_c, &i_c,
&i_d, &i_d,
&i_h, &i_h,
&i_w); &i_w);
GetNCDHW(transformed_output->dims(), GetNCDHW(transformed_output->dims(),
paddle::platform::DataLayout::kNCHW, phi::backends::gpu::DataLayout::kNCHW,
&o_n, &o_n,
&o_c, &o_c,
&o_d, &o_d,
...@@ -227,7 +227,7 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor, ...@@ -227,7 +227,7 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& padding_common, const std::vector<int>& padding_common,
const std::vector<int>& dilations, const std::vector<int>& dilations,
paddle::platform::DataLayout layout, phi::backends::gpu::DataLayout layout,
bool exhaustive_search, bool exhaustive_search,
bool deterministic, bool deterministic,
int groups, int groups,
...@@ -247,8 +247,8 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor, ...@@ -247,8 +247,8 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor,
cudnnHandle_t handle = const_cast<cudnnHandle_t>(ctx.cudnn_handle()); cudnnHandle_t handle = const_cast<cudnnHandle_t>(ctx.cudnn_handle());
auto workspace_handle = ctx.cudnn_workspace_handle(); auto workspace_handle = ctx.cudnn_workspace_handle();
auto layout_format = paddle::platform::GetCudnnTensorFormat(layout); auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout);
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
float alpha = 1.0f; float alpha = 1.0f;
float beta = 0.0f; float beta = 0.0f;
...@@ -368,11 +368,11 @@ void ConvCudnnKernel(const Context& ctx, ...@@ -368,11 +368,11 @@ void ConvCudnnKernel(const Context& ctx,
"FLAGS_cudnn_deterministic True at same time.")); "FLAGS_cudnn_deterministic True at same time."));
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
// HIP MIOPEN ONLY SUPPORT NCHW format // HIP MIOPEN ONLY SUPPORT NCHW format
auto compute_format = paddle::platform::DataLayout::kNCHW; auto compute_format = phi::backends::gpu::DataLayout::kNCHW;
#else #else
#if CUDNN_VERSION_MIN(8, 1, 0) #if CUDNN_VERSION_MIN(8, 1, 0)
// Tensor Core introduced from Volta GPUs supports more faster conv op // Tensor Core introduced from Volta GPUs supports more faster conv op
...@@ -388,20 +388,20 @@ void ConvCudnnKernel(const Context& ctx, ...@@ -388,20 +388,20 @@ void ConvCudnnKernel(const Context& ctx,
// We will only do data format conversion from NHWC to NCHW. // We will only do data format conversion from NHWC to NCHW.
// cudnn will convert NCHW to NHWC automatically on Tensor Core. // cudnn will convert NCHW to NHWC automatically on Tensor Core.
auto compute_format = compute_in_nhwc && channel_last auto compute_format = compute_in_nhwc && channel_last
? paddle::platform::DataLayout::kNHWC ? phi::backends::gpu::DataLayout::kNHWC
: paddle::platform::DataLayout::kNCHW; : phi::backends::gpu::DataLayout::kNCHW;
#endif #endif
VLOG(3) << "Compute ConvOp with cuDNN:" VLOG(3) << "Compute ConvOp with cuDNN:"
<< " data_format=" << data_format << " compute_format=" << " data_format=" << data_format << " compute_format="
<< (compute_format == paddle::platform::DataLayout::kNHWC ? "NHWC" << (compute_format == phi::backends::gpu::DataLayout::kNHWC ? "NHWC"
: "NCHW"); : "NCHW");
// ------------ transformed tensor ----------- // ------------ transformed tensor -----------
DenseTensor transformed_input_channel(input.type()); DenseTensor transformed_input_channel(input.type());
DenseTensor transformed_output(output->type()); DenseTensor transformed_output(output->type());
DenseTensor transformed_filter_channel(filter.type()); DenseTensor transformed_filter_channel(filter.type());
if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) {
VLOG(3) << "Transform input tensor from NHWC to NCHW."; VLOG(3) << "Transform input tensor from NHWC to NCHW.";
ResizeToChannelFirst<Context, T>(ctx, &input, &transformed_input_channel); ResizeToChannelFirst<Context, T>(ctx, &input, &transformed_input_channel);
TransToChannelFirst<Context, T>(ctx, &input, &transformed_input_channel); TransToChannelFirst<Context, T>(ctx, &input, &transformed_input_channel);
...@@ -412,7 +412,7 @@ void ConvCudnnKernel(const Context& ctx, ...@@ -412,7 +412,7 @@ void ConvCudnnKernel(const Context& ctx,
transformed_input_channel.ShareDataWith(input); transformed_input_channel.ShareDataWith(input);
transformed_output.ShareDataWith(*output); transformed_output.ShareDataWith(*output);
} }
if (compute_format == paddle::platform::DataLayout::kNHWC) { if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
VLOG(3) << "Transform filter tensor from NCHW to NHWC."; VLOG(3) << "Transform filter tensor from NCHW to NHWC.";
ResizeToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel); ResizeToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel);
TransToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel); TransToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel);
...@@ -426,7 +426,7 @@ void ConvCudnnKernel(const Context& ctx, ...@@ -426,7 +426,7 @@ void ConvCudnnKernel(const Context& ctx,
DDim in_data_dims; DDim in_data_dims;
DDim filter_data_dims; DDim filter_data_dims;
if (compute_format == paddle::platform::DataLayout::kNCHW) { if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
in_data_dims = slice_ddim(in_dims, 2, in_dims.size()); in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
} else { } else {
...@@ -448,7 +448,7 @@ void ConvCudnnKernel(const Context& ctx, ...@@ -448,7 +448,7 @@ void ConvCudnnKernel(const Context& ctx,
std::vector<int> new_input_shape_vec(data_dim + 2); std::vector<int> new_input_shape_vec(data_dim + 2);
new_input_shape_vec[0] = transformed_input_channel.dims()[0]; new_input_shape_vec[0] = transformed_input_channel.dims()[0];
if (compute_format == paddle::platform::DataLayout::kNCHW) { if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
new_input_shape_vec[1] = transformed_input_channel.dims()[1]; new_input_shape_vec[1] = transformed_input_channel.dims()[1];
} else { } else {
new_input_shape_vec[data_dim + 1] = new_input_shape_vec[data_dim + 1] =
...@@ -459,14 +459,14 @@ void ConvCudnnKernel(const Context& ctx, ...@@ -459,14 +459,14 @@ void ConvCudnnKernel(const Context& ctx,
for (size_t i = 0; i < data_dim; ++i) { for (size_t i = 0; i < data_dim; ++i) {
padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]); padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]); padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
if (compute_format == paddle::platform::DataLayout::kNCHW) { if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
new_input_shape_vec[i + 2] = new_input_shape_vec[i + 2] =
transformed_input_channel.dims()[i + 2] + padding_diff[i]; transformed_input_channel.dims()[i + 2] + padding_diff[i];
} else { } else {
new_input_shape_vec[i + 1] = new_input_shape_vec[i + 1] =
transformed_input_channel.dims()[i + 1] + padding_diff[i]; transformed_input_channel.dims()[i + 1] + padding_diff[i];
} }
if (compute_format == paddle::platform::DataLayout::kNCHW) { if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i]; input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i]; input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
} else { } else {
...@@ -513,14 +513,14 @@ void ConvCudnnKernel(const Context& ctx, ...@@ -513,14 +513,14 @@ void ConvCudnnKernel(const Context& ctx,
} }
} }
paddle::platform::DataLayout layout = phi::backends::gpu::DataLayout layout =
compute_format == paddle::platform::DataLayout::kNHWC compute_format == phi::backends::gpu::DataLayout::kNHWC
? paddle::platform::DataLayout::kNHWC ? phi::backends::gpu::DataLayout::kNHWC
: paddle::platform::DataLayout::kNCHW; : phi::backends::gpu::DataLayout::kNCHW;
if (transformed_input.dims().size() == 5) { if (transformed_input.dims().size() == 5) {
layout = compute_format == paddle::platform::DataLayout::kNHWC layout = compute_format == phi::backends::gpu::DataLayout::kNHWC
? paddle::platform::DataLayout::kNDHWC ? phi::backends::gpu::DataLayout::kNDHWC
: paddle::platform::DataLayout::kNCDHW; : phi::backends::gpu::DataLayout::kNCDHW;
} }
#ifdef PADDLE_WITH_CUDNN_FRONTEND #ifdef PADDLE_WITH_CUDNN_FRONTEND
...@@ -564,7 +564,7 @@ void ConvCudnnKernel(const Context& ctx, ...@@ -564,7 +564,7 @@ void ConvCudnnKernel(const Context& ctx,
&transformed_output); &transformed_output);
#endif #endif
if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) { if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) {
TransToChannelLast<Context, T>(ctx, &transformed_output, output); TransToChannelLast<Context, T>(ctx, &transformed_output, output);
} }
} }
......
...@@ -28,16 +28,16 @@ limitations under the License. */ ...@@ -28,16 +28,16 @@ limitations under the License. */
#include "paddle/phi/kernels/transpose_kernel.h" #include "paddle/phi/kernels/transpose_kernel.h"
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h" #include "paddle/phi/backends/gpu/rocm/miopen_helper.h"
#include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h" #include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h"
#else #else
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" #include "paddle/phi/backends/gpu/cuda/cudnn_helper.h"
#include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h" #include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h"
#endif #endif
namespace phi { namespace phi {
using GPUDNNDataLayout = paddle::platform::DataLayout; using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
template <typename T, typename Context> template <typename T, typename Context>
void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, void ConvTransposeGradRawGPUDNNKernel(const Context& ctx,
...@@ -171,7 +171,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, ...@@ -171,7 +171,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx,
groups = 1; groups = 1;
#endif #endif
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto handle = ctx.cudnn_handle(); auto handle = ctx.cudnn_handle();
ConvArgs args1{handle, ConvArgs args1{handle,
...@@ -203,7 +203,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, ...@@ -203,7 +203,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx,
SearchResult<cudnnConvolutionBwdFilterAlgo_t> filter_result; SearchResult<cudnnConvolutionBwdFilterAlgo_t> filter_result;
#endif #endif
auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout); auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout);
size_t workspace_size = 0; size_t workspace_size = 0;
bool deterministic = FLAGS_cudnn_deterministic; bool deterministic = FLAGS_cudnn_deterministic;
T* dx_data = nullptr; T* dx_data = nullptr;
...@@ -616,10 +616,11 @@ void Conv2dTransposeDoubleGradGPUDNNKernel( ...@@ -616,10 +616,11 @@ void Conv2dTransposeDoubleGradGPUDNNKernel(
c_group = groups; c_group = groups;
groups = 1; groups = 1;
#endif #endif
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto handle = ctx.cudnn_handle(); auto handle = ctx.cudnn_handle();
auto layout = paddle::platform::GetCudnnTensorFormat(GPUDNNDataLayout::kNCHW); auto layout =
phi::backends::gpu::GetCudnnTensorFormat(GPUDNNDataLayout::kNCHW);
ConvArgs args1{handle, ConvArgs args1{handle,
&transformed_ddout_channel, &transformed_ddout_channel,
......
...@@ -26,16 +26,16 @@ limitations under the License. */ ...@@ -26,16 +26,16 @@ limitations under the License. */
#include "paddle/phi/kernels/transpose_kernel.h" #include "paddle/phi/kernels/transpose_kernel.h"
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h" #include "paddle/phi/backends/gpu/rocm/miopen_helper.h"
#include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h" #include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h"
#else #else
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" #include "paddle/phi/backends/gpu/cuda/cudnn_helper.h"
#include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h" #include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h"
#endif #endif
namespace phi { namespace phi {
using GPUDNNDataLayout = paddle::platform::DataLayout; using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
template <typename T, typename Context> template <typename T, typename Context>
void ConvTransposeRawGPUDNNKernel(const Context& ctx, void ConvTransposeRawGPUDNNKernel(const Context& ctx,
...@@ -194,10 +194,10 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx, ...@@ -194,10 +194,10 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx,
#endif #endif
// ------------------- cudnn conv algorithm --------------------- // ------------------- cudnn conv algorithm ---------------------
auto handle = ctx.cudnn_handle(); auto handle = ctx.cudnn_handle();
auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout); auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout);
bool deterministic = FLAGS_cudnn_deterministic; bool deterministic = FLAGS_cudnn_deterministic;
auto dtype = paddle::platform::CudnnDataType<T>::type; auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
// ------------------- cudnn descriptors --------------------- // ------------------- cudnn descriptors ---------------------
ConvArgs args{handle, ConvArgs args{handle,
&transformed_out, &transformed_out,
......
...@@ -16,18 +16,18 @@ limitations under the License. */ ...@@ -16,18 +16,18 @@ limitations under the License. */
#include <string> #include <string>
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_dnn.h"
namespace phi { namespace phi {
using GPUDNNDataLayout = paddle::platform::DataLayout; using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
using PoolingMode = paddle::platform::PoolingMode; using PoolingMode = phi::backends::gpu::PoolingMode;
using ScopedPoolingDescriptor = paddle::platform::ScopedPoolingDescriptor; using ScopedPoolingDescriptor = phi::backends::gpu::ScopedPoolingDescriptor;
using ScopedTensorDescriptor = paddle::platform::ScopedTensorDescriptor; using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor;
template <typename T> template <typename T>
using ScalingParamType = using ScalingParamType =
typename paddle::platform::CudnnDataType<T>::ScalingParamType; typename phi::backends::gpu::CudnnDataType<T>::ScalingParamType;
inline GPUDNNDataLayout GetLayoutFromStr(std::string data_format) { inline GPUDNNDataLayout GetLayoutFromStr(std::string data_format) {
if (data_format == "NHWC") { if (data_format == "NHWC") {
......
...@@ -14,8 +14,8 @@ limitations under the License. */ ...@@ -14,8 +14,8 @@ limitations under the License. */
#include "paddle/phi/kernels/pool_grad_kernel.h" #include "paddle/phi/kernels/pool_grad_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/pooling.h" #include "paddle/phi/kernels/funcs/pooling.h"
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/phi/kernels/pool_kernel.h" #include "paddle/phi/kernels/pool_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/pooling.h" #include "paddle/phi/kernels/funcs/pooling.h"
......
...@@ -24,8 +24,8 @@ limitations under the License. */ ...@@ -24,8 +24,8 @@ limitations under the License. */
#include "paddle/phi/kernels/primitive/kernel_primitives.h" #include "paddle/phi/kernels/primitive/kernel_primitives.h"
// See Note [ Why still include the fluid headers? ] // See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#define MATRIX_SOFTMAX_ALIGN_BYTES 16 #define MATRIX_SOFTMAX_ALIGN_BYTES 16
#define MATRIX_SOFTMAX_THREAHOLD 100000 #define MATRIX_SOFTMAX_THREAHOLD 100000
...@@ -55,8 +55,8 @@ limitations under the License. */ ...@@ -55,8 +55,8 @@ limitations under the License. */
namespace phi { namespace phi {
using ScopedTensorDescriptor = paddle::platform::ScopedTensorDescriptor; using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor;
using GPUDNNDataLayout = paddle::platform::DataLayout; using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
// Vectorization trait 4 * sizeof(T) // Vectorization trait 4 * sizeof(T)
template <typename T> template <typename T>
...@@ -1065,10 +1065,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx, ...@@ -1065,10 +1065,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx,
auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE; auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE;
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
handle, handle,
paddle::platform::CudnnDataType<T>::kOne(), phi::backends::gpu::CudnnDataType<T>::kOne(),
desc, desc,
x_data, x_data,
paddle::platform::CudnnDataType<T>::kZero(), phi::backends::gpu::CudnnDataType<T>::kZero(),
desc, desc,
out_data, out_data,
algo, algo,
...@@ -1082,10 +1082,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx, ...@@ -1082,10 +1082,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx,
handle, handle,
algo, algo,
mode, mode,
paddle::platform::CudnnDataType<T>::kOne(), phi::backends::gpu::CudnnDataType<T>::kOne(),
desc, desc,
x_data, x_data,
paddle::platform::CudnnDataType<T>::kZero(), phi::backends::gpu::CudnnDataType<T>::kZero(),
desc, desc,
out_data)); out_data));
#endif #endif
...@@ -1137,12 +1137,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx, ...@@ -1137,12 +1137,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx,
auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE; auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE;
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxBackward_V2( PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxBackward_V2(
handle, handle,
paddle::platform::CudnnDataType<T>::kOne(), phi::backends::gpu::CudnnDataType<T>::kOne(),
desc, desc,
out_data, out_data,
desc, desc,
dout_data, dout_data,
paddle::platform::CudnnDataType<T>::kZero(), phi::backends::gpu::CudnnDataType<T>::kZero(),
desc, desc,
dx_data, dx_data,
algo, algo,
...@@ -1156,12 +1156,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx, ...@@ -1156,12 +1156,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx,
handle, handle,
algo, algo,
mode, mode,
paddle::platform::CudnnDataType<T>::kOne(), phi::backends::gpu::CudnnDataType<T>::kOne(),
desc, desc,
out_data, out_data,
desc, desc,
dout_data, dout_data,
paddle::platform::CudnnDataType<T>::kZero(), phi::backends::gpu::CudnnDataType<T>::kZero(),
desc, desc,
dx_data)); dx_data));
#endif #endif
......