Unverified commit fd9c91c3, authored by huangjiyi, committed by GitHub

[PHI decoupling] move several header files from fluid to phi (#48415)

* decouple cudnn_desc.h from fluid

* move cudnn_desc.h from fluid to phi

* fix bugs

* decouple cudnn_helper.h from fluid

* fix bugs

* move cudnn_helper.h from fluid to phi

* add fluid cudnn_helper.h

* move miopen_desc.h from fluid to phi

* move miopen_helper.h from fluid to phi

* fix bugs

* move gpu_dnn.h from fluid to phi

* fix bugs

* update copyright year

* simplify gpu_dnn.h in fluid

* fix bugs

* fix xpu build bug

* fix compile bug

* fix bug
Parent 30a31a53
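
The commit message above lists the headers moved from paddle/fluid/platform/device/gpu to paddle/phi/backends/gpu; the diff below also rewrites call sites from paddle::platform / platform::dynload to phi::backends::gpu / phi::dynload, while fluid's gpu_dnn.h is reduced to a thin alias header. A minimal C++ sketch of what that compatibility layer means for callers (the function name is invented for illustration, and this only builds with CUDA or HIP enabled):

#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"  // now forwards to phi

void descriptor_alias_example() {  // illustrative only, not part of this commit
  // New, preferred spelling: the class now lives in phi.
  phi::backends::gpu::ScopedTensorDescriptor phi_desc;
  // Old spelling still compiles through the using-aliases this commit adds to
  // fluid's gpu_dnn.h; it names the same phi class.
  paddle::platform::ScopedTensorDescriptor fluid_desc;
  (void)phi_desc;
  (void)fluid_desc;
}
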
......@@ -14,6 +14,7 @@
#pragma once
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
......
......@@ -54,8 +54,8 @@ struct BNStatsFinalizeArgs {
cudnnDataType_t param_dtype;
cudnnTensorFormat_t format;
platform::TensorDescriptor in_desc;
platform::TensorDescriptor out_desc;
phi::backends::gpu::TensorDescriptor in_desc;
phi::backends::gpu::TensorDescriptor out_desc;
};
template <typename T>
......
......@@ -163,11 +163,11 @@ struct NormConvolutionArgs {
std::vector<int> paddings;
std::vector<int> dilations;
platform::TensorDescriptor in_desc;
platform::FilterDescriptor filter_desc;
platform::TensorDescriptor out_desc;
platform::TensorDescriptor out_stats_desc;
platform::ConvolutionDescriptor conv_desc;
phi::backends::gpu::TensorDescriptor in_desc;
phi::backends::gpu::FilterDescriptor filter_desc;
phi::backends::gpu::TensorDescriptor out_desc;
phi::backends::gpu::TensorDescriptor out_stats_desc;
phi::backends::gpu::ConvolutionDescriptor conv_desc;
bool is_support;
};
......
......@@ -89,12 +89,12 @@ struct ScaleBiasAddReluArgs {
cudnnDataType_t param_dtype;
cudnnTensorFormat_t format;
platform::TensorDescriptor in_desc;
platform::TensorDescriptor out_desc;
platform::TensorDescriptor equiv_scale_bias_desc;
platform::TensorDescriptor scale_bias_mean_var_desc;
platform::TensorDescriptor bitmask_desc;
platform::ActivationDescriptor activation_desc;
phi::backends::gpu::TensorDescriptor in_desc;
phi::backends::gpu::TensorDescriptor out_desc;
phi::backends::gpu::TensorDescriptor equiv_scale_bias_desc;
phi::backends::gpu::TensorDescriptor scale_bias_mean_var_desc;
phi::backends::gpu::TensorDescriptor bitmask_desc;
phi::backends::gpu::ActivationDescriptor activation_desc;
};
template <typename T>
......
......@@ -20,8 +20,8 @@ limitations under the License. */
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
TEST(CudnnHelper, ScopedTensorDescriptor) {
using paddle::platform::DataLayout;
using paddle::platform::ScopedTensorDescriptor;
using phi::backends::gpu::DataLayout;
using phi::backends::gpu::ScopedTensorDescriptor;
ScopedTensorDescriptor tensor_desc;
std::vector<int> shape = {2, 4, 6, 6};
......@@ -31,7 +31,7 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
int nd;
std::vector<int> dims(4);
std::vector<int> strides(4);
paddle::platform::dynload::cudnnGetTensorNdDescriptor(
phi::dynload::cudnnGetTensorNdDescriptor(
desc, 4, &type, &nd, dims.data(), strides.data());
EXPECT_EQ(nd, 4);
......@@ -50,7 +50,7 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
std::vector<int> dims_5d(5);
std::vector<int> strides_5d(5);
paddle::platform::dynload::cudnnGetTensorNdDescriptor(
phi::dynload::cudnnGetTensorNdDescriptor(
desc_5d, 5, &type, &nd, dims_5d.data(), strides_5d.data());
EXPECT_EQ(nd, 5);
......@@ -65,8 +65,8 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
}
TEST(CudnnHelper, ScopedFilterDescriptor) {
using paddle::platform::DataLayout;
using paddle::platform::ScopedFilterDescriptor;
using phi::backends::gpu::DataLayout;
using phi::backends::gpu::ScopedFilterDescriptor;
ScopedFilterDescriptor filter_desc;
std::vector<int> shape = {2, 3, 3};
......@@ -76,7 +76,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
int nd;
cudnnTensorFormat_t format;
std::vector<int> kernel(3);
paddle::platform::dynload::cudnnGetFilterNdDescriptor(
phi::dynload::cudnnGetFilterNdDescriptor(
desc, 3, &type, &format, &nd, kernel.data());
EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format);
......@@ -90,7 +90,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
auto desc_4d = filter_desc.descriptor<float>(DataLayout::kNCDHW, shape_4d);
std::vector<int> kernel_4d(4);
paddle::platform::dynload::cudnnGetFilterNdDescriptor(
phi::dynload::cudnnGetFilterNdDescriptor(
desc_4d, 4, &type, &format, &nd, kernel_4d.data());
EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format);
......@@ -101,7 +101,7 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
}
TEST(CudnnHelper, ScopedConvolutionDescriptor) {
using paddle::platform::ScopedConvolutionDescriptor;
using phi::backends::gpu::ScopedConvolutionDescriptor;
ScopedConvolutionDescriptor conv_desc;
std::vector<int> src_pads = {2, 2, 2};
......@@ -115,14 +115,14 @@ TEST(CudnnHelper, ScopedConvolutionDescriptor) {
std::vector<int> pads(3);
std::vector<int> strides(3);
std::vector<int> dilations(3);
paddle::platform::dynload::cudnnGetConvolutionNdDescriptor(desc,
3,
&nd,
pads.data(),
strides.data(),
dilations.data(),
&mode,
&type);
phi::dynload::cudnnGetConvolutionNdDescriptor(desc,
3,
&nd,
pads.data(),
strides.data(),
dilations.data(),
&mode,
&type);
EXPECT_EQ(nd, 3);
for (size_t i = 0; i < src_pads.size(); ++i) {
......@@ -134,8 +134,8 @@ TEST(CudnnHelper, ScopedConvolutionDescriptor) {
}
TEST(CudnnHelper, ScopedPoolingDescriptor) {
using paddle::platform::PoolingMode;
using paddle::platform::ScopedPoolingDescriptor;
using phi::backends::gpu::PoolingMode;
using phi::backends::gpu::ScopedPoolingDescriptor;
ScopedPoolingDescriptor pool_desc;
std::vector<int> src_kernel = {2, 2, 5};
......@@ -150,7 +150,7 @@ TEST(CudnnHelper, ScopedPoolingDescriptor) {
std::vector<int> kernel(3);
std::vector<int> pads(3);
std::vector<int> strides(3);
paddle::platform::dynload::cudnnGetPoolingNdDescriptor(
phi::dynload::cudnnGetPoolingNdDescriptor(
desc, 3, &mode, &nan_t, &nd, kernel.data(), pads.data(), strides.data());
EXPECT_EQ(nd, 3);
......
......@@ -20,20 +20,22 @@ namespace paddle {
namespace platform {
TEST(TensorDescriptor, Empty) {
ActivationDescriptor a;
TensorDescriptor t;
TensorDescriptor t1;
TensorDescriptor *t11 = new TensorDescriptor();
phi::backends::gpu::ActivationDescriptor a;
phi::backends::gpu::TensorDescriptor t;
phi::backends::gpu::TensorDescriptor t1;
phi::backends::gpu::TensorDescriptor *t11 =
new phi::backends::gpu::TensorDescriptor();
delete t11;
std::unique_ptr<TensorDescriptor> tt(new TensorDescriptor());
std::unique_ptr<phi::backends::gpu::TensorDescriptor> tt(
new phi::backends::gpu::TensorDescriptor());
}
TEST(TensorDescriptor, Normal) {
phi::DenseTensor tt;
tt.Resize({2, 3, 4});
tt.mutable_data<float>(platform::CPUPlace());
tt.mutable_data<float>(phi::CPUPlace());
TensorDescriptor desc;
phi::backends::gpu::TensorDescriptor desc;
desc.set(tt);
EXPECT_TRUE(desc.desc() != nullptr);
}
......
......@@ -14,14 +14,34 @@
#pragma once
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/device/gpu/rocm/miopen_desc.h"
#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h"
#else // CUDA
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h"
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h"
namespace paddle {
namespace platform {
using DataLayout = phi::backends::gpu::DataLayout;
using PoolingMode = phi::backends::gpu::PoolingMode;
template <typename T>
using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor;
using ScopedDropoutDescriptor = phi::backends::gpu::ScopedDropoutDescriptor;
using ScopedRNNDescriptor = phi::backends::gpu::ScopedRNNDescriptor;
using ScopedFilterDescriptor = phi::backends::gpu::ScopedFilterDescriptor;
using ScopedConvolutionDescriptor =
phi::backends::gpu::ScopedConvolutionDescriptor;
using ScopedPoolingDescriptor = phi::backends::gpu::ScopedPoolingDescriptor;
using ScopedActivationDescriptor =
phi::backends::gpu::ScopedActivationDescriptor;
#if defined(PADDLE_WITH_CUDA)
using ScopedRNNTensorDescriptor = phi::backends::gpu::ScopedRNNTensorDescriptor;
using ScopedSpatialTransformerDescriptor =
phi::backends::gpu::ScopedSpatialTransformerDescriptor;
#endif
} // namespace platform
} // namespace paddle
#endif
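
Because the aliases above also cover the CudnnDataType template, existing fluid code that reads the dnn scaling parameters keeps compiling unchanged. A hedged sketch (the function name is invented, and a CUDA or HIP build is assumed):

#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"

void scaling_param_example() {  // illustrative only, not part of this commit
  // Both spellings resolve to the same specialization after this commit;
  // kOne()/kZero() are the scaling factors passed to the dnn calls, as in
  // the softmax hunks later in this diff.
  auto one_fluid = paddle::platform::CudnnDataType<float>::kOne();
  auto one_phi = phi::backends::gpu::CudnnDataType<float>::kOne();
  (void)one_fluid;
  (void)one_phi;
}
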
......@@ -20,8 +20,8 @@ limitations under the License. */
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
TEST(MIOpenHelper, ScopedTensorDescriptor) {
using paddle::platform::DataLayout;
using paddle::platform::ScopedTensorDescriptor;
using phi::backends::gpu::DataLayout;
using phi::backends::gpu::ScopedTensorDescriptor;
ScopedTensorDescriptor tensor_desc;
std::vector<int> shape = {2, 4, 6, 6};
......@@ -31,9 +31,9 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) {
int nd;
std::vector<int> dims(4);
std::vector<int> strides(4);
paddle::platform::dynload::miopenGetTensorDescriptor(
phi::dynload::miopenGetTensorDescriptor(
desc, &type, dims.data(), strides.data());
paddle::platform::dynload::miopenGetTensorDescriptorSize(desc, &nd);
phi::dynload::miopenGetTensorDescriptorSize(desc, &nd);
EXPECT_EQ(nd, 4);
for (size_t i = 0; i < dims.size(); ++i) {
......@@ -51,9 +51,9 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) {
std::vector<int> dims_5d(5);
std::vector<int> strides_5d(5);
paddle::platform::dynload::miopenGetTensorDescriptor(
phi::dynload::miopenGetTensorDescriptor(
desc_5d, &type, dims_5d.data(), strides_5d.data());
paddle::platform::dynload::miopenGetTensorDescriptorSize(desc_5d, &nd);
phi::dynload::miopenGetTensorDescriptorSize(desc_5d, &nd);
EXPECT_EQ(nd, 5);
for (size_t i = 0; i < dims_5d.size(); ++i) {
......@@ -67,7 +67,7 @@ TEST(MIOpenHelper, ScopedTensorDescriptor) {
}
TEST(MIOpenHelper, ScopedConvolutionDescriptor) {
using paddle::platform::ScopedConvolutionDescriptor;
using phi::backends::gpu::ScopedConvolutionDescriptor;
ScopedConvolutionDescriptor conv_desc;
std::vector<int> src_pads = {2, 2, 2};
......@@ -80,7 +80,7 @@ TEST(MIOpenHelper, ScopedConvolutionDescriptor) {
std::vector<int> pads(3);
std::vector<int> strides(3);
std::vector<int> dilations(3);
paddle::platform::dynload::miopenGetConvolutionNdDescriptor(
phi::dynload::miopenGetConvolutionNdDescriptor(
desc, 3, &nd, pads.data(), strides.data(), dilations.data(), &mode);
EXPECT_EQ(nd, 3);
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -23,22 +23,12 @@
#include <string>
#include <vector>
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h"
#include "paddle/phi/core/utils/data_type.h"
namespace phi {
class DenseTensor;
} // namespace phi
namespace paddle {
namespace platform {
template <typename T>
inline cudnnDataType_t ToCudnnDataType(const T& t) {
auto type = framework::ToDataType(t);
return ToCudnnDataType(phi::TransToPhiDataType(type));
}
namespace backends {
namespace gpu {
template <typename T>
inline std::vector<T> TransformDimOrder(const std::vector<T>& dims) {
......@@ -67,7 +57,6 @@ inline std::vector<T> TransformDimOrder(const std::vector<T>& dims) {
return transformed_dims;
}
template <>
inline cudnnDataType_t ToCudnnDataType(const phi::DataType& t) {
cudnnDataType_t type = CUDNN_DATA_FLOAT;
switch (t) {
......@@ -98,7 +87,7 @@ class ActivationDescriptor {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnDestroyActivationDescriptor(t));
phi::dynload::cudnnDestroyActivationDescriptor(t));
t = nullptr;
}
}
......@@ -106,12 +95,12 @@ class ActivationDescriptor {
ActivationDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnCreateActivationDescriptor(&raw_ptr));
phi::dynload::cudnnCreateActivationDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
template <typename T>
void set(cudnnActivationMode_t mode, const T& coef) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor(
desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, static_cast<double>(coef)));
}
......@@ -128,14 +117,16 @@ class TensorDescriptor {
struct Deleter {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(t));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnDestroyTensorDescriptor(t));
t = nullptr;
}
}
};
TensorDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&raw_ptr));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnCreateTensorDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
T* desc() { return desc_.get(); }
......@@ -151,12 +142,12 @@ class TensorDescriptor {
if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups;
}
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetTensorNdDescriptor(desc_.get(),
ToCudnnDataType(tensor.dtype()),
dims_with_group.size(),
dims_with_group.data(),
strides.data()));
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
desc_.get(),
ToCudnnDataType(tensor.dtype()),
dims_with_group.size(),
dims_with_group.data(),
strides.data()));
}
void set(const std::vector<int>& dims,
......@@ -169,11 +160,11 @@ class TensorDescriptor {
transformed_dims = dims;
}
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetTensorNdDescriptorEx(desc_.get(),
format,
dtype,
transformed_dims.size(),
transformed_dims.data()));
phi::dynload::cudnnSetTensorNdDescriptorEx(desc_.get(),
format,
dtype,
transformed_dims.size(),
transformed_dims.data()));
}
void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) {
......@@ -192,14 +183,16 @@ class FilterDescriptor {
struct Deleter {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(t));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnDestroyFilterDescriptor(t));
t = nullptr;
}
}
};
FilterDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&raw_ptr));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnCreateFilterDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
T* desc() { return desc_.get(); }
......@@ -219,11 +212,11 @@ class FilterDescriptor {
transformed_dims[1] = transformed_dims[1] / groups;
}
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetFilterNdDescriptor(desc_.get(),
dtype,
format,
transformed_dims.size(),
transformed_dims.data()));
phi::dynload::cudnnSetFilterNdDescriptor(desc_.get(),
dtype,
format,
transformed_dims.size(),
transformed_dims.data()));
}
void set(const phi::DenseTensor& tensor,
......@@ -245,7 +238,7 @@ class ConvolutionDescriptor {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnDestroyConvolutionDescriptor(t));
phi::dynload::cudnnDestroyConvolutionDescriptor(t));
t = nullptr;
}
}
......@@ -253,7 +246,7 @@ class ConvolutionDescriptor {
ConvolutionDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnCreateConvolutionDescriptor(&raw_ptr));
phi::dynload::cudnnCreateConvolutionDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
T* desc() { return desc_.get(); }
......@@ -270,31 +263,31 @@ class ConvolutionDescriptor {
(dtype == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
T* desc = desc_.get();
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetConvolutionNdDescriptor(desc,
pads.size(),
pads.data(),
strides.data(),
dilations.data(),
CUDNN_CROSS_CORRELATION,
compute_type));
phi::dynload::cudnnSetConvolutionNdDescriptor(desc,
pads.size(),
pads.data(),
strides.data(),
dilations.data(),
CUDNN_CROSS_CORRELATION,
compute_type));
#if CUDNN_VERSION_MIN(7, 0, 1)
PADDLE_ENFORCE_GPU_SUCCESS(
platform::dynload::cudnnSetConvolutionGroupCount(desc, groups));
phi::dynload::cudnnSetConvolutionGroupCount(desc, groups));
#if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType(
desc, CUDNN_DEFAULT_MATH));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_DEFAULT_MATH));
if (dtype == CUDNN_DATA_HALF) {
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType(
desc, CUDNN_TENSOR_OP_MATH));
#if CUDA_VERSION >= 11000
#if CUDNN_VERSION_MIN(8, 1, 0)
} else if (dtype == CUDNN_DATA_BFLOAT16) {
PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetConvolutionMathType(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType(
desc, CUDNN_TENSOR_OP_MATH));
#endif // CUDNN_VERSION_MIN(8,1,0)
} else if (dtype == CUDNN_DATA_FLOAT && !allow_tf32) {
PADDLE_ENFORCE_GPU_SUCCESS(
platform::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH));
phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH));
#endif // CUDA_VERSION >= 11000
}
#endif
......@@ -307,5 +300,6 @@ class ConvolutionDescriptor {
std::unique_ptr<T, Deleter> desc_;
};
} // namespace platform
} // namespace paddle
} // namespace gpu
} // namespace backends
} // namespace phi
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -17,16 +17,20 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/backends/dynload/cudnn.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/macros.h"
DECLARE_bool(cudnn_deterministic);
namespace paddle {
namespace platform {
namespace phi {
namespace backends {
namespace gpu {
#define CUDNN_VERSION_MIN(major, minor, patch) \
(CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch)))
......@@ -68,7 +72,7 @@ inline cudnnPoolingMode_t GetPoolingMode(const PoolingMode& mode) {
return CUDNN_POOLING_MAX;
default:
PADDLE_THROW(
platform::errors::Unimplemented("Unexpected CUDNN pooling mode."));
phi::errors::Unimplemented("Unexpected CUDNN pooling mode."));
}
}
......@@ -88,7 +92,7 @@ inline ActivationMode StringToActivationMode(const std::string& str) {
} else if (str == "bandpass") {
return ActivationMode::kBandPass;
} else {
PADDLE_THROW(platform::errors::Unimplemented(
PADDLE_THROW(phi::errors::Unimplemented(
"Unknown CUDNN activation string: %s.", str));
}
}
......@@ -99,7 +103,7 @@ class CudnnDataType;
// CUDNN_DATA_BFLOAT16 is not valid before cudnn8.1
#if CUDNN_VERSION_MIN(8, 1, 0)
template <>
class CudnnDataType<bfloat16> {
class CudnnDataType<phi::dtype::bfloat16> {
public:
static const cudnnDataType_t type = CUDNN_DATA_BFLOAT16;
using ScalingParamType = const float;
......@@ -116,7 +120,7 @@ class CudnnDataType<bfloat16> {
#endif
template <>
class CudnnDataType<float16> {
class CudnnDataType<phi::dtype::float16> {
public:
static const cudnnDataType_t type = CUDNN_DATA_HALF;
// The scaling param type is float for HALF and FLOAT tensors
......@@ -176,7 +180,7 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat(
case DataLayout::kNDHWC:
return CUDNN_TENSOR_NHWC; // add, liyamei
default:
PADDLE_THROW(platform::errors::Unimplemented(
PADDLE_THROW(phi::errors::Unimplemented(
"CUDNN has no equivalent dataLayout for input order."));
}
return CUDNN_TENSOR_NCHW;
......@@ -185,10 +189,12 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat(
class ScopedTensorDescriptor {
public:
ScopedTensorDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnCreateTensorDescriptor(&desc_));
}
~ScopedTensorDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnDestroyTensorDescriptor(desc_));
}
inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
......@@ -211,25 +217,25 @@ class ScopedTensorDescriptor {
if (dims.size() == 4) {
if (format == CUDNN_TENSOR_NCHW) {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetTensorNdDescriptor(desc_,
type,
dims_with_group.size(),
dims_with_group.data(),
strides.data()));
phi::dynload::cudnnSetTensorNdDescriptor(desc_,
type,
dims_with_group.size(),
dims_with_group.data(),
strides.data()));
} else { // CUDNN_TENSOR_NHWC
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensor4dDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensor4dDescriptor(
desc_, format, type, dims[0], dims[3], dims[1], dims[2]));
}
} else if (dims.size() == 5) {
if (format == CUDNN_TENSOR_NCHW) {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetTensorNdDescriptor(desc_,
type,
dims_with_group.size(),
dims_with_group.data(),
strides.data()));
phi::dynload::cudnnSetTensorNdDescriptor(desc_,
type,
dims_with_group.size(),
dims_with_group.data(),
strides.data()));
} else { // CUDNN_TENSOR_NHWC
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptorEx(
desc_, format, type, dims.size(), dims.data()));
}
}
......@@ -247,7 +253,7 @@ class ScopedTensorDescriptor {
inline cudnnTensorDescriptor_t descriptor(const cudnnDataType_t cudnn_type,
const std::vector<int>& dim,
const std::vector<int>& stride) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
desc_, cudnn_type, dim.size(), dim.data(), stride.data()));
return desc_;
}
......@@ -269,11 +275,13 @@ class ScopedTensorDescriptor {
class ScopedRNNTensorDescriptor {
public:
ScopedRNNTensorDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDataDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnCreateRNNDataDescriptor(&desc_));
}
~ScopedRNNTensorDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDataDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnDestroyRNNDataDescriptor(desc_));
}
inline cudnnRNNDataDescriptor_t descriptor(
......@@ -292,15 +300,15 @@ class ScopedRNNTensorDescriptor {
layout = CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED;
}
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetRNNDataDescriptor(desc_,
cudnn_type,
layout,
max_seq_length,
batch_size,
input_size,
seq_length.data(),
static_cast<void*>(&padding_fill)));
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDataDescriptor(
desc_,
cudnn_type,
layout,
max_seq_length,
batch_size,
input_size,
seq_length.data(),
static_cast<void*>(&padding_fill)));
return desc_;
}
......@@ -331,14 +339,16 @@ class ScopedRNNTensorDescriptor {
class ScopedDropoutDescriptor {
public:
ScopedDropoutDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateDropoutDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnCreateDropoutDescriptor(&desc_));
}
~ScopedDropoutDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyDropoutDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnDestroyDropoutDescriptor(desc_));
}
inline cudnnDropoutDescriptor_t descriptor(const cudnnHandle_t& handle,
const platform::Place& place,
const phi::Place& place,
bool initialized,
float dropout_prob_,
phi::DenseTensor* dropout_state_,
......@@ -346,22 +356,22 @@ class ScopedDropoutDescriptor {
size_t state_size) {
if (dropout_state_ == nullptr) { // for no dropout or test
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetDropoutDescriptor(desc_,
handle,
0 /* dropout */,
nullptr,
0 /* state_size */,
0 /* seed */));
phi::dynload::cudnnSetDropoutDescriptor(desc_,
handle,
0 /* dropout */,
nullptr,
0 /* state_size */,
0 /* seed */));
return desc_;
}
auto* dropout_state_data = dropout_state_->data<uint8_t>();
if (!initialized) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetDropoutDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetDropoutDescriptor(
desc_, handle, dropout_prob_, dropout_state_data, state_size, seed));
} else {
auto dropout_state_dims = dropout_state_->dims();
state_size = dropout_state_dims[0];
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnRestoreDropoutDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRestoreDropoutDescriptor(
desc_, handle, dropout_prob_, dropout_state_data, state_size, 0));
}
return desc_;
......@@ -376,10 +386,10 @@ class ScopedDropoutDescriptor {
class ScopedRNNDescriptor {
public:
ScopedRNNDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateRNNDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnCreateRNNDescriptor(&desc_));
}
~ScopedRNNDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyRNNDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDestroyRNNDescriptor(desc_));
}
inline cudnnRNNDescriptor_t desc() { return desc_; }
......@@ -392,10 +402,12 @@ class ScopedRNNDescriptor {
class ScopedFilterDescriptor {
public:
ScopedFilterDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnCreateFilterDescriptor(&desc_));
}
~ScopedFilterDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnDestroyFilterDescriptor(desc_));
}
inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
......@@ -412,11 +424,11 @@ class ScopedFilterDescriptor {
// NOTE: input filter(C) of the filter is already asserted to be C/groups.
}
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetFilterNdDescriptor(desc_,
type,
format,
kernel_with_group.size(),
kernel_with_group.data()));
phi::dynload::cudnnSetFilterNdDescriptor(desc_,
type,
format,
kernel_with_group.size(),
kernel_with_group.data()));
return desc_;
}
......@@ -439,11 +451,11 @@ class ScopedConvolutionDescriptor {
public:
ScopedConvolutionDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnCreateConvolutionDescriptor(&desc_));
phi::dynload::cudnnCreateConvolutionDescriptor(&desc_));
}
~ScopedConvolutionDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnDestroyConvolutionDescriptor(desc_));
phi::dynload::cudnnDestroyConvolutionDescriptor(desc_));
}
inline cudnnConvolutionDescriptor_t descriptor(
......@@ -453,7 +465,7 @@ class ScopedConvolutionDescriptor {
const std::vector<int>& dilations) {
PADDLE_ENFORCE_EQ(pads.size(),
strides.size(),
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"The size of pads and strides should be equal. But "
"received size of pads is %d, size of strides is %d.",
pads.size(),
......@@ -461,7 +473,7 @@ class ScopedConvolutionDescriptor {
PADDLE_ENFORCE_EQ(
pads.size(),
dilations.size(),
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"The size of pads and dilations should be equal. But received size "
"of pads is %d, size of dilations is %d.",
pads.size(),
......@@ -470,13 +482,13 @@ class ScopedConvolutionDescriptor {
cudnnDataType_t compute_type =
(type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetConvolutionNdDescriptor(desc_,
pads.size(),
pads.data(),
strides.data(),
dilations.data(),
CUDNN_CROSS_CORRELATION,
compute_type));
phi::dynload::cudnnSetConvolutionNdDescriptor(desc_,
pads.size(),
pads.data(),
strides.data(),
dilations.data(),
CUDNN_CROSS_CORRELATION,
compute_type));
return desc_;
}
......@@ -496,10 +508,12 @@ class ScopedConvolutionDescriptor {
class ScopedPoolingDescriptor {
public:
ScopedPoolingDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnCreatePoolingDescriptor(&desc_));
}
~ScopedPoolingDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnDestroyPoolingDescriptor(desc_));
}
inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
......@@ -508,7 +522,7 @@ class ScopedPoolingDescriptor {
const std::vector<int>& strides) {
PADDLE_ENFORCE_EQ(kernel.size(),
pads.size(),
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"The size of kernel and pads should be equal. But "
"received size of kernel is %d, size of pads is %d.",
kernel.size(),
......@@ -516,12 +530,12 @@ class ScopedPoolingDescriptor {
PADDLE_ENFORCE_EQ(
kernel.size(),
strides.size(),
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"The size of kernel and strides should be equal. But "
"received size of kernel is %d, size of strides is %d.",
kernel.size(),
strides.size()));
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetPoolingNdDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetPoolingNdDescriptor(
desc_,
(GetPoolingMode(mode)),
CUDNN_PROPAGATE_NAN, // Always propagate nans.
......@@ -541,18 +555,23 @@ class ScopedSpatialTransformerDescriptor {
public:
ScopedSpatialTransformerDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
phi::dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
}
~ScopedSpatialTransformerDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
phi::dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
}
template <typename T>
inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
const int dimA[]) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor(
desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnSetSpatialTransformerNdDescriptor(
desc_,
CUDNN_SAMPLER_BILINEAR,
CudnnDataType<T>::type,
nbDims,
dimA));
return desc_;
}
......@@ -565,11 +584,11 @@ class ScopedActivationDescriptor {
public:
ScopedActivationDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnCreateActivationDescriptor(&desc_));
phi::dynload::cudnnCreateActivationDescriptor(&desc_));
}
~ScopedActivationDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnDestroyActivationDescriptor(desc_));
phi::dynload::cudnnDestroyActivationDescriptor(desc_));
}
template <typename T>
......@@ -602,11 +621,11 @@ class ScopedActivationDescriptor {
mode = CUDNN_ACTIVATION_TANH;
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
PADDLE_THROW(phi::errors::Unimplemented(
"Unrecognized CUDNN activation mode: %d.",
static_cast<int>(activation_mode)));
}
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetActivationDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor(
desc_, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling));
return desc_;
}
......@@ -620,16 +639,18 @@ class ScopedActivationDescriptor {
class ScopedCTCLossDescriptor {
public:
ScopedCTCLossDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnCreateCTCLossDescriptor(&desc_));
}
~ScopedCTCLossDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::cudnnDestroyCTCLossDescriptor(desc_));
}
template <typename T>
inline cudnnCTCLossDescriptor_t descriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
phi::dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
return desc_;
}
......@@ -639,5 +660,6 @@ class ScopedCTCLossDescriptor {
};
#endif
} // namespace platform
} // namespace paddle
} // namespace gpu
} // namespace backends
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#ifdef PADDLE_WITH_HIP
#include "paddle/phi/backends/gpu/rocm/miopen_desc.h"
#include "paddle/phi/backends/gpu/rocm/miopen_helper.h"
#else // CUDA
#include "paddle/phi/backends/gpu/cuda/cudnn_desc.h"
#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h"
#endif
#endif
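
This new phi gpu_dnn.h selects the cuDNN or MIOpen helpers at compile time, so phi kernels can hold the descriptor wrappers without depending on fluid. A minimal sketch assuming a CUDA or ROCm build (the struct and function names are invented for illustration):

#include "paddle/phi/backends/gpu/gpu_dnn.h"

// Illustrative only: the same spellings work on CUDA (cuDNN) and ROCm (MIOpen)
// builds, mirroring the RNNDescriptors members further down in this diff.
template <typename T>
struct ExampleDnnState {
  phi::backends::gpu::ScopedTensorDescriptor x_desc_;
  phi::backends::gpu::ScopedFilterDescriptor w_desc_;
};

template <typename T>
void example_query_dtype() {
  // cudnnDataType_t on CUDA, miopenDataType_t on ROCm.
  auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
  (void)dtype;
}
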
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -23,22 +23,12 @@
#include <string>
#include <vector>
#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/gpu/rocm/miopen_helper.h"
#include "paddle/phi/core/utils/data_type.h"
namespace phi {
class DenseTensor;
} // namespace phi
namespace paddle {
namespace platform {
template <typename T>
inline miopenDataType_t ToCudnnDataType(const T& t) {
auto type = framework::ToDataType(t);
return ToCudnnDataType(phi::TransToPhiDataType(type));
}
namespace backends {
namespace gpu {
inline std::vector<int> TransformDimOrder(const std::vector<int>& dims) {
std::vector<int> transformed_dims(dims.begin(), dims.end());
......@@ -63,7 +53,6 @@ inline std::vector<int> TransformDimOrder(const std::vector<int>& dims) {
return transformed_dims;
}
template <>
inline miopenDataType_t ToCudnnDataType(const phi::DataType& t) {
miopenDataType_t type = miopenFloat;
switch (t) {
......@@ -86,7 +75,7 @@ class ActivationDescriptor {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenDestroyActivationDescriptor(t));
phi::dynload::miopenDestroyActivationDescriptor(t));
t = nullptr;
}
}
......@@ -94,12 +83,12 @@ class ActivationDescriptor {
ActivationDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenCreateActivationDescriptor(&raw_ptr));
phi::dynload::miopenCreateActivationDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
template <typename T>
void set(miopenActivationMode_t mode, const T& coef) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetActivationDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor(
desc_.get(), mode, static_cast<double>(coef), 0.0, 0.0));
}
......@@ -116,14 +105,16 @@ class TensorDescriptor {
struct Deleter {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(t));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyTensorDescriptor(t));
t = nullptr;
}
}
};
TensorDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&raw_ptr));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateTensorDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
T* desc() { return desc_.get(); }
......@@ -140,7 +131,7 @@ class TensorDescriptor {
if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups;
}
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
(miopenTensorDescriptor_t)(desc_.get()),
ToCudnnDataType(tensor.dtype()),
static_cast<int>(dims_with_group.size()),
......@@ -150,10 +141,10 @@ class TensorDescriptor {
void set(const phi::DenseTensor& tensor, const miopenTensorFormat_t format) {
const int groups = 1;
PADDLE_ENFORCE_EQ(format,
MIOPEN_TENSOR_NCHW,
platform::errors::InvalidArgument(
"format should ONLY be NCHW in MIOPEN."));
PADDLE_ENFORCE_EQ(
format,
MIOPEN_TENSOR_NCHW,
phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN."));
auto dims = phi::vectorize<int>(tensor.dims());
std::vector<int> strides(dims.size());
strides[dims.size() - 1] = 1;
......@@ -164,7 +155,7 @@ class TensorDescriptor {
if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups;
}
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
(miopenTensorDescriptor_t)(desc_.get()),
ToCudnnDataType(tensor.dtype()),
static_cast<int>(dims_with_group.size()),
......@@ -182,14 +173,16 @@ class FilterDescriptor {
struct Deleter {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(t));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyTensorDescriptor(t));
t = nullptr;
}
}
};
FilterDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&raw_ptr));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateTensorDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
T* desc() { return desc_.get(); }
......@@ -198,10 +191,10 @@ class FilterDescriptor {
void set(const phi::DenseTensor& tensor,
const miopenTensorFormat_t format,
const int groups = 1) {
PADDLE_ENFORCE_EQ(format,
MIOPEN_TENSOR_NCHW,
platform::errors::InvalidArgument(
"format should ONLY be NCHW in MIOPEN."));
PADDLE_ENFORCE_EQ(
format,
MIOPEN_TENSOR_NCHW,
phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN."));
auto dims = phi::vectorize<int>(tensor.dims());
std::vector<int> strides(dims.size());
strides[dims.size() - 1] = 1;
......@@ -212,7 +205,7 @@ class FilterDescriptor {
if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups;
}
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
(miopenTensorDescriptor_t)(desc_.get()),
ToCudnnDataType(tensor.dtype()),
static_cast<int>(dims_with_group.size()),
......@@ -231,7 +224,7 @@ class ConvolutionDescriptor {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenDestroyConvolutionDescriptor(t));
phi::dynload::miopenDestroyConvolutionDescriptor(t));
t = nullptr;
}
}
......@@ -239,7 +232,7 @@ class ConvolutionDescriptor {
ConvolutionDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenCreateConvolutionDescriptor(&raw_ptr));
phi::dynload::miopenCreateConvolutionDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
T* desc() { return desc_.get(); }
......@@ -251,21 +244,21 @@ class ConvolutionDescriptor {
const std::vector<int>& dilations,
bool allow_tf32,
const int groups = 1) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenInitConvolutionNdDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor(
(miopenConvolutionDescriptor_t)desc_.get(),
static_cast<int>(pads.size()),
const_cast<int*>(pads.data()),
const_cast<int*>(strides.data()),
const_cast<int*>(dilations.data()),
miopenConvolution));
PADDLE_ENFORCE_GPU_SUCCESS(
platform::dynload::miopenSetConvolutionGroupCount(
(miopenConvolutionDescriptor_t)desc_.get(), groups));
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetConvolutionGroupCount(
(miopenConvolutionDescriptor_t)desc_.get(), groups));
}
private:
std::unique_ptr<T, Deleter> desc_;
};
} // namespace platform
} // namespace paddle
} // namespace gpu
} // namespace backends
} // namespace phi
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -17,20 +17,24 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/fluid/platform/dynload/miopen.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/backends/dynload/miopen.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/macros.h"
// MIOPEN do not have epslion definition
#define CUDNN_BN_MIN_EPSILON 1e-05
DECLARE_bool(cudnn_deterministic);
namespace paddle {
namespace platform {
namespace phi {
namespace backends {
namespace gpu {
inline const char* miopenGetErrorString(miopenStatus_t status) {
switch (status) {
case miopenStatusSuccess:
......@@ -95,7 +99,7 @@ inline miopenPoolingMode_t GetPoolingMode(const PoolingMode& mode) {
return miopenPoolingMax;
default:
PADDLE_THROW(
platform::errors::Unimplemented("Unexpected MIOPEN pooling mode."));
phi::errors::Unimplemented("Unexpected MIOPEN pooling mode."));
}
}
......@@ -115,7 +119,7 @@ inline ActivationMode StringToActivationMode(const std::string& str) {
} else if (str == "bandpass") {
return ActivationMode::kBandPass;
} else {
PADDLE_THROW(platform::errors::Unimplemented(
PADDLE_THROW(phi::errors::Unimplemented(
"Unknown MIOPEN activation string: %s.", str));
}
}
......@@ -124,7 +128,7 @@ template <typename T>
class CudnnDataType;
template <>
class CudnnDataType<float16> {
class CudnnDataType<phi::dtype::float16> {
public:
static const miopenDataType_t type = miopenHalf;
// The scaling param type is float for HALF and FLOAT tensors
......@@ -141,7 +145,7 @@ class CudnnDataType<float16> {
};
template <>
class CudnnDataType<bfloat16> {
class CudnnDataType<phi::dtype::bfloat16> {
public:
static const miopenDataType_t type = miopenBFloat16;
// The scaling param type is float for HALF and FLOAT tensors
......@@ -184,7 +188,7 @@ inline miopenTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) {
case DataLayout::kNDHWC:
return MIOPEN_TENSOR_NHWC;
default:
PADDLE_THROW(platform::errors::Unimplemented(
PADDLE_THROW(phi::errors::Unimplemented(
"MIOPEN has no equivalent dataLayout for input order."));
}
return MIOPEN_TENSOR_NCHW;
......@@ -193,10 +197,12 @@ inline miopenTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) {
class ScopedTensorDescriptor {
public:
ScopedTensorDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateTensorDescriptor(&desc_));
}
~ScopedTensorDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyTensorDescriptor(desc_));
}
inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format,
......@@ -217,19 +223,19 @@ class ScopedTensorDescriptor {
}
// MIOPEN ONLY support data layout of NCHW
PADDLE_ENFORCE_EQ(format,
MIOPEN_TENSOR_NCHW,
platform::errors::InvalidArgument(
"format should ONLY be NCHW in MIOPEN."));
PADDLE_ENFORCE_EQ(
format,
MIOPEN_TENSOR_NCHW,
phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN."));
if (dims.size() == 4) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
desc_,
type,
dims_with_group.size(),
const_cast<int*>(dims_with_group.data()),
const_cast<int*>(strides.data())));
} else if (dims.size() == 5) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
desc_,
type,
dims_with_group.size(),
......@@ -250,12 +256,12 @@ class ScopedTensorDescriptor {
inline miopenTensorDescriptor_t descriptor(const miopenDataType_t miopen_type,
const std::vector<int>& dim,
const std::vector<int>& stride) {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenSetTensorDescriptor(desc_,
miopen_type,
dim.size(),
const_cast<int*>(dim.data()),
const_cast<int*>(stride.data())));
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
desc_,
miopen_type,
dim.size(),
const_cast<int*>(dim.data()),
const_cast<int*>(stride.data())));
return desc_;
}
......@@ -275,14 +281,16 @@ class ScopedTensorDescriptor {
class ScopedDropoutDescriptor {
public:
ScopedDropoutDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateDropoutDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateDropoutDescriptor(&desc_));
}
~ScopedDropoutDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyDropoutDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyDropoutDescriptor(desc_));
}
inline miopenDropoutDescriptor_t descriptor(const miopenHandle_t& handle,
const platform::Place& place,
const phi::Place& place,
bool initialized,
float dropout_prob_,
phi::DenseTensor* dropout_state_,
......@@ -290,42 +298,42 @@ class ScopedDropoutDescriptor {
size_t state_size) {
if (dropout_state_ == nullptr) { // for no dropout or test
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenSetDropoutDescriptor(desc_,
handle,
0 /* dropout */,
nullptr,
0 /* state_size */,
0 /* seed */,
false,
false,
MIOPEN_RNG_PSEUDO_XORWOW));
phi::dynload::miopenSetDropoutDescriptor(desc_,
handle,
0 /* dropout */,
nullptr,
0 /* state_size */,
0 /* seed */,
false,
false,
MIOPEN_RNG_PSEUDO_XORWOW));
return desc_;
}
auto* dropout_state_data = dropout_state_->data<uint8_t>();
if (!initialized) {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenSetDropoutDescriptor(desc_,
handle,
dropout_prob_,
dropout_state_data,
state_size,
seed,
false,
false,
MIOPEN_RNG_PSEUDO_XORWOW));
phi::dynload::miopenSetDropoutDescriptor(desc_,
handle,
dropout_prob_,
dropout_state_data,
state_size,
seed,
false,
false,
MIOPEN_RNG_PSEUDO_XORWOW));
} else {
auto dropout_state_dims = dropout_state_->dims();
state_size = dropout_state_dims[0];
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenRestoreDropoutDescriptor(desc_,
handle,
dropout_prob_,
dropout_state_data,
state_size,
0,
false,
false,
MIOPEN_RNG_PSEUDO_XORWOW));
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRestoreDropoutDescriptor(
desc_,
handle,
dropout_prob_,
dropout_state_data,
state_size,
0,
false,
false,
MIOPEN_RNG_PSEUDO_XORWOW));
}
return desc_;
}
......@@ -339,10 +347,10 @@ class ScopedDropoutDescriptor {
class ScopedRNNDescriptor {
public:
ScopedRNNDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateRNNDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenCreateRNNDescriptor(&desc_));
}
~ScopedRNNDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyRNNDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenDestroyRNNDescriptor(desc_));
}
inline miopenRNNDescriptor_t desc() { return desc_; }
......@@ -355,10 +363,12 @@ class ScopedRNNDescriptor {
class ScopedFilterDescriptor {
public:
ScopedFilterDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateTensorDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateTensorDescriptor(&desc_));
}
~ScopedFilterDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyTensorDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyTensorDescriptor(desc_));
}
inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format,
......@@ -379,7 +389,7 @@ class ScopedFilterDescriptor {
for (int k = kernel_with_group.size() - 2; k >= 0; k--) {
stride_dim[k] = stride_dim[k + 1] * kernel_with_group[k + 1];
}
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
desc_,
type,
kernel_with_group.size(),
......@@ -407,11 +417,11 @@ class ScopedConvolutionDescriptor {
public:
ScopedConvolutionDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenCreateConvolutionDescriptor(&desc_));
phi::dynload::miopenCreateConvolutionDescriptor(&desc_));
}
~ScopedConvolutionDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenDestroyConvolutionDescriptor(desc_));
phi::dynload::miopenDestroyConvolutionDescriptor(desc_));
}
inline miopenConvolutionDescriptor_t descriptor(
......@@ -421,7 +431,7 @@ class ScopedConvolutionDescriptor {
const std::vector<int>& dilations) {
PADDLE_ENFORCE_EQ(pads.size(),
strides.size(),
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"The size of pads and strides should be equal. But "
"received size of pads is %d, size of strides is %d.",
pads.size(),
......@@ -429,12 +439,12 @@ class ScopedConvolutionDescriptor {
PADDLE_ENFORCE_EQ(
pads.size(),
dilations.size(),
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"The size of pads and dilations should be equal. But received size "
"of pads is %d, size of dilations is %d.",
pads.size(),
dilations.size()));
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenInitConvolutionNdDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor(
desc_,
pads.size(),
const_cast<int*>(pads.data()),
......@@ -460,10 +470,12 @@ class ScopedConvolutionDescriptor {
class ScopedPoolingDescriptor {
public:
ScopedPoolingDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreatePoolingDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreatePoolingDescriptor(&desc_));
}
~ScopedPoolingDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyPoolingDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyPoolingDescriptor(desc_));
}
inline miopenPoolingDescriptor_t descriptor(const PoolingMode& mode,
......@@ -472,7 +484,7 @@ class ScopedPoolingDescriptor {
const std::vector<int>& strides) {
PADDLE_ENFORCE_EQ(kernel.size(),
pads.size(),
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"The size of kernel and pads should be equal. But "
"received size of kernel is %d, size of pads is %d.",
kernel.size(),
......@@ -480,12 +492,12 @@ class ScopedPoolingDescriptor {
PADDLE_ENFORCE_EQ(
kernel.size(),
strides.size(),
platform::errors::InvalidArgument(
phi::errors::InvalidArgument(
"The size of kernel and strides should be equal. But "
"received size of kernel is %d, size of strides is %d.",
kernel.size(),
strides.size()));
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetNdPoolingDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetNdPoolingDescriptor(
desc_,
GetPoolingMode(mode),
kernel.size(),
......@@ -504,11 +516,11 @@ class ScopedActivationDescriptor {
public:
ScopedActivationDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenCreateActivationDescriptor(&desc_));
phi::dynload::miopenCreateActivationDescriptor(&desc_));
}
~ScopedActivationDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(
dynload::miopenDestroyActivationDescriptor(desc_));
phi::dynload::miopenDestroyActivationDescriptor(desc_));
}
template <typename T>
......@@ -539,11 +551,11 @@ class ScopedActivationDescriptor {
mode = miopenActivationTANH;
break;
default:
PADDLE_THROW(platform::errors::Unimplemented(
PADDLE_THROW(phi::errors::Unimplemented(
"Unrecognized MIOPEN activation mode: %d.",
static_cast<int>(activation_mode)));
}
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetActivationDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor(
desc_, mode, relu_ceiling, 0.0, 0.0));
return desc_;
}
......@@ -556,15 +568,17 @@ class ScopedActivationDescriptor {
class ScopedCTCLossDescriptor {
public:
ScopedCTCLossDescriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreateCTCLossDescriptor(&desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenCreateCTCLossDescriptor(&desc_));
}
~ScopedCTCLossDescriptor() PADDLE_MAY_THROW {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenDestroyCTCLossDescriptor(desc_));
PADDLE_ENFORCE_GPU_SUCCESS(
phi::dynload::miopenDestroyCTCLossDescriptor(desc_));
}
template <typename T>
inline miopenCTCLossDescriptor_t descriptor() {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetCTCLossDescriptor(
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetCTCLossDescriptor(
desc_, CudnnDataType<T>::type, 0, false));
return desc_;
}
......@@ -574,5 +588,6 @@ class ScopedCTCLossDescriptor {
DISABLE_COPY_AND_ASSIGN(ScopedCTCLossDescriptor);
};
} // namespace platform
} // namespace paddle
} // namespace gpu
} // namespace backends
} // namespace phi
......@@ -14,9 +14,9 @@ limitations under the License. */
#include "paddle/phi/kernels/funcs/cross_entropy.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/core/utils/data_type.h"
#include "paddle/phi/kernels/funcs/math.h"
......
......@@ -14,9 +14,9 @@
#include "paddle/fluid/operators/layout_utils.h"
#include "paddle/fluid/operators/norm_utils.cu.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/flags.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"
......@@ -37,7 +37,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent);
namespace phi {
template <typename T>
using CudnnDataType = paddle::platform::CudnnDataType<T>;
using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
template <typename T>
using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;
......@@ -644,7 +644,7 @@ void BatchNormGradRawKernel(const Context &ctx,
C,
scale.dims()[0]));
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
#ifdef PADDLE_WITH_HIP
auto compute_format =
data_layout == DataLayout::kNHWC ? DataLayout::kNHWC : DataLayout::kNCHW;
......
......@@ -22,9 +22,9 @@ namespace cub = hipcub;
#include "paddle/fluid/operators/layout_utils.h"
#include "paddle/fluid/operators/norm_utils.cu.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/flags.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h"
......@@ -45,7 +45,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent);
namespace phi {
template <typename T>
using CudnnDataType = paddle::platform::CudnnDataType<T>;
using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
template <typename T>
using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;
......@@ -603,7 +603,7 @@ void BatchNormKernel(const Context &ctx,
int N, C, H, W, D;
phi::funcs::ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
#ifdef PADDLE_WITH_HIP
auto compute_format =
......
......@@ -23,8 +23,8 @@ namespace cub = hipcub;
#endif
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
......
......@@ -23,8 +23,8 @@ namespace cub = hipcub;
#endif
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
......@@ -772,10 +772,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
: MIOPEN_SOFTMAX_MODE_CHANNEL;
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
handle,
paddle::platform::CudnnDataType<T>::kOne(),
phi::backends::gpu::CudnnDataType<T>::kOne(),
descp,
logits_data,
paddle::platform::CudnnDataType<T>::kZero(),
phi::backends::gpu::CudnnDataType<T>::kZero(),
descp,
softmax_data,
MIOPEN_SOFTMAX_LOG,
......@@ -787,10 +787,10 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
handle,
CUDNN_SOFTMAX_LOG,
mode,
paddle::platform::CudnnDataType<T>::kOne(),
phi::backends::gpu::CudnnDataType<T>::kOne(),
descp,
logits_data,
paddle::platform::CudnnDataType<T>::kZero(),
phi::backends::gpu::CudnnDataType<T>::kZero(),
descp,
softmax_data));
#endif
......@@ -1206,10 +1206,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
: MIOPEN_SOFTMAX_MODE_CHANNEL;
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
handle,
paddle::platform::CudnnDataType<T>::kOne(),
phi::backends::gpu::CudnnDataType<T>::kOne(),
descp,
logits_data,
paddle::platform::CudnnDataType<T>::kZero(),
phi::backends::gpu::CudnnDataType<T>::kZero(),
descp,
softmax_data,
MIOPEN_SOFTMAX_LOG,
......@@ -1221,10 +1221,10 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
handle,
CUDNN_SOFTMAX_LOG,
mode,
paddle::platform::CudnnDataType<T>::kOne(),
phi::backends::gpu::CudnnDataType<T>::kOne(),
descp,
logits_data,
paddle::platform::CudnnDataType<T>::kZero(),
phi::backends::gpu::CudnnDataType<T>::kZero(),
descp,
softmax_data));
#endif
......
......@@ -26,12 +26,12 @@
namespace cub = hipcub;
#endif
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
namespace phi {
template <typename T>
using CudnnDataType = paddle::platform::CudnnDataType<T>;
using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
template <typename T>
using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;
......
......@@ -15,7 +15,7 @@
#pragma once
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
......@@ -64,7 +64,7 @@ class RNNDescriptors {
size_t *reserve_size,
DenseTensor *dropout_state) {
int numDirections = is_bidirec_ ? 2 : 1;
gpuDnnDataType_t cudnn_type = paddle::platform::CudnnDataType<T>::type;
gpuDnnDataType_t cudnn_type = phi::backends::gpu::CudnnDataType<T>::type;
// ------------------- cudnn x, y descriptors ---------------------
std::vector<int> dims_x = {batch_size_, input_size_, 1};
std::vector<int> strides_x = {input_size_, 1, 1};
......@@ -179,7 +179,7 @@ class RNNDescriptors {
phi::errors::InvalidArgument(
"The cudnn rnn and setting weight size should be same."));
// ------------------- cudnn weight descriptors ---------------------
auto layout = paddle::platform::DataLayout::kNCHW;
auto layout = phi::backends::gpu::DataLayout::kNCHW;
int dim_tmp = weights_size_ / sizeof(T);
std::vector<int> dim_w = {dim_tmp, 1, 1};
weight_desc_.descriptor<T>(layout, dim_w);
......@@ -250,19 +250,19 @@ class RNNDescriptors {
std::vector<cudnnTensorDescriptor_t> y_descs_;
#endif
paddle::platform::ScopedTensorDescriptor x_desc_;
paddle::platform::ScopedTensorDescriptor y_desc_;
phi::backends::gpu::ScopedTensorDescriptor x_desc_;
phi::backends::gpu::ScopedTensorDescriptor y_desc_;
#if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201
paddle::platform::ScopedRNNTensorDescriptor x_seq_desc_;
paddle::platform::ScopedRNNTensorDescriptor y_seq_desc_;
phi::backends::gpu::ScopedRNNTensorDescriptor x_seq_desc_;
phi::backends::gpu::ScopedRNNTensorDescriptor y_seq_desc_;
#endif
paddle::platform::ScopedTensorDescriptor init_h_desc_;
paddle::platform::ScopedTensorDescriptor init_c_desc_;
paddle::platform::ScopedTensorDescriptor last_h_desc_;
paddle::platform::ScopedTensorDescriptor last_c_desc_;
paddle::platform::ScopedDropoutDescriptor dropout_desc_;
paddle::platform::ScopedFilterDescriptor weight_desc_;
paddle::platform::ScopedRNNDescriptor rnn_desc_;
phi::backends::gpu::ScopedTensorDescriptor init_h_desc_;
phi::backends::gpu::ScopedTensorDescriptor init_c_desc_;
phi::backends::gpu::ScopedTensorDescriptor last_h_desc_;
phi::backends::gpu::ScopedTensorDescriptor last_c_desc_;
phi::backends::gpu::ScopedDropoutDescriptor dropout_desc_;
phi::backends::gpu::ScopedFilterDescriptor weight_desc_;
phi::backends::gpu::ScopedRNNDescriptor rnn_desc_;
};
template <typename T, typename Type>
......
......@@ -31,15 +31,15 @@ namespace cub = hipcub;
#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h"
#endif
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/kernels/funcs/norm_utils.h"
namespace phi {
template <typename T>
using CudnnDataType = paddle::platform::CudnnDataType<T>;
using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
template <typename T>
using BatchNormParamType = typename CudnnDataType<T>::BatchNormParamType;
......
......@@ -15,11 +15,11 @@
#ifndef PADDLE_WITH_HIP
#include "paddle/phi/kernels/affine_grid_grad_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/kernel_registry.h"
......@@ -27,7 +27,7 @@
namespace phi {
using ScopedSpatialTransformerDescriptor =
paddle::platform::ScopedSpatialTransformerDescriptor;
phi::backends::gpu::ScopedSpatialTransformerDescriptor;
template <typename T, typename Context>
void AffineGridGradCudnnKernel(const Context& dev_ctx,
......
......@@ -15,11 +15,11 @@
#ifndef PADDLE_WITH_HIP
#include "paddle/phi/kernels/affine_grid_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/kernel_registry.h"
......@@ -27,7 +27,7 @@
namespace phi {
using ScopedSpatialTransformerDescriptor =
paddle::platform::ScopedSpatialTransformerDescriptor;
phi::backends::gpu::ScopedSpatialTransformerDescriptor;
template <typename T, typename Context>
void AffineGridCudnnKernel(const Context& dev_ctx,
......
......@@ -17,8 +17,8 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h"
#include "paddle/phi/backends/dynload/cudnn_frontend.h"
#include "paddle/phi/backends/gpu/cuda/cudnn_desc.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/utils/data_type.h"
......@@ -86,7 +86,8 @@ class CudnnFrontendConvHelper {
cudnnTensorFormat_t layout_format) {
auto transformed_dims = phi::vectorize<int64_t>(tensor->dims());
if (layout_format == CUDNN_TENSOR_NHWC) {
transformed_dims = paddle::platform::TransformDimOrder(transformed_dims);
transformed_dims =
phi::backends::gpu::TransformDimOrder(transformed_dims);
}
std::vector<int64_t> strides =
GenerateStrides(transformed_dims, layout_format);
......@@ -95,7 +96,7 @@ class CudnnFrontendConvHelper {
.setStrides(strides.size(), strides.data())
.setId(id)
.setAlignment(GetAlignment(tensor))
.setDataType(paddle::platform::ToCudnnDataType(tensor->dtype()))
.setDataType(phi::backends::gpu::ToCudnnDataType(tensor->dtype()))
.build();
}
......
......@@ -559,7 +559,7 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
size_t workspace_size_limit =
CalcWorkspaceLimitInBytes(UseFixedWorkspace());
auto workspace_handle = ctx.cudnn_workspace_handle();
if (paddle::platform::CudnnDataType<T>::type != CUDNN_DATA_HALF) {
if (phi::backends::gpu::CudnnDataType<T>::type != CUDNN_DATA_HALF) {
size_t max_workspace_size =
GetMaxWorkspaceSize(args, workspace_size_limit);
VLOG(3) << "max_workspace_size=" << ToMegaBytes(max_workspace_size)
......@@ -674,7 +674,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> {
bool enable_autotune = true) {
SearchResult<AlgoT> result;
bool use_autotune = false;
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
SetConvMathType(ctx, dtype, args.cdesc);
if (deterministic) {
......@@ -734,7 +734,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase<CK> {
static void SetConvMathType(
const phi::GPUContext& ctx,
cudnnDataType_t dtype,
const paddle::platform::ConvolutionDescriptor& cdesc) {
const phi::backends::gpu::ConvolutionDescriptor& cdesc) {
#if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
if (ctx.GetComputeCapability() >= 70 && dtype == CUDNN_DATA_HALF) {
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType(
......
......@@ -20,6 +20,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/memory/memory.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/kernels/autotune/cache.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
......@@ -28,11 +29,11 @@ limitations under the License. */
namespace phi {
using GPUDNNDataLayout = paddle::platform::DataLayout;
using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
template <typename T>
using ScalingParamType =
typename paddle::platform::CudnnDataType<T>::ScalingParamType;
typename phi::backends::gpu::CudnnDataType<T>::ScalingParamType;
enum class ConvKind { kForward = 1, kBackwardData = 2, kBackwardFilter = 3 };
......@@ -96,10 +97,10 @@ static std::ostream& operator<<(std::ostream& out, const std::vector<T>& v) {
template <typename HandleT, typename DataT>
struct ConvArgsBase {
HandleT handle;
paddle::platform::TensorDescriptor idesc;
paddle::platform::TensorDescriptor odesc;
paddle::platform::FilterDescriptor wdesc;
paddle::platform::ConvolutionDescriptor cdesc;
phi::backends::gpu::TensorDescriptor idesc;
phi::backends::gpu::TensorDescriptor odesc;
phi::backends::gpu::FilterDescriptor wdesc;
phi::backends::gpu::ConvolutionDescriptor cdesc;
const phi::DenseTensor* x = nullptr;
const phi::DenseTensor* w = nullptr;
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include <unordered_map>
#include <vector>
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
DECLARE_int64(conv_workspace_size_limit);
DECLARE_bool(cudnn_exhaustive_search);
......
......@@ -52,8 +52,8 @@ void ConvCudnnGradKernelImplV7(
const std::vector<int>& strides,
const std::vector<int>& padding_common,
const std::vector<int>& dilations,
paddle::platform::DataLayout compute_format,
paddle::platform::DataLayout layout,
phi::backends::gpu::DataLayout compute_format,
phi::backends::gpu::DataLayout layout,
bool use_addto,
bool exhaustive_search,
bool deterministic,
......@@ -70,8 +70,8 @@ void ConvCudnnGradKernelImplV7(
auto handle = ctx.cudnn_handle();
auto workspace_handle = ctx.cudnn_workspace_handle();
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout);
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout);
ConvArgs args1{handle,
transformed_input_grad,
......@@ -96,16 +96,16 @@ void ConvCudnnGradKernelImplV7(
int i_n, i_c, i_d, i_h, i_w;
int o_n, o_c, o_d, o_h, o_w;
if (compute_format == paddle::platform::DataLayout::kNHWC) {
if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
GetNCDHW(transformed_input->dims(),
paddle::platform::DataLayout::kNHWC,
phi::backends::gpu::DataLayout::kNHWC,
&i_n,
&i_c,
&i_d,
&i_h,
&i_w);
GetNCDHW(transformed_output_grad_channel->dims(),
paddle::platform::DataLayout::kNHWC,
phi::backends::gpu::DataLayout::kNHWC,
&o_n,
&o_c,
&o_d,
......@@ -113,14 +113,14 @@ void ConvCudnnGradKernelImplV7(
&o_w);
} else {
GetNCDHW(transformed_input->dims(),
paddle::platform::DataLayout::kNCHW,
phi::backends::gpu::DataLayout::kNCHW,
&i_n,
&i_c,
&i_d,
&i_h,
&i_w);
GetNCDHW(transformed_output_grad_channel->dims(),
paddle::platform::DataLayout::kNCHW,
phi::backends::gpu::DataLayout::kNCHW,
&o_n,
&o_c,
&o_d,
......@@ -347,7 +347,7 @@ void ConvCudnnGradKernelImplV8(
const std::vector<int>& strides,
const std::vector<int>& padding_common,
const std::vector<int>& dilations,
paddle::platform::DataLayout layout,
phi::backends::gpu::DataLayout layout,
bool use_addto,
bool exhaustive_search,
bool deterministic,
......@@ -363,8 +363,8 @@ void ConvCudnnGradKernelImplV8(
cudnnHandle_t handle = const_cast<cudnnHandle_t>(ctx.cudnn_handle());
auto workspace_handle = ctx.cudnn_workspace_handle();
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto layout_format = paddle::platform::GetCudnnTensorFormat(layout);
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout);
if (input_grad) {
CudnnConvBwdDataV8<T>(transformed_output_grad_channel,
......@@ -449,11 +449,11 @@ void ConvCudnnGradKernel(const Context& ctx,
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
#ifdef PADDLE_WITH_HIP
// HIP MIOPEN ONLY SUPPORT NCHW format
auto compute_format = paddle::platform::DataLayout::kNCHW;
auto compute_format = phi::backends::gpu::DataLayout::kNCHW;
#else
#if CUDNN_VERSION_MIN(8, 1, 0)
const bool compute_in_nhwc =
......@@ -463,13 +463,13 @@ void ConvCudnnGradKernel(const Context& ctx,
const bool compute_in_nhwc = dtype == CUDNN_DATA_HALF && IsVoltaOrLater(ctx);
#endif
auto compute_format = compute_in_nhwc && channel_last
? paddle::platform::DataLayout::kNHWC
: paddle::platform::DataLayout::kNCHW;
? phi::backends::gpu::DataLayout::kNHWC
: phi::backends::gpu::DataLayout::kNCHW;
#endif
VLOG(3) << "Compute ConvGradOp with cuDNN:"
<< " data_format=" << data_format << " compute_format="
<< (compute_format == paddle::platform::DataLayout::kNHWC ? "NHWC"
: "NCHW");
<< (compute_format == phi::backends::gpu::DataLayout::kNHWC ? "NHWC"
: "NCHW");
// transform Tensor
DenseTensor transformed_input_channel(input.type());
......@@ -478,7 +478,7 @@ void ConvCudnnGradKernel(const Context& ctx,
DenseTensor transformed_filter_channel(filter.type());
DenseTensor transformed_filter_grad_channel(filter.type());
if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) {
if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) {
VLOG(3) << "Transform input, output_grad, input_grad and tensor from "
"NHWC to NCHW.";
ResizeToChannelFirst<Context, T>(ctx, &input, &transformed_input_channel);
......@@ -507,7 +507,7 @@ void ConvCudnnGradKernel(const Context& ctx,
}
}
if (compute_format == paddle::platform::DataLayout::kNHWC) {
if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
VLOG(3) << "Transform filter and filter_grad tensor from NCHW to NHWC.";
ResizeToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel);
TransToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel);
......@@ -528,7 +528,7 @@ void ConvCudnnGradKernel(const Context& ctx,
auto filter_dims = transformed_filter_channel.dims();
DDim in_data_dims;
DDim filter_data_dims;
if (compute_format == paddle::platform::DataLayout::kNCHW) {
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
} else {
......@@ -553,7 +553,7 @@ void ConvCudnnGradKernel(const Context& ctx,
std::vector<int> padding_diff(data_dim);
std::vector<int> new_input_shape_vec(data_dim + 2);
new_input_shape_vec[0] = transformed_input_channel.dims()[0];
if (compute_format == paddle::platform::DataLayout::kNCHW) {
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
new_input_shape_vec[1] = transformed_input_channel.dims()[1];
} else {
new_input_shape_vec[data_dim + 1] =
......@@ -563,14 +563,14 @@ void ConvCudnnGradKernel(const Context& ctx,
for (size_t i = 0; i < data_dim; ++i) {
padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
if (compute_format == paddle::platform::DataLayout::kNCHW) {
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
new_input_shape_vec[i + 2] =
transformed_input_channel.dims()[i + 2] + padding_diff[i];
} else {
new_input_shape_vec[i + 1] =
transformed_input_channel.dims()[i + 1] + padding_diff[i];
}
if (compute_format == paddle::platform::DataLayout::kNCHW) {
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
} else {
......@@ -624,15 +624,15 @@ void ConvCudnnGradKernel(const Context& ctx,
}
}
}
paddle::platform::DataLayout layout =
compute_format == paddle::platform::DataLayout::kNHWC
? paddle::platform::DataLayout::kNHWC
: paddle::platform::DataLayout::kNCHW;
phi::backends::gpu::DataLayout layout =
compute_format == phi::backends::gpu::DataLayout::kNHWC
? phi::backends::gpu::DataLayout::kNHWC
: phi::backends::gpu::DataLayout::kNCHW;
  // TODO(phlrain): replace paddle::platform::DataLayout with phi::DataLayout
if (transformed_input.dims().size() == 5) {
layout = compute_format == paddle::platform::DataLayout::kNHWC
? paddle::platform::DataLayout::kNDHWC
: paddle::platform::DataLayout::kNCDHW;
layout = compute_format == phi::backends::gpu::DataLayout::kNHWC
? phi::backends::gpu::DataLayout::kNDHWC
: phi::backends::gpu::DataLayout::kNCDHW;
}
#ifdef PADDLE_WITH_CUDNN_FRONTEND
......@@ -717,14 +717,15 @@ void ConvCudnnGradKernel(const Context& ctx,
}
}
if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) {
if (channel_last &&
compute_format == phi::backends::gpu::DataLayout::kNCHW) {
TransToChannelLast<Context, T>(
ctx, &transformed_input_grad_channel, input_grad);
}
}
if (filter_grad) {
if (compute_format == paddle::platform::DataLayout::kNHWC) {
if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
TransToChannelFirst<Context, T>(
ctx, &transformed_filter_grad_channel, filter_grad);
}
......@@ -1008,11 +1009,11 @@ void ConvCudnnGradGradKernel(
c_group = groups;
groups = 1;
#endif
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto handle = ctx.cudnn_handle();
auto layout = paddle::platform::GetCudnnTensorFormat(
paddle::platform::DataLayout::kNCHW);
auto layout = phi::backends::gpu::GetCudnnTensorFormat(
phi::backends::gpu::DataLayout::kNCHW);
ConvArgs args1{handle,
&transformed_ddX,
......@@ -1023,7 +1024,7 @@ void ConvCudnnGradGradKernel(
dilations,
dtype,
groups,
paddle::platform::DataLayout::kNCHW};
phi::backends::gpu::DataLayout::kNCHW};
ConvArgs args2{handle,
&transformed_X,
ddW,
......@@ -1033,7 +1034,7 @@ void ConvCudnnGradGradKernel(
dilations,
dtype,
groups,
paddle::platform::DataLayout::kNCHW};
phi::backends::gpu::DataLayout::kNCHW};
ConvArgs args3{handle,
&transformed_ddX,
dW,
......@@ -1043,7 +1044,7 @@ void ConvCudnnGradGradKernel(
dilations,
dtype,
groups,
paddle::platform::DataLayout::kNCHW};
phi::backends::gpu::DataLayout::kNCHW};
ConvArgs args4{handle,
&transformed_dX,
ddW,
......@@ -1053,7 +1054,7 @@ void ConvCudnnGradGradKernel(
dilations,
dtype,
groups,
paddle::platform::DataLayout::kNCHW};
phi::backends::gpu::DataLayout::kNCHW};
#ifdef PADDLE_WITH_HIP
SearchResult<miopenConvFwdAlgorithm_t> fwd_result1;
......
......@@ -50,8 +50,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input,
const std::vector<int>& strides,
const std::vector<int>& padding_common,
const std::vector<int>& dilations,
paddle::platform::DataLayout compute_format,
paddle::platform::DataLayout layout,
phi::backends::gpu::DataLayout compute_format,
phi::backends::gpu::DataLayout layout,
bool exhaustive_search,
bool deterministic,
int groups,
......@@ -63,8 +63,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input,
auto handle = ctx.cudnn_handle();
auto workspace_handle = ctx.cudnn_workspace_handle();
auto layout_format = paddle::platform::GetCudnnTensorFormat(layout);
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout);
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
// ------------------- cudnn descriptors ---------------------
ConvArgs args{handle,
......@@ -113,16 +113,16 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input,
int i_n, i_c, i_d, i_h, i_w;
int o_n, o_c, o_d, o_h, o_w;
if (compute_format == paddle::platform::DataLayout::kNHWC) {
if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
GetNCDHW(transformed_input->dims(),
paddle::platform::DataLayout::kNHWC,
phi::backends::gpu::DataLayout::kNHWC,
&i_n,
&i_c,
&i_d,
&i_h,
&i_w);
GetNCDHW(transformed_output->dims(),
paddle::platform::DataLayout::kNHWC,
phi::backends::gpu::DataLayout::kNHWC,
&o_n,
&o_c,
&o_d,
......@@ -130,14 +130,14 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input,
&o_w);
} else {
GetNCDHW(transformed_input->dims(),
paddle::platform::DataLayout::kNCHW,
phi::backends::gpu::DataLayout::kNCHW,
&i_n,
&i_c,
&i_d,
&i_h,
&i_w);
GetNCDHW(transformed_output->dims(),
paddle::platform::DataLayout::kNCHW,
phi::backends::gpu::DataLayout::kNCHW,
&o_n,
&o_c,
&o_d,
......@@ -227,7 +227,7 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor,
const std::vector<int>& strides,
const std::vector<int>& padding_common,
const std::vector<int>& dilations,
paddle::platform::DataLayout layout,
phi::backends::gpu::DataLayout layout,
bool exhaustive_search,
bool deterministic,
int groups,
......@@ -247,8 +247,8 @@ void ConvCudnnKernelImplV8(const DenseTensor* input_tensor,
cudnnHandle_t handle = const_cast<cudnnHandle_t>(ctx.cudnn_handle());
auto workspace_handle = ctx.cudnn_workspace_handle();
auto layout_format = paddle::platform::GetCudnnTensorFormat(layout);
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto layout_format = phi::backends::gpu::GetCudnnTensorFormat(layout);
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
float alpha = 1.0f;
float beta = 0.0f;
......@@ -368,11 +368,11 @@ void ConvCudnnKernel(const Context& ctx,
"FLAGS_cudnn_deterministic True at same time."));
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
#ifdef PADDLE_WITH_HIP
// HIP MIOPEN ONLY SUPPORT NCHW format
auto compute_format = paddle::platform::DataLayout::kNCHW;
auto compute_format = phi::backends::gpu::DataLayout::kNCHW;
#else
#if CUDNN_VERSION_MIN(8, 1, 0)
// Tensor Core introduced from Volta GPUs supports more faster conv op
......@@ -388,20 +388,20 @@ void ConvCudnnKernel(const Context& ctx,
// We will only do data format conversion from NHWC to NCHW.
// cudnn will convert NCHW to NHWC automatically on Tensor Core.
auto compute_format = compute_in_nhwc && channel_last
? paddle::platform::DataLayout::kNHWC
: paddle::platform::DataLayout::kNCHW;
? phi::backends::gpu::DataLayout::kNHWC
: phi::backends::gpu::DataLayout::kNCHW;
#endif
VLOG(3) << "Compute ConvOp with cuDNN:"
<< " data_format=" << data_format << " compute_format="
<< (compute_format == paddle::platform::DataLayout::kNHWC ? "NHWC"
: "NCHW");
<< (compute_format == phi::backends::gpu::DataLayout::kNHWC ? "NHWC"
: "NCHW");
// ------------ transformed tensor -----------
DenseTensor transformed_input_channel(input.type());
DenseTensor transformed_output(output->type());
DenseTensor transformed_filter_channel(filter.type());
if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) {
if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) {
VLOG(3) << "Transform input tensor from NHWC to NCHW.";
ResizeToChannelFirst<Context, T>(ctx, &input, &transformed_input_channel);
TransToChannelFirst<Context, T>(ctx, &input, &transformed_input_channel);
......@@ -412,7 +412,7 @@ void ConvCudnnKernel(const Context& ctx,
transformed_input_channel.ShareDataWith(input);
transformed_output.ShareDataWith(*output);
}
if (compute_format == paddle::platform::DataLayout::kNHWC) {
if (compute_format == phi::backends::gpu::DataLayout::kNHWC) {
VLOG(3) << "Transform filter tensor from NCHW to NHWC.";
ResizeToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel);
TransToChannelLast<Context, T>(ctx, &filter, &transformed_filter_channel);
......@@ -426,7 +426,7 @@ void ConvCudnnKernel(const Context& ctx,
DDim in_data_dims;
DDim filter_data_dims;
if (compute_format == paddle::platform::DataLayout::kNCHW) {
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
} else {
......@@ -448,7 +448,7 @@ void ConvCudnnKernel(const Context& ctx,
std::vector<int> new_input_shape_vec(data_dim + 2);
new_input_shape_vec[0] = transformed_input_channel.dims()[0];
if (compute_format == paddle::platform::DataLayout::kNCHW) {
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
new_input_shape_vec[1] = transformed_input_channel.dims()[1];
} else {
new_input_shape_vec[data_dim + 1] =
......@@ -459,14 +459,14 @@ void ConvCudnnKernel(const Context& ctx,
for (size_t i = 0; i < data_dim; ++i) {
padding_diff[i] = std::abs(paddings[2 * i] - paddings[2 * i + 1]);
padding_common[i] = std::min(paddings[2 * i], paddings[2 * i + 1]);
if (compute_format == paddle::platform::DataLayout::kNCHW) {
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
new_input_shape_vec[i + 2] =
transformed_input_channel.dims()[i + 2] + padding_diff[i];
} else {
new_input_shape_vec[i + 1] =
transformed_input_channel.dims()[i + 1] + padding_diff[i];
}
if (compute_format == paddle::platform::DataLayout::kNCHW) {
if (compute_format == phi::backends::gpu::DataLayout::kNCHW) {
input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
} else {
......@@ -513,14 +513,14 @@ void ConvCudnnKernel(const Context& ctx,
}
}
paddle::platform::DataLayout layout =
compute_format == paddle::platform::DataLayout::kNHWC
? paddle::platform::DataLayout::kNHWC
: paddle::platform::DataLayout::kNCHW;
phi::backends::gpu::DataLayout layout =
compute_format == phi::backends::gpu::DataLayout::kNHWC
? phi::backends::gpu::DataLayout::kNHWC
: phi::backends::gpu::DataLayout::kNCHW;
if (transformed_input.dims().size() == 5) {
layout = compute_format == paddle::platform::DataLayout::kNHWC
? paddle::platform::DataLayout::kNDHWC
: paddle::platform::DataLayout::kNCDHW;
layout = compute_format == phi::backends::gpu::DataLayout::kNHWC
? phi::backends::gpu::DataLayout::kNDHWC
: phi::backends::gpu::DataLayout::kNCDHW;
}
#ifdef PADDLE_WITH_CUDNN_FRONTEND
......@@ -564,7 +564,7 @@ void ConvCudnnKernel(const Context& ctx,
&transformed_output);
#endif
if (channel_last && compute_format == paddle::platform::DataLayout::kNCHW) {
if (channel_last && compute_format == phi::backends::gpu::DataLayout::kNCHW) {
TransToChannelLast<Context, T>(ctx, &transformed_output, output);
}
}
......
......@@ -28,16 +28,16 @@ limitations under the License. */
#include "paddle/phi/kernels/transpose_kernel.h"
#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h"
#include "paddle/phi/backends/gpu/rocm/miopen_helper.h"
#include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h"
#else
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h"
#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h"
#include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h"
#endif
namespace phi {
using GPUDNNDataLayout = paddle::platform::DataLayout;
using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
template <typename T, typename Context>
void ConvTransposeGradRawGPUDNNKernel(const Context& ctx,
......@@ -171,7 +171,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx,
groups = 1;
#endif
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto handle = ctx.cudnn_handle();
ConvArgs args1{handle,
......@@ -203,7 +203,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx,
SearchResult<cudnnConvolutionBwdFilterAlgo_t> filter_result;
#endif
auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout);
auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout);
size_t workspace_size = 0;
bool deterministic = FLAGS_cudnn_deterministic;
T* dx_data = nullptr;
......@@ -616,10 +616,11 @@ void Conv2dTransposeDoubleGradGPUDNNKernel(
c_group = groups;
groups = 1;
#endif
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
auto handle = ctx.cudnn_handle();
auto layout = paddle::platform::GetCudnnTensorFormat(GPUDNNDataLayout::kNCHW);
auto layout =
phi::backends::gpu::GetCudnnTensorFormat(GPUDNNDataLayout::kNCHW);
ConvArgs args1{handle,
&transformed_ddout_channel,
......
......@@ -26,16 +26,16 @@ limitations under the License. */
#include "paddle/phi/kernels/transpose_kernel.h"
#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h"
#include "paddle/phi/backends/gpu/rocm/miopen_helper.h"
#include "paddle/phi/kernels/gpudnn/conv_miopen_helper.h"
#else
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h"
#include "paddle/phi/backends/gpu/cuda/cudnn_helper.h"
#include "paddle/phi/kernels/gpudnn/conv_cudnn_v7.h"
#endif
namespace phi {
using GPUDNNDataLayout = paddle::platform::DataLayout;
using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
template <typename T, typename Context>
void ConvTransposeRawGPUDNNKernel(const Context& ctx,
......@@ -194,10 +194,10 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx,
#endif
// ------------------- cudnn conv algorithm ---------------------
auto handle = ctx.cudnn_handle();
auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout);
auto layout_tensor = phi::backends::gpu::GetCudnnTensorFormat(layout);
bool deterministic = FLAGS_cudnn_deterministic;
auto dtype = paddle::platform::CudnnDataType<T>::type;
auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
// ------------------- cudnn descriptors ---------------------
ConvArgs args{handle,
&transformed_out,
......
......@@ -16,18 +16,18 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
namespace phi {
using GPUDNNDataLayout = paddle::platform::DataLayout;
using PoolingMode = paddle::platform::PoolingMode;
using ScopedPoolingDescriptor = paddle::platform::ScopedPoolingDescriptor;
using ScopedTensorDescriptor = paddle::platform::ScopedTensorDescriptor;
using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
using PoolingMode = phi::backends::gpu::PoolingMode;
using ScopedPoolingDescriptor = phi::backends::gpu::ScopedPoolingDescriptor;
using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor;
template <typename T>
using ScalingParamType =
typename paddle::platform::CudnnDataType<T>::ScalingParamType;
typename phi::backends::gpu::CudnnDataType<T>::ScalingParamType;
inline GPUDNNDataLayout GetLayoutFromStr(std::string data_format) {
if (data_format == "NHWC") {
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#include "paddle/phi/kernels/pool_grad_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/pooling.h"
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/phi/kernels/pool_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/pooling.h"
......
......@@ -24,8 +24,8 @@ limitations under the License. */
#include "paddle/phi/kernels/primitive/kernel_primitives.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_dnn.h"
#define MATRIX_SOFTMAX_ALIGN_BYTES 16
#define MATRIX_SOFTMAX_THREAHOLD 100000
......@@ -55,8 +55,8 @@ limitations under the License. */
namespace phi {
using ScopedTensorDescriptor = paddle::platform::ScopedTensorDescriptor;
using GPUDNNDataLayout = paddle::platform::DataLayout;
using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor;
using GPUDNNDataLayout = phi::backends::gpu::DataLayout;
// Vectorization trait 4 * sizeof(T)
template <typename T>
......@@ -1065,10 +1065,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx,
auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE;
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
handle,
paddle::platform::CudnnDataType<T>::kOne(),
phi::backends::gpu::CudnnDataType<T>::kOne(),
desc,
x_data,
paddle::platform::CudnnDataType<T>::kZero(),
phi::backends::gpu::CudnnDataType<T>::kZero(),
desc,
out_data,
algo,
......@@ -1082,10 +1082,10 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx,
handle,
algo,
mode,
paddle::platform::CudnnDataType<T>::kOne(),
phi::backends::gpu::CudnnDataType<T>::kOne(),
desc,
x_data,
paddle::platform::CudnnDataType<T>::kZero(),
phi::backends::gpu::CudnnDataType<T>::kZero(),
desc,
out_data));
#endif
......@@ -1137,12 +1137,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx,
auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE;
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxBackward_V2(
handle,
paddle::platform::CudnnDataType<T>::kOne(),
phi::backends::gpu::CudnnDataType<T>::kOne(),
desc,
out_data,
desc,
dout_data,
paddle::platform::CudnnDataType<T>::kZero(),
phi::backends::gpu::CudnnDataType<T>::kZero(),
desc,
dx_data,
algo,
......@@ -1156,12 +1156,12 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx,
handle,
algo,
mode,
paddle::platform::CudnnDataType<T>::kOne(),
phi::backends::gpu::CudnnDataType<T>::kOne(),
desc,
out_data,
desc,
dout_data,
paddle::platform::CudnnDataType<T>::kZero(),
phi::backends::gpu::CudnnDataType<T>::kZero(),
desc,
dx_data));
#endif
......
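All of the hunks above apply the same mechanical substitution: cuDNN/MIOpen helper types that call sites previously reached through paddle::platform are now reached through phi::backends::gpu, with the corresponding includes switched from fluid's gpu_dnn.h to paddle/phi/backends/gpu/gpu_dnn.h. The snippet below is a minimal sketch of what a migrated call site looks like; it is not a file from this PR, the function and namespace names are hypothetical, and it assumes a CUDA build of Paddle with cuDNN enabled so the moved helpers are available.

// Sketch only: illustrates the namespace substitution shown in the hunks
// above; NOT part of this PR. Assumes a CUDA build of Paddle with cuDNN,
// so gpu_dnn.h pulls in the cuDNN helper types.
#include "paddle/phi/backends/gpu/gpu_dnn.h"

namespace example {  // hypothetical namespace, for illustration only

template <typename T>
void DescribeMigratedCallSite() {
  // Data-type traits formerly reached via paddle::platform::CudnnDataType.
  auto dtype = phi::backends::gpu::CudnnDataType<T>::type;
  const void* one = phi::backends::gpu::CudnnDataType<T>::kOne();
  const void* zero = phi::backends::gpu::CudnnDataType<T>::kZero();

  // Layout and descriptor helpers also resolve under phi::backends::gpu now.
  auto format = phi::backends::gpu::GetCudnnTensorFormat(
      phi::backends::gpu::DataLayout::kNCHW);
  phi::backends::gpu::ScopedTensorDescriptor tensor_desc;

  (void)dtype;
  (void)one;
  (void)zero;
  (void)format;
  (void)tensor_desc;
}

}  // namespace example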