Unverified commit 1623f1b4, authored by HongyuJia, committed by GitHub

[Phi Support CuDNN] Support ALL CuDNN (#47865)

* support default use_gpudnn=True

* fully support cudnn in phi

* add header file

* add white_list, verify accuracy

* phi support all cudnn

* opt affine_grad

* try different arches of pretrained_model

* try different arches of pretrained_model

* add debug string

* debug eager_method

* add debug string, pass all local ctest

* polish all debug code

* delete use_cudnn relevant code autogen

* fix depthwise_conv2d

* Share all other members of Tensor except use_cudnn

* polish codes according to review opinion

* polish codes according to review opinion, fix bug

* polish codes according to review opinion, opt performance

* polish codes according to review opinion, fix pooling.py
Parent 6bdf1261
......@@ -54,6 +54,7 @@ typedef SSIZE_T ssize_t;
#include "paddle/fluid/memory/allocation/mmap_allocator.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
......@@ -1444,6 +1445,43 @@ static PyObject* tensor__copy_gradient_from(TensorObject* self,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor__use_cudnn(TensorObject* self,
PyObject* args,
PyObject* kwargs) {
EAGER_TRY
PADDLE_ENFORCE(self->tensor.defined() && self->tensor.is_dense_tensor(),
paddle::platform::errors::Fatal(
"function _use_cudnn is only effective for DenseTensor"));
bool use_cudnn = pybind::CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 0), 0);
// If the requested use_cudnn matches the current attribute, return directly
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(self->tensor.impl().get());
phi::DenseTensorMeta* dense_tensor_meta =
phi::DenseTensorUtils::GetMutableMeta(dense_tensor);
if (use_cudnn == dense_tensor_meta->use_cudnn) {
return ToPyObject(self->tensor);
}
// Share all other members of Tensor except use_cudnn
phi::DenseTensorMeta target_dense_meta = *dense_tensor_meta;
target_dense_meta.use_cudnn = use_cudnn;
phi::DenseTensor target_dense_tensor;
target_dense_tensor.ShareDataWith(*dense_tensor);
target_dense_tensor.set_meta(target_dense_meta);
// Construct returned tensor
paddle::experimental::Tensor target_tensor(
std::make_shared<phi::DenseTensor>(target_dense_tensor),
self->tensor.name());
target_tensor.set_autograd_meta(self->tensor.mutable_autograd_meta());
VLOG(4) << "Tensor: " << target_tensor.name()
<< " set use_cudnn = " << use_cudnn;
return ToPyObject(target_tensor);
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_method_set_vocab(TensorObject* self,
PyObject* args,
PyObject* kwargs) {
......@@ -2010,6 +2048,10 @@ PyMethodDef variable_methods[] = {
(PyCFunction)(void (*)(void))tensor__copy_gradient_from,
METH_VARARGS | METH_KEYWORDS,
NULL},
{"_tensor_use_cudnn",
(PyCFunction)(void (*)(void))tensor__use_cudnn,
METH_VARARGS | METH_KEYWORDS,
NULL},
/** the methods to adapt old dygraph, will be removed in the future **/
{"set_string_list",
(PyCFunction)(void (*)(void))tensor_method_set_string_list,
......
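The `_tensor_use_cudnn` binding above is surfaced to Python as `Tensor._use_cudnn` by the monkey patch later in this diff. A minimal usage sketch (assumes eager/dygraph mode and a build containing this patch; the returned tensor shares storage with the original, only the meta flag differs):

import paddle

x = paddle.rand([3, 8, 8])
y = x._use_cudnn(False)  # new tensor sharing x's data, with meta.use_cudnn = False
z = y._use_cudnn(True)   # flip the flag back; data is still shared
assert (x.numpy() == y.numpy()).all()  # values unchanged, only kernel selection is affected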
......@@ -54,14 +54,11 @@ bool HasAllocation(const phi::TensorBase& t) {
BackendSet GetTensorBackendSet(const phi::TensorBase& t) {
if (HasAllocation(t) && t.place().GetType() != AllocationType::UNDEFINED) {
BackendSet backend_set(phi::TransToPhiBackend(t.place()));
switch (t.layout()) {
case DataLayout::ONEDNN:
backend_set = backend_set | BackendSet(Backend::ONEDNN);
break;
default:
// do nothing
break;
phi::Backend backend_key = phi::TransToPhiBackend(t.place());
BackendSet backend_set(backend_key);
if (backend_key == Backend::GPU && phi::DenseTensor::classof(&t) &&
static_cast<const phi::DenseTensor&>(t).meta().use_cudnn) {
backend_set = backend_set | BackendSet(Backend::GPUDNN);
}
return backend_set;
}
......@@ -126,7 +123,13 @@ Backend ParseBackend(const Place& place) {
return phi::TransToPhiBackend(place);
}
Backend ParseBackend(const Tensor& tensor) {
return phi::TransToPhiBackend(tensor.place());
Backend backend_key = phi::TransToPhiBackend(tensor.place());
if (backend_key == Backend::GPU &&
phi::DenseTensor::classof(tensor.impl().get()) &&
static_cast<phi::DenseTensor*>(tensor.impl().get())->meta().use_cudnn) {
return Backend::GPUDNN;
}
return backend_key;
}
Backend ParseBackendWithInputOrder(const Place& place, const Tensor& tensor) {
......
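The net effect of GetTensorBackendSet and ParseBackend above: a dense tensor on GPU whose meta keeps use_cudnn=true is dispatched to the GPUDNN backend; anything else stays on its plain backend. A rough Python model of that decision (names here are illustrative, not Paddle APIs):

def parse_backend(place, is_dense_tensor, use_cudnn):
    # Mirrors ParseBackend(const Tensor&): promote GPU to GPUDNN only for
    # dense tensors that have not opted out via meta.use_cudnn.
    backend = "GPU" if place == "gpu" else "CPU"
    if backend == "GPU" and is_dense_tensor and use_cudnn:
        return "GPUDNN"
    return backend

assert parse_backend("gpu", True, True) == "GPUDNN"
assert parse_backend("gpu", True, False) == "GPU"
assert parse_backend("cpu", True, True) == "CPU"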
......@@ -90,6 +90,7 @@ struct ArgsIterator {
struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
KernelKeySet key_set;
bool disable_cudnn = false;
// this dtype_set is used for cache multi-inputs dtype and used for
// data_promote
DataTypeSet dtype_set{DataType::UNDEFINED};
......@@ -97,11 +98,19 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
// TODO(chenweihang): deal with multiple diff input Tensors
// TODO(chenweihang): add global device guard method to set backend
inline void AssignKernelKeySet(const phi::TensorBase& tensor) {
key_set.backend_set =
key_set.backend_set | detail::GetTensorBackendSet(tensor);
// assign Backend
BackendSet tensor_backend_set = detail::GetTensorBackendSet(tensor);
key_set.backend_set = key_set.backend_set | tensor_backend_set;
// if any tensor's use_cudnn attribute is false, explicitly disable the cudnn kernel
if (tensor_backend_set == BackendSet(Backend::GPU) || disable_cudnn) {
disable_cudnn = true;
key_set.backend_set = key_set.backend_set - BackendSet(Backend::GPUDNN);
}
// assign DataLayout
phi::DataLayout tensor_layout = tensor.layout();
key_set.layout =
tensor_layout > key_set.layout ? tensor_layout : key_set.layout;
// assign DataType
key_set.dtype = tensor.dtype();
dtype_set = dtype_set | DataTypeSet(key_set.dtype);
auto promote_result = PromoteTypes(dtype_set);
......
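For ops with several tensor inputs, the parser ORs the per-tensor backend sets, but an opt-out is sticky: once any input contributes a plain-GPU set, GPUDNN is subtracted from the accumulated set for that input and every later one. A small set-based model (illustrative only, not the real BackendSet type):

def resolve_backend_set(per_tensor_sets):
    # per_tensor_sets: one backend set per input tensor, e.g.
    # {"GPU", "GPUDNN"} for a cudnn-enabled tensor, {"GPU"} for an opted-out one.
    acc, disable_cudnn = set(), False
    for s in per_tensor_sets:
        acc |= s
        if s == {"GPU"} or disable_cudnn:
            disable_cudnn = True
            acc.discard("GPUDNN")
    return acc

# One opted-out input disables cudnn for the whole kernel key, in either order:
assert resolve_backend_set([{"GPU", "GPUDNN"}, {"GPU"}]) == {"GPU"}
assert resolve_backend_set([{"GPU"}, {"GPU", "GPUDNN"}]) == {"GPU"}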
......@@ -307,7 +307,6 @@ class BaseAPI:
'backend': None,
'layout': None,
'data_type': None,
'use_gpudnn': 'false',
'dispatch': {},
}
if 'backend' in kernel_config and len(kernel_config['backend']) > 0:
......@@ -318,10 +317,6 @@ class BaseAPI:
kernel['data_type'] = kernel_config['data_type']
if 'param' in kernel_config:
kernel['param'] = kernel_config['param']
if 'use_gpudnn' in kernel_config:
kernel['use_gpudnn'] = kernel_config['use_gpudnn']
if isinstance(kernel['use_gpudnn'], bool):
kernel['use_gpudnn'] = str(kernel['use_gpudnn']).lower()
kernel_funcs = re.compile(r'([a-zA-Z0-9_]+)\s*({[^}]+})?').findall(
kernel_config['func']
)
......@@ -1124,15 +1119,10 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d
for kernel_out in outputs_args:
fallback_kernel_output_trans += f"""
{code_indent} TransDataBackend({kernel_out}, kernel_backend, {kernel_out});"""
cudnn_args = (
''
if self.kernel['use_gpudnn'] == 'false'
else ', ' + self.kernel['use_gpudnn']
)
return f"""
{code_indent} VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
{code_indent} auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
{code_indent} "{kernel_name}", {{kernel_backend, kernel_layout, kernel_data_type}}{cudnn_args});
{code_indent} "{kernel_name}", {{kernel_backend, kernel_layout, kernel_data_type}});
{code_indent} const auto& kernel = kernel_result.kernel;
{code_indent} VLOG(6) << "{kernel_name} kernel: " << kernel;
{code_indent} auto* dev_ctx = GetDeviceContextByBackend(kernel_result.has_fallback_cpu ? Backend::CPU : kernel_backend);
......
......@@ -67,8 +67,8 @@
func : addmm_grad
- backward_op : affine_grid_grad
forward : affine_grid (Tensor input, IntArray outputShape, bool align_corners=true, bool use_cudnn=true) -> Tensor(output)
args : (Tensor output_grad, IntArray outputShape, bool use_cudnn=true, bool align_corners=true)
forward : affine_grid (Tensor input, IntArray outputShape, bool align_corners=true) -> Tensor(output)
args : (Tensor input, Tensor output_grad, IntArray outputShape, bool align_corners=true)
output : Tensor(input_grad)
infer_meta :
func : AffineGridGradInferMeta
......@@ -76,7 +76,7 @@
kernel :
func : affine_grid_grad
param : [output_grad, outputShape, align_corners]
use_gpudnn: use_cudnn
no_need_buffer : input
- backward_op : amax_grad
forward: amax (Tensor x, int64_t[] axis={}, bool keepdim=false) -> Tensor(out)
......@@ -262,7 +262,6 @@
param : [input, filter]
kernel :
func : conv2d_grad
use_gpudnn : true
backward : conv2d_grad_grad
- backward_op : conv2d_grad_grad
......@@ -274,7 +273,6 @@
param: [input, filter, grad_out]
kernel :
func : conv2d_grad_grad
use_gpudnn : true
optional : grad_input_grad, grad_filter_grad
- backward_op : conv2d_transpose_double_grad
......@@ -285,7 +283,6 @@
func : Conv2dTransposeDoubleGradInferMeta
kernel :
func : conv2d_transpose_grad_grad
use_gpudnn : true
- backward_op : conv2d_transpose_grad
forward : conv2d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out)
......@@ -295,7 +292,6 @@
func : Conv2dTransposeGradInferMeta
kernel :
func : conv2d_transpose_grad
use_gpudnn : true
backward : conv2d_transpose_double_grad
- backward_op : conv3d_double_grad
......@@ -307,7 +303,6 @@
param: [input, filter, grad_out]
kernel :
func : conv3d_double_grad
use_gpudnn : true
optional : grad_input_grad, grad_filter_grad
- backward_op : conv3d_grad
......@@ -319,7 +314,6 @@
param : [input, filter]
kernel :
func : conv3d_grad
use_gpudnn : true
backward : conv3d_double_grad
- backward_op : conv3d_transpose_grad
......@@ -330,7 +324,6 @@
func : ConvTransposeGradInferMeta
kernel :
func : conv3d_transpose_grad
use_gpudnn : true
- backward_op : crop_grad
forward : crop_tensor (Tensor x, IntArray shape, IntArray offsets) -> Tensor(out)
......@@ -401,7 +394,6 @@
kernel :
func : depthwise_conv2d_grad
param : [input, filter, out_grad, strides, paddings, padding_algorithm, groups, dilations, data_format]
use_gpudnn : True
backward : depthwise_conv2d_double_grad
- backward_op : depthwise_conv2d_transpose_grad
......@@ -1210,8 +1202,8 @@
func : pixel_shuffle_grad
- backward_op : pool2d_double_grad
forward : pool2d_grad(Tensor x, Tensor out, Tensor grad_out, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm, bool use_gpudnn) -> Tensor(grad_x)
args : (Tensor grad_x_grad, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm, bool use_gpudnn)
forward : pool2d_grad(Tensor x, Tensor out, Tensor grad_out, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) -> Tensor(grad_x)
args : (Tensor x, Tensor grad_x_grad, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm)
output : Tensor(grad_out_grad)
infer_meta :
func : Pool2DInferMeta
......@@ -1219,11 +1211,11 @@
kernel :
func : pool2d_double_grad
param : [grad_x_grad, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm]
use_gpudnn : use_gpudnn
no_need_buffer : x
- backward_op : pool2d_grad
forward : pool2d(Tensor x, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm, bool use_gpudnn) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm, bool use_gpudnn)
forward : pool2d(Tensor x, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
......@@ -1231,12 +1223,11 @@
kernel :
func : pool2d_grad
param : [x, out, out_grad, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm]
use_gpudnn : use_gpudnn
backward : pool2d_double_grad
- backward_op : pool3d_grad
forward : pool3d(Tensor x, int[] kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm, bool use_gpudnn) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm, bool use_gpudnn)
forward : pool3d(Tensor x, int[] kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
......@@ -1244,7 +1235,6 @@
kernel :
func : pool3d_grad
param : [x, out, out_grad, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm]
use_gpudnn : use_gpudnn
- backward_op : pow_double_grad
forward : pow_grad(Tensor x, Tensor grad_out, Scalar y) -> Tensor(grad_x)
......@@ -1601,7 +1591,6 @@
param : [out]
kernel :
func : softmax_grad
use_gpudnn : true
- backward_op : spectral_norm_grad
forward : spectral_norm (Tensor weight, Tensor u, Tensor v, int dim, int power_iters, float eps) -> Tensor(out)
......
......@@ -97,7 +97,7 @@
backward : addmm_grad
- op : affine_grid
args : (Tensor input, IntArray outputShape, bool align_corners=true, bool use_cudnn=true)
args : (Tensor input, IntArray outputShape, bool align_corners=true)
output : Tensor
infer_meta :
func : AffineGridInferMeta
......@@ -106,7 +106,6 @@
func : affine_grid
param : [input, outputShape, align_corners]
data_type : input
use_gpudnn: use_cudnn
backward : affine_grid_grad
- op : all
......@@ -431,7 +430,6 @@
func : ConvInferMeta
kernel :
func : conv2d
use_gpudnn : true
backward : conv2d_grad
- op : conv2d_transpose
......@@ -441,7 +439,6 @@
func : Conv2dTransposeInferMeta
kernel :
func : conv2d_transpose
use_gpudnn : true
backward : conv2d_transpose_grad
- op : conv3d
......@@ -451,7 +448,6 @@
func : Conv3DInferMeta
kernel :
func : conv3d
use_gpudnn : true
backward : conv3d_grad
- op : conv3d_transpose
......@@ -461,7 +457,6 @@
func : ConvTransposeInferMeta
kernel :
func : conv3d_transpose
use_gpudnn : true
backward : conv3d_transpose_grad
- op : copy_to
......@@ -540,7 +535,6 @@
kernel :
func : depthwise_conv2d
param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format]
use_gpudnn : true
backward : depthwise_conv2d_grad
- op : depthwise_conv2d_transpose
......@@ -1636,7 +1630,7 @@
backward : pixel_shuffle_grad
- op : pool2d
args : (Tensor x, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm, bool use_gpudnn)
args : (Tensor x, IntArray kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm)
output : Tensor(out)
infer_meta :
func : Pool2DInferMeta
......@@ -1644,11 +1638,10 @@
kernel :
func : pool2d
param : [x, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm]
use_gpudnn : use_gpudnn
backward : pool2d_grad
- op : pool3d
args : (Tensor x, int[] kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm, bool use_gpudnn)
args : (Tensor x, int[] kernel_size, int[] strides, int[] paddings, bool ceil_mode, bool exclusive, str data_format, str pooling_type, bool global_pooling, bool adaptive, str padding_algorithm)
output : Tensor(out)
infer_meta :
func : PoolInferMeta
......@@ -1656,7 +1649,6 @@
kernel :
func : pool3d
param : [x, kernel_size, strides, paddings, ceil_mode, exclusive, data_format, pooling_type, global_pooling, adaptive, padding_algorithm]
use_gpudnn : use_gpudnn
backward : pool3d_grad
- op : pow
......@@ -2048,7 +2040,6 @@
func : SoftmaxInferMeta
kernel :
func : softmax
use_gpudnn : true
inplace : (x -> out)
backward : softmax_grad
......
......@@ -200,9 +200,10 @@ void DenseTensor::set_meta(const DenseTensorMeta& meta) {
meta_.layout = meta.layout;
meta_.lod = meta.lod;
meta_.offset = meta.offset;
meta_.use_cudnn = meta.use_cudnn;
}
/* @jim19930609: This interface will be further modified util we finalized the
/* @jim19930609: This interface will be further modified until we finalize the
design for Allocator - Allocation
For now, we have to temporarily accommodate two independent use cases:
1. Designed behaviour: DenseTensor constructed with its underlying storage_
......
......@@ -357,6 +357,7 @@ DenseTensor& DenseTensor::ShareDataWith(const DenseTensor& src) {
meta_.dtype = src.meta_.dtype;
meta_.layout = src.meta_.layout;
meta_.offset = src.meta_.offset;
meta_.use_cudnn = src.meta_.use_cudnn;
storage_properties_ =
std::move(CopyStorageProperties(src.storage_properties_));
#ifdef PADDLE_WITH_MKLDNN
......
......@@ -106,17 +106,16 @@ bool KernelFactory::HasKernel(const std::string& kernel_name,
}
KernelResult KernelFactory::SelectKernelOrThrowError(
const std::string& kernel_name,
const KernelKey& kernel_key,
bool use_gpudnn) const {
const std::string& kernel_name, const KernelKey& const_kernel_key) const {
auto iter = kernels_.find(kernel_name);
PADDLE_ENFORCE_NE(
iter,
kernels_.end(),
phi::errors::NotFound("The kernel `%s` is not registered.", kernel_name));
KernelKey kernel_key = const_kernel_key;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (use_gpudnn && kernel_key.backend() == Backend::GPU) {
if (kernel_key.backend() == Backend::GPUDNN) {
auto kernel_iter = iter->second.find(
{Backend::GPUDNN, kernel_key.layout(), kernel_key.dtype()});
if (kernel_iter == iter->second.end() &&
......@@ -127,8 +126,8 @@ KernelResult KernelFactory::SelectKernelOrThrowError(
if (kernel_iter != iter->second.end()) {
return {kernel_iter->second, false};
}
VLOG(3) << "The cudnn kernel for [" << kernel_name
<< "] is not registered.";
kernel_key =
KernelKey(Backend::GPU, kernel_key.layout(), kernel_key.dtype());
}
#endif
auto kernel_iter = iter->second.find(kernel_key);
......
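SelectKernelOrThrowError now learns about cudnn from the kernel key itself: if the key names GPUDNN but no such kernel is registered, it retries with a plain GPU key. A dict-based sketch of that lookup order (hypothetical registry, not the real KernelFactory):

def select_kernel(registry, name, backend, layout, dtype):
    kernels = registry[name]  # raises KeyError if the op itself is unregistered
    if backend == "GPUDNN":
        k = kernels.get(("GPUDNN", layout, dtype))
        if k is not None:
            return k
        backend = "GPU"  # cudnn kernel missing: fall back to the GPU key
    return kernels[(backend, layout, dtype)]

registry = {"softmax": {("GPU", "NCHW", "float32"): "softmax_gpu"}}
assert select_kernel(registry, "softmax", "GPUDNN", "NCHW", "float32") == "softmax_gpu"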
......@@ -274,8 +274,7 @@ class KernelFactory {
bool HasCompatiblePhiKernel(const std::string& op_type) const;
KernelResult SelectKernelOrThrowError(const std::string& kernel_name,
const KernelKey& kernel_key,
bool use_gpudnn = false) const;
const KernelKey& kernel_key) const;
bool HasKernel(const std::string& kernel_name,
const KernelKey& kernel_key) const;
......
......@@ -16,21 +16,29 @@ limitations under the License. */
namespace phi {
DenseTensorMeta::DenseTensorMeta() { use_cudnn = true; }
DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims)
: dims(dims), dtype(dtype) {}
: dims(dims), dtype(dtype) {
use_cudnn = true;
}
DenseTensorMeta::DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
size_t offset)
: dims(dims), dtype(dtype), layout(layout), offset(offset) {}
: dims(dims), dtype(dtype), layout(layout), offset(offset) {
use_cudnn = true;
}
DenseTensorMeta::DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
const LoD& lod,
size_t offset)
: dims(dims), dtype(dtype), layout(layout), lod(lod), offset(offset) {}
: dims(dims), dtype(dtype), layout(layout), lod(lod), offset(offset) {
use_cudnn = true;
}
bool DenseTensorMeta::valid() const noexcept {
bool valid{true};
......
......@@ -48,7 +48,7 @@ using LoD = std::vector<std::vector<size_t>>;
struct DenseTensorMeta {
using DataType = paddle::experimental::DataType;
DenseTensorMeta() = default;
DenseTensorMeta();
DenseTensorMeta(DataType dtype, const DDim& dims);
DenseTensorMeta(DataType dtype,
const DDim& dims,
......@@ -65,6 +65,9 @@ struct DenseTensorMeta {
bool valid() const noexcept;
bool is_scalar{false};
/// \brief Determines whether to use the cuDNN speed-up library in the new dygraph.
/// It may also support the MKLDNN library in the near future.
bool use_cudnn{true};
DDim dims;
DataType dtype{DataType::UNDEFINED};
DataLayout layout{DataLayout::NCHW};
......@@ -73,9 +76,10 @@ struct DenseTensorMeta {
};
inline bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) {
return (lhs.is_scalar == rhs.is_scalar) && (lhs.dims == rhs.dims) &&
(lhs.dtype == rhs.dtype) && (lhs.layout == rhs.layout) &&
(lhs.lod == rhs.lod) && (lhs.offset == rhs.offset);
return (lhs.is_scalar == rhs.is_scalar) && lhs.use_cudnn == rhs.use_cudnn &&
(lhs.dims == rhs.dims) && (lhs.dtype == rhs.dtype) &&
(lhs.layout == rhs.layout) && (lhs.lod == rhs.lod) &&
(lhs.offset == rhs.offset);
}
struct StringTensorMeta {
......
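Since use_cudnn now participates in DenseTensorMeta equality, flipping the flag must produce a distinct meta, which is why tensor__use_cudnn builds a new meta and tensor instead of mutating in place. A toy model of the comparison (illustrative Python dataclass, not the C++ struct):

from dataclasses import dataclass

@dataclass
class Meta:
    use_cudnn: bool = True  # defaults to True, matching the new constructors
    dtype: str = "float32"
    layout: str = "NCHW"

assert Meta(use_cudnn=True) != Meta(use_cudnn=False)  # the flag alone breaks equality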
......@@ -672,6 +672,7 @@ class Pool2D(layers.Layer):
def forward(self, input):
if _non_static_mode():
if not self._use_mkldnn and in_dygraph_mode():
input = input._use_cudnn(self._use_cudnn)
return _C_ops.pool2d(
input,
self._pool_size,
......@@ -684,7 +685,6 @@ class Pool2D(layers.Layer):
self._global_pooling,
False,
"EXPLICIT",
self._use_cudnn,
)
attrs = (
......
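The dygraph call sites change shape accordingly: rather than threading a use_cudnn boolean through the op's argument list, the flag is stamped onto the input tensor right before the call. A hedged sketch (argument order follows the pool2d entry in ops.yaml below; assumes a build containing this patch):

import paddle
from paddle import _C_ops

x = paddle.rand([1, 3, 32, 32])
x = x._use_cudnn(False)  # replaces what was previously the trailing use_cudnn argument
out = _C_ops.pool2d(x, [2, 2], [2, 2], [0, 0], False, True,
                    "NCHW", "max", False, False, "EXPLICIT")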
......@@ -880,6 +880,10 @@ def monkey_patch_varbase():
def _clear_data(self):
self.get_tensor()._clear()
@framework.dygraph_only
def _use_cudnn(self, use_cudnn=True):
return self._tensor_use_cudnn(use_cudnn)
@framework.dygraph_only
def _uva(self, device_id=0):
'''
......@@ -1064,6 +1068,7 @@ def monkey_patch_varbase():
setattr(core.eager.Tensor, "_uva", _uva)
setattr(core.eager.Tensor, "_clear_data", _clear_data)
setattr(core.eager.Tensor, "__hash__", __hash__)
setattr(core.eager.Tensor, "_use_cudnn", _use_cudnn)
else:
setattr(core.VarBase, "__name__", "Tensor")
setattr(core.VarBase, "grad", grad)
......
......@@ -2196,6 +2196,7 @@ def pool2d(
pool_padding = update_padding(pool_padding, data_format)
if in_dygraph_mode():
input = input._use_cudnn(use_cudnn)
return _C_ops.pool2d(
input,
pool_size,
......@@ -2208,7 +2209,6 @@ def pool2d(
global_pooling,
False,
padding_algorithm,
use_cudnn,
)
op_type = 'pool2d'
helper = LayerHelper(op_type, **locals())
......
......@@ -897,6 +897,21 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase):
x._clear()
self.assertFalse(x._is_initialized())
def test_use_cudnn(self):
np_x = np.random.random((3, 8, 8))
with _test_eager_guard():
self.assertTrue(in_dygraph_mode())
x = paddle.to_tensor(np_x, dtype="float64")
y = x._use_cudnn(False)
np.testing.assert_array_equal(x.numpy(), y.numpy())
y = x._use_cudnn(True)
np.testing.assert_array_equal(x.numpy(), y.numpy())
self.assertFalse(in_dygraph_mode())
x = paddle.to_tensor(np_x, dtype="float64")
with self.assertRaises(AttributeError):
x = x._use_cudnn(False)
class EagerParamBaseUsageTestCase(unittest.TestCase):
def test_print(self):
......
......@@ -258,7 +258,6 @@ def avg_pool1d(
False,
False,
padding_algorithm,
True,
)
return squeeze(output, [2])
......@@ -407,7 +406,6 @@ def avg_pool2d(
False,
False,
padding_algorithm,
True,
)
else:
output = _legacy_C_ops.pool2d(
......@@ -561,7 +559,6 @@ def avg_pool3d(
False,
False,
padding_algorithm,
True,
)
elif _in_legacy_dygraph():
pool_out = _legacy_C_ops.pool3d(
......@@ -718,7 +715,6 @@ def max_pool1d(
False,
False,
padding_algorithm,
True,
)
return squeeze(pool_out, [2])
......@@ -1363,7 +1359,6 @@ def max_pool2d(
False,
False,
padding_algorithm,
True,
)
if _in_legacy_dygraph():
......@@ -1554,7 +1549,6 @@ def max_pool3d(
False,
False,
padding_algorithm,
True,
)
if _in_legacy_dygraph():
......@@ -1691,6 +1685,7 @@ def adaptive_avg_pool1d(x, output_size, name=None):
x = unsqueeze(x, [2])
if in_dygraph_mode():
x = x._use_cudnn(False)
pool_out = _C_ops.pool2d(
x,
pool_size,
......@@ -1703,7 +1698,6 @@ def adaptive_avg_pool1d(x, output_size, name=None):
False,
True,
"EXPLICIT",
False,
)
return squeeze(pool_out, [2])
if _in_legacy_dygraph():
......@@ -1828,6 +1822,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
output_size = utils._convert_to_tensor_list(output_size)
if in_dygraph_mode():
x = x._use_cudnn(False)
return _C_ops.pool2d(
x,
output_size,
......@@ -1840,7 +1835,6 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
False,
True,
"EXPLICIT",
False,
)
if _in_legacy_dygraph():
......@@ -1973,6 +1967,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
output_size[2] = in_w
if in_dygraph_mode():
x = x._use_cudnn(False)
return _C_ops.pool3d(
x,
output_size,
......@@ -1985,7 +1980,6 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
False,
True,
"EXPLICIT",
False,
)
elif _in_legacy_dygraph():
return _legacy_C_ops.pool3d(
......
......@@ -92,7 +92,8 @@ def affine_grid(theta, out_shape, align_corners=True, name=None):
if isinstance(out_shape, Variable)
else out_shape
)
return _C_ops.affine_grid(theta, _out_shape, align_corners, use_cudnn)
theta = theta._use_cudnn(use_cudnn)
return _C_ops.affine_grid(theta, _out_shape, align_corners)
elif in_dynamic_mode():
_out_shape = (
out_shape.numpy().tolist()
......
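affine_grid follows the same pattern: use_cudnn is computed inside the Python wrapper and forwarded via theta's meta instead of as a kernel argument, so the public API is unchanged. Usage sketch (shapes per the documented affine_grid contract):

import paddle

theta = paddle.rand([1, 2, 3])  # a batch of 2x3 affine matrices
grid = paddle.nn.functional.affine_grid(theta, [1, 3, 8, 8], align_corners=True)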