未验证 提交 693c7629 编写于 作者: Q Qi Li 提交者: GitHub

[ROCM] fix depth conv2d in rocm, test=develop (#32170)

上级 fdf63b4e
...@@ -1363,7 +1363,14 @@ REGISTER_OP_KERNEL( ...@@ -1363,7 +1363,14 @@ REGISTER_OP_KERNEL(
conv2d_grad_grad, CUDNN, plat::CUDAPlace, conv2d_grad_grad, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvDoubleGradOpKernel<float>, paddle::operators::CUDNNConvDoubleGradOpKernel<float>,
paddle::operators::CUDNNConvDoubleGradOpKernel<plat::float16>); paddle::operators::CUDNNConvDoubleGradOpKernel<plat::float16>);
// ROCM has a limited thread count in depthwise_conv.cu, which will result in
// accuracy issues. Use depthwise_conv2d in MIOPEN to resolve this issue.
// Register the forward depthwise_conv2d op for the CUDNN library type on
// plat::CUDAPlace, backed by CUDNNConvOpKernel for float and float16.
REGISTER_OP_KERNEL(depthwise_conv2d, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvOpKernel<float>,
paddle::operators::CUDNNConvOpKernel<plat::float16>);
// Register the matching depthwise_conv2d_grad op, backed by
// CUDNNConvGradOpKernel for the same two element types.
REGISTER_OP_KERNEL(depthwise_conv2d_grad, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvGradOpKernel<float>,
paddle::operators::CUDNNConvGradOpKernel<plat::float16>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
depthwise_conv2d_grad_grad, depthwise_conv2d_grad_grad,
paddle::operators::CUDNNConvDoubleGradOpKernel<float>, paddle::operators::CUDNNConvDoubleGradOpKernel<float>,
......
...@@ -919,11 +919,10 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T, ...@@ -919,11 +919,10 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
batch_size * output_channels * output_height * output_width; batch_size * output_channels * output_height * output_width;
#ifdef __HIPCC__ #ifdef __HIPCC__
int block_size = 256; int block_size = 256;
int grid_size = std::min((nums_output + block_size - 1) / block_size, 256);
#else #else
int block_size = 512; int block_size = 512;
int grid_size = (nums_output + block_size - 1) / block_size;
#endif #endif
int grid_size = (nums_output + block_size - 1) / block_size;
#define check_case(c_filter_multiplier, c_stride, c_filter) \ #define check_case(c_filter_multiplier, c_stride, c_filter) \
if (c_filter_multiplier == 0 || \ if (c_filter_multiplier == 0 || \
......
...@@ -1524,6 +1524,10 @@ def conv2d(input, ...@@ -1524,6 +1524,10 @@ def conv2d(input,
not use_cudnn): not use_cudnn):
l_type = 'depthwise_conv2d' l_type = 'depthwise_conv2d'
if (num_channels == groups and num_filters % num_channels == 0 and
core.is_compiled_with_rocm()):
l_type = 'depthwise_conv2d'
helper = LayerHelper(l_type, **locals()) helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
......
...@@ -1248,6 +1248,17 @@ create_test_cudnn_channel_last_class(TestWithStride_AsyPadding) ...@@ -1248,6 +1248,17 @@ create_test_cudnn_channel_last_class(TestWithStride_AsyPadding)
create_test_cudnn_channel_last_class(TestWithGroup_AsyPadding) create_test_cudnn_channel_last_class(TestWithGroup_AsyPadding)
create_test_cudnn_channel_last_class(TestWithDilation_AsyPadding) create_test_cudnn_channel_last_class(TestWithDilation_AsyPadding)
# ------------ depthwise conv2d in MIOPEN ---------
if core.is_compiled_with_rocm():
create_test_cudnn_padding_SAME_class(TestDepthwiseConv_AsyPadding)
create_test_cudnn_padding_SAME_class(
TestDepthwiseConvWithDilation_AsyPadding)
create_test_padding_VALID_class(TestDepthwiseConv_AsyPadding)
create_test_padding_VALID_class(TestDepthwiseConvWithDilation_AsyPadding)
create_test_cudnn_channel_last_class(TestDepthwiseConv_AsyPadding)
create_test_cudnn_channel_last_class(
TestDepthwiseConvWithDilation2_AsyPadding)
create_test_cudnn_channel_last_fp16_class( create_test_cudnn_channel_last_fp16_class(
TestConv2DOp_AsyPadding, grad_check=False) TestConv2DOp_AsyPadding, grad_check=False)
create_test_cudnn_channel_last_fp16_class( create_test_cudnn_channel_last_fp16_class(
......
...@@ -25,7 +25,7 @@ __all__ = [ ...@@ -25,7 +25,7 @@ __all__ = [
import numpy as np import numpy as np
from ...device import get_cudnn_version from ...device import get_cudnn_version
from ...fluid.framework import Variable, in_dygraph_mode from ...fluid.framework import Variable, in_dygraph_mode
from ...fluid import core, dygraph_utils from ...fluid import core, dygraph_utils, get_flags
from ...fluid.layers import nn, utils from ...fluid.layers import nn, utils
from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.data_feeder import check_variable_and_dtype
from ...fluid.param_attr import ParamAttr from ...fluid.param_attr import ParamAttr
...@@ -551,6 +551,13 @@ def conv2d(x, ...@@ -551,6 +551,13 @@ def conv2d(x,
if (num_channels == groups and num_channels != 1 and if (num_channels == groups and num_channels != 1 and
num_filters % num_channels == 0): num_filters % num_channels == 0):
l_type = 'depthwise_conv2d' l_type = 'depthwise_conv2d'
if core.is_compiled_with_rocm():
use_cudnn = True
else:
use_cudnn = False
if (core.is_compiled_with_cuda() and get_flags("FLAGS_conv2d_disable_cudnn")
["FLAGS_conv2d_disable_cudnn"]):
use_cudnn = False use_cudnn = False
return _conv_nd(x, weight, bias, stride, padding, padding_algorithm, return _conv_nd(x, weight, bias, stride, padding, padding_algorithm,
......
...@@ -153,6 +153,13 @@ class _ConvNd(layers.Layer): ...@@ -153,6 +153,13 @@ class _ConvNd(layers.Layer):
in_channels != 1 and in_channels != 1 and
out_channels % in_channels == 0): out_channels % in_channels == 0):
self._op_type = 'depthwise_conv2d' self._op_type = 'depthwise_conv2d'
if core.is_compiled_with_rocm():
self._use_cudnn = True
else:
self._use_cudnn = False
if (core.is_compiled_with_cuda() and get_flags(
"FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]):
self._use_cudnn = False self._use_cudnn = False
def extra_repr(self): def extra_repr(self):
...@@ -645,10 +652,6 @@ class Conv2D(_ConvNd): ...@@ -645,10 +652,6 @@ class Conv2D(_ConvNd):
bias_attr=bias_attr, bias_attr=bias_attr,
data_format=data_format) data_format=data_format)
if (core.is_compiled_with_cuda() and get_flags(
"FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]):
self._use_cudnn = False
def forward(self, x): def forward(self, x):
if self._padding_mode != 'zeros': if self._padding_mode != 'zeros':
x = F.pad(x, x = F.pad(x,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册