diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu index 39e9d37ddc6c75a2b1b82038e6f9c8f805c0ea3c..ab535e341f7575d4eef06af555b0aff4fa151f83 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu +++ b/paddle/fluid/operators/conv_cudnn_op.cu @@ -1363,7 +1363,14 @@ REGISTER_OP_KERNEL( conv2d_grad_grad, CUDNN, plat::CUDAPlace, paddle::operators::CUDNNConvDoubleGradOpKernel, paddle::operators::CUDNNConvDoubleGradOpKernel); - +// ROCM has limit thread in depthwise_conv.cu and willl result in accuracy issue +// Use depthwise_conv2d in MIOPEN to resolve this issue +REGISTER_OP_KERNEL(depthwise_conv2d, CUDNN, plat::CUDAPlace, + paddle::operators::CUDNNConvOpKernel, + paddle::operators::CUDNNConvOpKernel); +REGISTER_OP_KERNEL(depthwise_conv2d_grad, CUDNN, plat::CUDAPlace, + paddle::operators::CUDNNConvGradOpKernel, + paddle::operators::CUDNNConvGradOpKernel); REGISTER_OP_CUDA_KERNEL( depthwise_conv2d_grad_grad, paddle::operators::CUDNNConvDoubleGradOpKernel, diff --git a/paddle/fluid/operators/math/depthwise_conv.cu b/paddle/fluid/operators/math/depthwise_conv.cu index 5fd543b5c6c5ccabd6d2e741a8ff4a9d1da5902c..7c5f59fab0d280587e15b6e1353c9dd2bda9270a 100644 --- a/paddle/fluid/operators/math/depthwise_conv.cu +++ b/paddle/fluid/operators/math/depthwise_conv.cu @@ -919,11 +919,10 @@ class DepthwiseConvFunctor