diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu index ad11906f3f1439287ef986897f842181a307dd68..cc5ae4c8375516a12e647697cc0d2cc03379eaba 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu +++ b/paddle/fluid/operators/conv_cudnn_op.cu @@ -272,6 +272,16 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> { algo = search::Find(args, exhaustive_search, false, 0, ctx); workspace_size = search::GetWorkspaceSize(args, algo); +#if CUDNN_VERSION_MIN(7, 0, 1) + // when groups > 1, SearchAlgorithm find algo is CUDNN_CONVOLUTION_\ + // FWD_ALGO_WINOGRAD_NONFUSED, but this kind of algorithm is unstable + // in forward computation, so change the algorithm to CUDNN_CONVOLUTION_\ + // FWD_ALGO_IMPLICIT_GEMM manually. + if (ctx.Attr<int>("groups") > 1) { + algo = static_cast<cudnnConvolutionFwdAlgo_t>(0); + } +#endif + // ------------------- cudnn conv forward --------------------- ScalingParamType<T> alpha = 1.0f, beta = 0.0f; for (int i = 0; i < groups; i++) { @@ -881,6 +891,7 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> { #if CUDNN_VERSION_MIN(7, 0, 1) iwo_group = 1; c_group = groups; + groups = 1; #endif auto dtype = platform::CudnnDataType<T>::type; diff --git a/paddle/fluid/operators/conv_transpose_cudnn_op.cu b/paddle/fluid/operators/conv_transpose_cudnn_op.cu index b827f200c0ef80625604d9bbd15a432307cff984..b39aaa29cf3188ab1b2d017f89d2dd0ccdc23758 100644 --- a/paddle/fluid/operators/conv_transpose_cudnn_op.cu +++ b/paddle/fluid/operators/conv_transpose_cudnn_op.cu @@ -245,7 +245,7 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> { CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, workspace_size_limit, &algo)); - if (algo == 0 && FLAGS_cudnn_deterministic) { + if (FLAGS_cudnn_deterministic) { algo = static_cast<cudnnConvolutionBwdDataAlgo_t>(1); } @@ -476,6 +476,10 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> { handle, cudnn_output_desc, cudnn_filter_desc, cudnn_conv_desc, cudnn_input_desc, 
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, workspace_size_limit, &data_algo)); + + if (FLAGS_cudnn_deterministic) { + data_algo = static_cast<cudnnConvolutionFwdAlgo_t>(1); + } PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnGetConvolutionForwardWorkspaceSize( handle, cudnn_output_desc, cudnn_filter_desc, cudnn_conv_desc, @@ -492,6 +496,9 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> { CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, workspace_size_limit, &filter_algo)); + if (FLAGS_cudnn_deterministic) { + filter_algo = static_cast<cudnnConvolutionBwdFilterAlgo_t>(1); + } // get workspace for backwards filter algorithm PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(