diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc index 9e5ccd928e9d6012c1da3baa17521dcac0c8ff2f..63088d05a54a4c8859ccbae139c680e1f1ac539b 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu.cc +++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/cudnn_helper.h" +#include "paddle/fluid/platform/cudnn_workspace_helper.h" #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/profiler.h" @@ -26,7 +27,8 @@ DEFINE_bool(cudnn_deterministic, false, "Whether allow using an autotuning algorithm for convolution " "operator. The autotuning algorithm may be non-deterministic. If " "true, the algorithm is deterministic."); -DEFINE_uint64(conv_workspace_size_limit, 4096, +DEFINE_uint64(conv_workspace_size_limit, + paddle::platform::kDefaultConvWorkspaceSizeLimitMB, "cuDNN convolution workspace limit in MB unit."); DEFINE_bool(cudnn_exhaustive_search, false, "Whether enable exhaustive search for cuDNN convolution or " @@ -127,10 +129,10 @@ class CUDNNConvOpKernel : public framework::OpKernel { int group_offset_filter = filter->numel() / groups; // ------------------- cudnn conv workspace --------------------- size_t workspace_size_in_bytes; // final workspace to allocate. 
- size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + size_t workspace_size_limit = 0; if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { int64_t max_user_size = - std::max(static_cast(FLAGS_conv_workspace_size_limit), + std::min(static_cast(FLAGS_conv_workspace_size_limit), user_workspace_size); workspace_size_limit = max_user_size * 1024 * 1024; } @@ -348,10 +350,10 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { cudnnConvolutionBwdDataAlgo_t data_algo; cudnnConvolutionBwdFilterAlgo_t filter_algo; size_t workspace_size_in_bytes = 0, tmp_size = 0; - size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + size_t workspace_size_limit = 0; if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { int64_t max_user_size = - std::max(static_cast(FLAGS_conv_workspace_size_limit), + std::min(static_cast(FLAGS_conv_workspace_size_limit), user_workspace_size); workspace_size_limit = max_user_size * 1024 * 1024; } diff --git a/paddle/fluid/operators/conv_cudnn_op_cache.h b/paddle/fluid/operators/conv_cudnn_op_cache.h index de92b75a501dfc300bb8b52ebfa7903995847218..1158dc2d7aa50061c32be63ae2786d71bec9ebeb 100644 --- a/paddle/fluid/operators/conv_cudnn_op_cache.h +++ b/paddle/fluid/operators/conv_cudnn_op_cache.h @@ -31,9 +31,6 @@ static constexpr char kCUDNNFwdAlgoCache[] = "kCUDNNFwdAlgoCache"; static constexpr char kCUDNNBwdDataAlgoCache[] = "kCUDNNBwdDataAlgoCache"; static constexpr char kCUDNNBwdFilterAlgoCache[] = "kCUDNNBwdFilterAlgoCache"; -static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES = - static_cast(1024) * 1024 * 1024; - #if CUDNN_VERSION_MIN(6, 0, 5) static constexpr size_t kNUM_CUDNN_FWD_ALGS = CUDNN_CONVOLUTION_FWD_ALGO_COUNT; static constexpr size_t kNUM_CUDNN_BWD_FILTER_ALGS = diff --git a/paddle/fluid/operators/conv_fusion_op.cu.cc b/paddle/fluid/operators/conv_fusion_op.cu.cc index 64152829b4f000e545054e528edca33dfe96ec56..ad24e6682b2274c1b352e2778e7784ac62d57720 100644 --- 
a/paddle/fluid/operators/conv_fusion_op.cu.cc +++ b/paddle/fluid/operators/conv_fusion_op.cu.cc @@ -95,10 +95,10 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { // ------------------- cudnn conv workspace --------------------- size_t workspace_size_in_bytes; // final workspace to allocate. - size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + size_t workspace_size_limit = 0; if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { int64_t max_user_size = - std::max(static_cast(FLAGS_conv_workspace_size_limit), + std::min(static_cast(FLAGS_conv_workspace_size_limit), user_workspace_size); workspace_size_limit = max_user_size * 1024 * 1024; } diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index e1281602bf0d1bf25a2c4dfa32f495ed724d24eb..1bacc54b61d7f7d1f6e62a317a97cd96cf15669e 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -25,6 +25,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/platform/cudnn_workspace_helper.h" namespace paddle { namespace operators { @@ -248,7 +249,7 @@ void Conv2DOpMaker::Make() { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardware. This size should be chosen carefully.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddAttr("exhaustive_search", "(bool, default false) cuDNN has many algorithm to calculation " "convolution, whether enable exhaustive search " @@ -367,7 +368,7 @@ void Conv3DOpMaker::Make() { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardware. 
This size should be chosen carefully.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddAttr("exhaustive_search", "(bool, default false) cuDNN has many algorithm to calculation " "convolution, whether enable exhaustive search " diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index baa39c0f9926efc233f9a228e055e2eb2116dbcc..01afdd2807809c625535d7c20488a5fc6d67932f 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include +#include "paddle/fluid/platform/cudnn_workspace_helper.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -183,7 +184,7 @@ void Conv2DTransposeOpMaker::Make() { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardward. This size should be carefully setted.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddComment(R"DOC( Convolution2D Transpose Operator. @@ -279,7 +280,7 @@ void Conv3DTransposeOpMaker::Make() { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardward. This size should be carefully setted.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddComment(R"DOC( Convolution3D Transpose Operator. diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc index 4690bd766d0b8a4b7a249fb5ccad5f278d1830f5..569527c3c16cbe845a1674c846c700b674f7d37d 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc @@ -18,6 +18,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cudnn_helper.h" #endif +#include "paddle/fluid/platform/cudnn_workspace_helper.h" namespace paddle { namespace operators { @@ -95,7 +96,7 @@ class ConvInceptionFusionOpMaker : public framework::OpProtoAndCheckerMaker { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardware. This size should be chosen carefully.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddComment(R"DOC( )DOC"); } diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu index 6e13887866485bd114ebf12f4bdfa8d60fca6d01..76ea6f1b59d6c2c4512f53846886fd81b77ecfbb 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu @@ -162,10 +162,10 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel { auto handle = dev_ctx.cudnn_handle(); size_t workspace_size_in_bytes = 0; // final workspace to allocate. - size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + size_t workspace_size_limit = 0; if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { int64_t max_user_size = - std::max(static_cast(FLAGS_conv_workspace_size_limit), + std::min(static_cast(FLAGS_conv_workspace_size_limit), user_workspace_size); workspace_size_limit = max_user_size * 1024 * 1024; } diff --git a/paddle/fluid/platform/cudnn_workspace_helper.h b/paddle/fluid/platform/cudnn_workspace_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..58f76e3128e4b4c5b8cd54a495413de0eabe790e --- /dev/null +++ b/paddle/fluid/platform/cudnn_workspace_helper.h @@ -0,0 +1,23 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace paddle { +namespace platform { + +static constexpr int kDefaultConvWorkspaceSizeLimitMB = 4096; /* Default cuDNN convolution workspace size limit, in MB. Shared default for FLAGS_conv_workspace_size_limit and for the SetDefault(...) of the conv, conv-transpose and conv-inception-fusion op makers, so the two stay in sync. */ + +} // namespace platform +} // namespace paddle