From 0c335dcd2cb29b82a37cf607c91ecaf83298b6e0 Mon Sep 17 00:00:00 2001 From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com> Date: Tue, 23 Apr 2019 03:18:06 -0500 Subject: [PATCH] Make conv cudnn workspace size configurable (#17036) * make_conv_cudnn_ws_size_configurable, test=develop * change std::max to std::min test=develop --- paddle/fluid/operators/conv_cudnn_op.cu.cc | 12 ++++++---- paddle/fluid/operators/conv_cudnn_op_cache.h | 3 --- paddle/fluid/operators/conv_fusion_op.cu.cc | 4 ++-- paddle/fluid/operators/conv_op.cc | 5 ++-- paddle/fluid/operators/conv_transpose_op.cc | 5 ++-- .../fused/fusion_conv_inception_op.cc | 3 ++- .../fused/fusion_conv_inception_op.cu | 4 ++-- .../fluid/platform/cudnn_workspace_helper.h | 23 +++++++++++++++++++ 8 files changed, 42 insertions(+), 17 deletions(-) create mode 100644 paddle/fluid/platform/cudnn_workspace_helper.h diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc index 9e5ccd928..63088d05a 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu.cc +++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/cudnn_helper.h" +#include "paddle/fluid/platform/cudnn_workspace_helper.h" #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/profiler.h" @@ -26,7 +27,8 @@ DEFINE_bool(cudnn_deterministic, false, "Whether allow using an autotuning algorithm for convolution " "operator. The autotuning algorithm may be non-deterministic. If " "true, the algorithm is deterministic."); -DEFINE_uint64(conv_workspace_size_limit, 4096, +DEFINE_uint64(conv_workspace_size_limit, + paddle::platform::kDefaultConvWorkspaceSizeLimitMB, "cuDNN convolution workspace limit in MB unit."); DEFINE_bool(cudnn_exhaustive_search, false, "Whether enable exhaustive search for cuDNN convolution or " @@ -127,10 +129,10 @@ class CUDNNConvOpKernel : public framework::OpKernel { int group_offset_filter = filter->numel() / groups; // ------------------- cudnn conv workspace --------------------- size_t workspace_size_in_bytes; // final workspace to allocate. - size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + size_t workspace_size_limit = 0; if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { int64_t max_user_size = - std::max(static_cast(FLAGS_conv_workspace_size_limit), + std::min(static_cast(FLAGS_conv_workspace_size_limit), user_workspace_size); workspace_size_limit = max_user_size * 1024 * 1024; } @@ -348,10 +350,10 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { cudnnConvolutionBwdDataAlgo_t data_algo; cudnnConvolutionBwdFilterAlgo_t filter_algo; size_t workspace_size_in_bytes = 0, tmp_size = 0; - size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + size_t workspace_size_limit = 0; if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { int64_t max_user_size = - std::max(static_cast(FLAGS_conv_workspace_size_limit), + std::min(static_cast(FLAGS_conv_workspace_size_limit), user_workspace_size); workspace_size_limit = max_user_size * 1024 * 1024; } diff --git a/paddle/fluid/operators/conv_cudnn_op_cache.h b/paddle/fluid/operators/conv_cudnn_op_cache.h index de92b75a5..1158dc2d7 100644 --- a/paddle/fluid/operators/conv_cudnn_op_cache.h +++ b/paddle/fluid/operators/conv_cudnn_op_cache.h @@ -31,9 +31,6 @@ static constexpr char kCUDNNFwdAlgoCache[] = "kCUDNNFwdAlgoCache"; static constexpr char kCUDNNBwdDataAlgoCache[] = "kCUDNNBwdDataAlgoCache"; static constexpr char kCUDNNBwdFilterAlgoCache[] = "kCUDNNBwdFilterAlgoCache"; -static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES = - static_cast(1024) * 1024 * 1024; - #if CUDNN_VERSION_MIN(6, 0, 5) static constexpr size_t kNUM_CUDNN_FWD_ALGS = CUDNN_CONVOLUTION_FWD_ALGO_COUNT; static constexpr size_t kNUM_CUDNN_BWD_FILTER_ALGS = diff --git a/paddle/fluid/operators/conv_fusion_op.cu.cc b/paddle/fluid/operators/conv_fusion_op.cu.cc index 64152829b..ad24e6682 100644 --- a/paddle/fluid/operators/conv_fusion_op.cu.cc +++ b/paddle/fluid/operators/conv_fusion_op.cu.cc @@ -95,10 +95,10 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { // ------------------- cudnn conv workspace --------------------- size_t workspace_size_in_bytes; // final workspace to allocate. - size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + size_t workspace_size_limit = 0; if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { int64_t max_user_size = - std::max(static_cast(FLAGS_conv_workspace_size_limit), + std::min(static_cast(FLAGS_conv_workspace_size_limit), user_workspace_size); workspace_size_limit = max_user_size * 1024 * 1024; } diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index e1281602b..1bacc54b6 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -25,6 +25,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/platform/cudnn_workspace_helper.h" namespace paddle { namespace operators { @@ -248,7 +249,7 @@ void Conv2DOpMaker::Make() { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardware. This size should be chosen carefully.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddAttr("exhaustive_search", "(bool, default false) cuDNN has many algorithm to calculation " "convolution, whether enable exhaustive search " @@ -367,7 +368,7 @@ void Conv3DOpMaker::Make() { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardware. This size should be chosen carefully.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddAttr("exhaustive_search", "(bool, default false) cuDNN has many algorithm to calculation " "convolution, whether enable exhaustive search " diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index baa39c0f9..01afdd280 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include +#include "paddle/fluid/platform/cudnn_workspace_helper.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -183,7 +184,7 @@ void Conv2DTransposeOpMaker::Make() { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardward. This size should be carefully setted.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddComment(R"DOC( Convolution2D Transpose Operator. @@ -279,7 +280,7 @@ void Conv3DTransposeOpMaker::Make() { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardward. This size should be carefully setted.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddComment(R"DOC( Convolution3D Transpose Operator. diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc index 4690bd766..569527c3c 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc @@ -18,6 +18,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cudnn_helper.h" #endif +#include "paddle/fluid/platform/cudnn_workspace_helper.h" namespace paddle { namespace operators { @@ -95,7 +96,7 @@ class ConvInceptionFusionOpMaker : public framework::OpProtoAndCheckerMaker { "allocated/freed each time the operator runs, larger " "workspace size can increase performance but also requires " "better hardware. This size should be chosen carefully.") - .SetDefault(4096); + .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB); AddComment(R"DOC( )DOC"); } diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu index 6e1388786..76ea6f1b5 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu @@ -162,10 +162,10 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel { auto handle = dev_ctx.cudnn_handle(); size_t workspace_size_in_bytes = 0; // final workspace to allocate. - size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + size_t workspace_size_limit = 0; if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { int64_t max_user_size = - std::max(static_cast(FLAGS_conv_workspace_size_limit), + std::min(static_cast(FLAGS_conv_workspace_size_limit), user_workspace_size); workspace_size_limit = max_user_size * 1024 * 1024; } diff --git a/paddle/fluid/platform/cudnn_workspace_helper.h b/paddle/fluid/platform/cudnn_workspace_helper.h new file mode 100644 index 000000000..58f76e312 --- /dev/null +++ b/paddle/fluid/platform/cudnn_workspace_helper.h @@ -0,0 +1,23 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace paddle { +namespace platform { + +static constexpr int kDefaultConvWorkspaceSizeLimitMB = 4096; + +} // namespace platform +} // namespace paddle -- GitLab