未验证 提交 0c335dcd 编写于 作者: Z Zeng Jinle 提交者: GitHub

Make conv cudnn workspace size configurable (#17036)

* make_conv_cudnn_ws_size_configurable, test=develop

* change std::max to std::min
test=develop
上级 ea3504c7
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/operators/conv_op.h"
#include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/cudnn_helper.h"
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
...@@ -26,7 +27,8 @@ DEFINE_bool(cudnn_deterministic, false, ...@@ -26,7 +27,8 @@ DEFINE_bool(cudnn_deterministic, false,
"Whether allow using an autotuning algorithm for convolution " "Whether allow using an autotuning algorithm for convolution "
"operator. The autotuning algorithm may be non-deterministic. If " "operator. The autotuning algorithm may be non-deterministic. If "
"true, the algorithm is deterministic."); "true, the algorithm is deterministic.");
DEFINE_uint64(conv_workspace_size_limit, 4096, DEFINE_uint64(conv_workspace_size_limit,
paddle::platform::kDefaultConvWorkspaceSizeLimitMB,
"cuDNN convolution workspace limit in MB unit."); "cuDNN convolution workspace limit in MB unit.");
DEFINE_bool(cudnn_exhaustive_search, false, DEFINE_bool(cudnn_exhaustive_search, false,
"Whether enable exhaustive search for cuDNN convolution or " "Whether enable exhaustive search for cuDNN convolution or "
...@@ -127,10 +129,10 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> { ...@@ -127,10 +129,10 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
int group_offset_filter = filter->numel() / groups; int group_offset_filter = filter->numel() / groups;
// ------------------- cudnn conv workspace --------------------- // ------------------- cudnn conv workspace ---------------------
size_t workspace_size_in_bytes; // final workspace to allocate. size_t workspace_size_in_bytes; // final workspace to allocate.
size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; size_t workspace_size_limit = 0;
if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) {
int64_t max_user_size = int64_t max_user_size =
std::max(static_cast<int64_t>(FLAGS_conv_workspace_size_limit), std::min(static_cast<int64_t>(FLAGS_conv_workspace_size_limit),
user_workspace_size); user_workspace_size);
workspace_size_limit = max_user_size * 1024 * 1024; workspace_size_limit = max_user_size * 1024 * 1024;
} }
...@@ -348,10 +350,10 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> { ...@@ -348,10 +350,10 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
cudnnConvolutionBwdDataAlgo_t data_algo; cudnnConvolutionBwdDataAlgo_t data_algo;
cudnnConvolutionBwdFilterAlgo_t filter_algo; cudnnConvolutionBwdFilterAlgo_t filter_algo;
size_t workspace_size_in_bytes = 0, tmp_size = 0; size_t workspace_size_in_bytes = 0, tmp_size = 0;
size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; size_t workspace_size_limit = 0;
if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) {
int64_t max_user_size = int64_t max_user_size =
std::max(static_cast<int64_t>(FLAGS_conv_workspace_size_limit), std::min(static_cast<int64_t>(FLAGS_conv_workspace_size_limit),
user_workspace_size); user_workspace_size);
workspace_size_limit = max_user_size * 1024 * 1024; workspace_size_limit = max_user_size * 1024 * 1024;
} }
......
...@@ -31,9 +31,6 @@ static constexpr char kCUDNNFwdAlgoCache[] = "kCUDNNFwdAlgoCache"; ...@@ -31,9 +31,6 @@ static constexpr char kCUDNNFwdAlgoCache[] = "kCUDNNFwdAlgoCache";
static constexpr char kCUDNNBwdDataAlgoCache[] = "kCUDNNBwdDataAlgoCache"; static constexpr char kCUDNNBwdDataAlgoCache[] = "kCUDNNBwdDataAlgoCache";
static constexpr char kCUDNNBwdFilterAlgoCache[] = "kCUDNNBwdFilterAlgoCache"; static constexpr char kCUDNNBwdFilterAlgoCache[] = "kCUDNNBwdFilterAlgoCache";
static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES =
static_cast<size_t>(1024) * 1024 * 1024;
#if CUDNN_VERSION_MIN(6, 0, 5) #if CUDNN_VERSION_MIN(6, 0, 5)
static constexpr size_t kNUM_CUDNN_FWD_ALGS = CUDNN_CONVOLUTION_FWD_ALGO_COUNT; static constexpr size_t kNUM_CUDNN_FWD_ALGS = CUDNN_CONVOLUTION_FWD_ALGO_COUNT;
static constexpr size_t kNUM_CUDNN_BWD_FILTER_ALGS = static constexpr size_t kNUM_CUDNN_BWD_FILTER_ALGS =
......
...@@ -95,10 +95,10 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> { ...@@ -95,10 +95,10 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
// ------------------- cudnn conv workspace --------------------- // ------------------- cudnn conv workspace ---------------------
size_t workspace_size_in_bytes; // final workspace to allocate. size_t workspace_size_in_bytes; // final workspace to allocate.
size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; size_t workspace_size_limit = 0;
if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) {
int64_t max_user_size = int64_t max_user_size =
std::max(static_cast<int64_t>(FLAGS_conv_workspace_size_limit), std::min(static_cast<int64_t>(FLAGS_conv_workspace_size_limit),
user_workspace_size); user_workspace_size);
workspace_size_limit = max_user_size * 1024 * 1024; workspace_size_limit = max_user_size * 1024 * 1024;
} }
......
...@@ -25,6 +25,7 @@ limitations under the License. */ ...@@ -25,6 +25,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#endif #endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -248,7 +249,7 @@ void Conv2DOpMaker::Make() { ...@@ -248,7 +249,7 @@ void Conv2DOpMaker::Make() {
"allocated/freed each time the operator runs, larger " "allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires " "workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully.") "better hardware. This size should be chosen carefully.")
.SetDefault(4096); .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB);
AddAttr<bool>("exhaustive_search", AddAttr<bool>("exhaustive_search",
"(bool, default false) cuDNN has many algorithm to calculation " "(bool, default false) cuDNN has many algorithm to calculation "
"convolution, whether enable exhaustive search " "convolution, whether enable exhaustive search "
...@@ -367,7 +368,7 @@ void Conv3DOpMaker::Make() { ...@@ -367,7 +368,7 @@ void Conv3DOpMaker::Make() {
"allocated/freed each time the operator runs, larger " "allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires " "workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully.") "better hardware. This size should be chosen carefully.")
.SetDefault(4096); .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB);
AddAttr<bool>("exhaustive_search", AddAttr<bool>("exhaustive_search",
"(bool, default false) cuDNN has many algorithm to calculation " "(bool, default false) cuDNN has many algorithm to calculation "
"convolution, whether enable exhaustive search " "convolution, whether enable exhaustive search "
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
...@@ -183,7 +184,7 @@ void Conv2DTransposeOpMaker::Make() { ...@@ -183,7 +184,7 @@ void Conv2DTransposeOpMaker::Make() {
"allocated/freed each time the operator runs, larger " "allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires " "workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.") "better hardward. This size should be carefully setted.")
.SetDefault(4096); .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB);
AddComment(R"DOC( AddComment(R"DOC(
Convolution2D Transpose Operator. Convolution2D Transpose Operator.
...@@ -279,7 +280,7 @@ void Conv3DTransposeOpMaker::Make() { ...@@ -279,7 +280,7 @@ void Conv3DTransposeOpMaker::Make() {
"allocated/freed each time the operator runs, larger " "allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires " "workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.") "better hardward. This size should be carefully setted.")
.SetDefault(4096); .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB);
AddComment(R"DOC( AddComment(R"DOC(
Convolution3D Transpose Operator. Convolution3D Transpose Operator.
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/cudnn_helper.h"
#endif #endif
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -95,7 +96,7 @@ class ConvInceptionFusionOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -95,7 +96,7 @@ class ConvInceptionFusionOpMaker : public framework::OpProtoAndCheckerMaker {
"allocated/freed each time the operator runs, larger " "allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires " "workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully.") "better hardware. This size should be chosen carefully.")
.SetDefault(4096); .SetDefault(platform::kDefaultConvWorkspaceSizeLimitMB);
AddComment(R"DOC( AddComment(R"DOC(
)DOC"); )DOC");
} }
......
...@@ -162,10 +162,10 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel<T> { ...@@ -162,10 +162,10 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel<T> {
auto handle = dev_ctx.cudnn_handle(); auto handle = dev_ctx.cudnn_handle();
size_t workspace_size_in_bytes = 0; // final workspace to allocate. size_t workspace_size_in_bytes = 0; // final workspace to allocate.
size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; size_t workspace_size_limit = 0;
if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) {
int64_t max_user_size = int64_t max_user_size =
std::max(static_cast<int64_t>(FLAGS_conv_workspace_size_limit), std::min(static_cast<int64_t>(FLAGS_conv_workspace_size_limit),
user_workspace_size); user_workspace_size);
workspace_size_limit = max_user_size * 1024 * 1024; workspace_size_limit = max_user_size * 1024 * 1024;
} }
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace paddle {
namespace platform {
static constexpr int kDefaultConvWorkspaceSizeLimitMB = 4096;
} // namespace platform
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册