From a1f28a48951f6c6541cd107382ccf08317bb4e76 Mon Sep 17 00:00:00 2001
From: Yuanle Liu
Date: Tue, 31 Jan 2023 14:29:11 +0800
Subject: [PATCH] [Paddle Inference] change the default values of some gflags (#50074)

---
 .../fluid/inference/api/analysis_predictor.cc | 177 ++++++++++--------
 1 file changed, 100 insertions(+), 77 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 6ccad994b0..e89bcfa2c6 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -17,6 +17,7 @@
 #include 
 #include 
+#include <cstdlib>
 #include 
 #include 
 #include 
@@ -1384,13 +1385,6 @@ template <>
 std::unique_ptr<PaddlePredictor>
 CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
     const AnalysisConfig &config) {
-  // TODO(NHZlX): Should add the link to the doc of
-  // paddle_infer::CreatePredictor
-  if (config.glog_info_disabled()) {
-    FLAGS_logtostderr = 1;
-    FLAGS_minloglevel = 2;  // GLOG_ERROR
-  }
-  VLOG(3) << "create AnalysisConfig";
   PADDLE_ENFORCE_EQ(
       config.is_valid(),
       true,
@@ -1403,83 +1397,112 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
   std::call_once(custom_operators_registered,
                  []() { inference::RegisterAllCustomOperator(); });
 
-  if (config.use_gpu()) {
-    static std::once_flag gflags_initialized;
-    static bool process_level_allocator_enabled;
-
-    std::call_once(gflags_initialized, [&]() {
-      std::vector<std::string> gflags;
-      PADDLE_ENFORCE_GE(
-          config.memory_pool_init_size_mb(),
-          0.f,
+  auto SetGflags = [](const AnalysisConfig &config) {
+    auto SetGflag = [](const char *name, const char *value) {
+      std::string ret = ::GFLAGS_NAMESPACE::SetCommandLineOption(name, value);
+      PADDLE_ENFORCE_EQ(
+          ret.empty(),
+          false,
           platform::errors::InvalidArgument(
-              "The size of memory pool should be greater than 0."));
-      PADDLE_ENFORCE_GE(
-          config.gpu_device_id(),
-          0,
-          platform::errors::InvalidArgument(
-              "Invalid device id (%d). The device id should be greater than 0.",
-              config.gpu_device_id()));
-      gflags.push_back("dummy");
-
-      float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool();
-      if (fraction_of_gpu_memory > 0.95f) {
-        LOG(ERROR)
-            << "Allocate too much memory for the GPU memory pool, assigned "
-            << config.memory_pool_init_size_mb() << " MB";
-        LOG(ERROR) << "Try to shink the value by setting "
-                      "AnalysisConfig::EnableGpu(...)";
-      }
+              "Fail to set gflag: %s, please make sure the gflag exists.",
+              name));
+      VLOG(3) << "set gflag: --" << name << "=" << value;
+    };
+    // TODO(NHZlX): Should add the link to the doc of
+    // paddle_infer::CreatePredictor
+    if (config.glog_info_disabled()) {
+      FLAGS_logtostderr = 1;
+      FLAGS_minloglevel = 2;  // GLOG_ERROR
+    }
 
-      if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) {
-        std::string flag = "--fraction_of_gpu_memory_to_use=" +
-                           std::to_string(fraction_of_gpu_memory);
-        VLOG(3) << "set flag: " << flag;
-        gflags.push_back(flag);
-      }
+    if (config.use_gpu()) {
+      static std::once_flag gflags_initialized;
+      static bool process_level_allocator_enabled;
+
+      std::call_once(gflags_initialized, [&]() {
+        PADDLE_ENFORCE_GE(
+            config.memory_pool_init_size_mb(),
+            0.f,
+            platform::errors::InvalidArgument(
+                "The size of memory pool should be greater than or equal "
+                "to 0."));
+        PADDLE_ENFORCE_GE(config.gpu_device_id(),
+                          0,
+                          platform::errors::InvalidArgument(
+                              "Invalid device id (%d). The device id should be "
+                              "greater than or equal to 0.",
+                              config.gpu_device_id()));
+
+        float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool();
+        if (fraction_of_gpu_memory > 0.95f) {
+          LOG(ERROR)
+              << "Allocate too much memory for the GPU memory pool, assigned "
+              << config.memory_pool_init_size_mb() << " MB";
+          LOG(ERROR) << "Try to shrink the value by setting "
+                        "AnalysisConfig::EnableUseGpu(...)";
+        }
+        if (fraction_of_gpu_memory >= 0.0f && fraction_of_gpu_memory <= 0.95f) {
+          std::string value = std::to_string(fraction_of_gpu_memory);
+          SetGflag("fraction_of_gpu_memory_to_use", value.data());
+        }
 
-      // TODO(Shixiaowei02): Add a mandatory scheme to use the thread local
-      // allocator when multi-stream is enabled.
-      if (config.thread_local_stream_enabled()) {
-        gflags.push_back("--allocator_strategy=thread_local");
-        process_level_allocator_enabled = false;
-      } else {
-        process_level_allocator_enabled = true;
-      }
+        // TODO(Shixiaowei02): Add a mandatory scheme to use the thread local
+        // allocator when multi-stream is enabled.
+        if (config.thread_local_stream_enabled()) {
+          SetGflag("allocator_strategy", "thread_local");
+          process_level_allocator_enabled = false;
+        } else {
+          process_level_allocator_enabled = true;
+        }
 
-      // support set flags from enviorment.
-      const phi::ExportedFlagInfoMap &env_map = phi::GetExportedFlagInfoMap();
-      std::ostringstream os;
-      os << "--tryfromenv=";
-      for (auto &pair : env_map) {
-        os << pair.second.name << ",";
-      }
-      auto tryfromenv_str = os.str();
-      gflags.push_back(os.str().substr(0, tryfromenv_str.size() - 1));
-
-      if (framework::InitGflags(gflags)) {
-        VLOG(3) << "The following gpu analysis configurations only take effect "
-                   "for the first predictor: ";
-        for (size_t i = 1; i < gflags.size(); ++i) {
-          VLOG(3) << gflags[i];
-        }
-      } else {
-        LOG(WARNING) << "The one-time configuration of analysis predictor "
-                        "failed, which may be due to native predictor called "
-                        "first and its configurations taken effect.";
-      }
-    });
+        // For inference, the following default values are better.
+        if (std::getenv("FLAGS_conv_workspace_size_limit") == nullptr) {
+          SetGflag("conv_workspace_size_limit", "32");
+        }
+        if (std::getenv("FLAGS_initial_cpu_memory_in_mb") == nullptr) {
+          SetGflag("initial_cpu_memory_in_mb", "0");
+        }
+
+        // Support setting gflags from the environment.
+        std::vector<std::string> gflags;
+        const phi::ExportedFlagInfoMap &env_map = phi::GetExportedFlagInfoMap();
+        std::ostringstream os;
+        for (auto &pair : env_map) {
+          os << pair.second.name << ",";
+        }
+        std::string tryfromenv_str = os.str();
+        if (!tryfromenv_str.empty()) {
+          tryfromenv_str.pop_back();
+          tryfromenv_str = "--tryfromenv=" + tryfromenv_str;
+          gflags.push_back(tryfromenv_str);
+        }
+        if (framework::InitGflags(gflags)) {
+          VLOG(3)
+              << "The following gpu analysis configurations only take effect "
+                 "for the first predictor: ";
+          for (const auto &gflag : gflags) {
+            VLOG(3) << gflag;
+          }
+        } else {
+          LOG(WARNING) << "The one-time configuration of analysis predictor "
+                          "failed, which may be due to native predictor called "
+                          "first and its configurations taken effect.";
+        }
+      });
 
-    if (config.thread_local_stream_enabled() &&
-        process_level_allocator_enabled) {
-      PADDLE_THROW(platform::errors::Fatal(
-          "When binding threads and streams, the use of "
-          "process-level allocators will result in undefined result "
-          "errors due to memory asynchronous operations."
- "The thread and stream binding configuration of all " - "predictors should be the same in a single process.")); + if (config.thread_local_stream_enabled() && + process_level_allocator_enabled) { + PADDLE_THROW(platform::errors::Fatal( + "When binding threads and streams, the use of " + "process-level allocators will result in undefined result " + "errors due to memory asynchronous operations." + "The thread and stream binding configuration of all " + "predictors should be the same in a single process.")); + } } - } + }; + SetGflags(config); + + VLOG(3) << "create AnalysisPredictor"; std::unique_ptr predictor(new AnalysisPredictor(config)); // Each config can only be used for one predictor. -- GitLab