未验证 提交 a1f28a48 编写于 作者: Y Yuanle Liu 提交者: GitHub

[Paddle Inference] change the default values of some gflags (#50074)

上级 3a7e470b
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <glog/logging.h> #include <glog/logging.h>
#include <algorithm> #include <algorithm>
#include <cstdlib>
#include <fstream> #include <fstream>
#include <memory> #include <memory>
#include <set> #include <set>
...@@ -1384,13 +1385,6 @@ template <> ...@@ -1384,13 +1385,6 @@ template <>
std::unique_ptr<PaddlePredictor> std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>( CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
const AnalysisConfig &config) { const AnalysisConfig &config) {
// TODO(NHZlX): Should add the link to the doc of
// paddle_infer::CreatePredictor<paddle_infer::Config>
if (config.glog_info_disabled()) {
FLAGS_logtostderr = 1;
FLAGS_minloglevel = 2; // GLOG_ERROR
}
VLOG(3) << "create AnalysisConfig";
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
config.is_valid(), config.is_valid(),
true, true,
...@@ -1403,83 +1397,112 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>( ...@@ -1403,83 +1397,112 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
std::call_once(custom_operators_registered, std::call_once(custom_operators_registered,
[]() { inference::RegisterAllCustomOperator(); }); []() { inference::RegisterAllCustomOperator(); });
if (config.use_gpu()) { auto SetGflags = [](const AnalysisConfig &config) {
static std::once_flag gflags_initialized; auto SetGflag = [](const char *name, const char *value) {
static bool process_level_allocator_enabled; std::string ret = ::GFLAGS_NAMESPACE::SetCommandLineOption(name, value);
PADDLE_ENFORCE_EQ(
std::call_once(gflags_initialized, [&]() { ret.empty(),
std::vector<std::string> gflags; false,
PADDLE_ENFORCE_GE(
config.memory_pool_init_size_mb(),
0.f,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The size of memory pool should be greater than 0.")); "Fail to set gflag: %s, please make sure the gflag exists.",
PADDLE_ENFORCE_GE( name));
config.gpu_device_id(), VLOG(3) << "set gflag: --" << name << "=" << value;
0, };
platform::errors::InvalidArgument( // TODO(NHZlX): Should add the link to the doc of
"Invalid device id (%d). The device id should be greater than 0.", // paddle_infer::CreatePredictor<paddle_infer::Config>
config.gpu_device_id())); if (config.glog_info_disabled()) {
gflags.push_back("dummy"); FLAGS_logtostderr = 1;
FLAGS_minloglevel = 2; // GLOG_ERROR
float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool(); }
if (fraction_of_gpu_memory > 0.95f) {
LOG(ERROR)
<< "Allocate too much memory for the GPU memory pool, assigned "
<< config.memory_pool_init_size_mb() << " MB";
LOG(ERROR) << "Try to shink the value by setting "
"AnalysisConfig::EnableGpu(...)";
}
if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) { if (config.use_gpu()) {
std::string flag = "--fraction_of_gpu_memory_to_use=" + static std::once_flag gflags_initialized;
std::to_string(fraction_of_gpu_memory); static bool process_level_allocator_enabled;
VLOG(3) << "set flag: " << flag;
gflags.push_back(flag); std::call_once(gflags_initialized, [&]() {
} PADDLE_ENFORCE_GE(
config.memory_pool_init_size_mb(),
0.f,
platform::errors::InvalidArgument(
"The size of memory pool should be greater than 0."));
PADDLE_ENFORCE_GE(config.gpu_device_id(),
0,
platform::errors::InvalidArgument(
"Invalid device id (%d). The device id should be "
"greater than 0.",
config.gpu_device_id()));
float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool();
if (fraction_of_gpu_memory > 0.95f) {
LOG(ERROR)
<< "Allocate too much memory for the GPU memory pool, assigned "
<< config.memory_pool_init_size_mb() << " MB";
LOG(ERROR) << "Try to shink the value by setting "
"AnalysisConfig::EnableUseGpu(...)";
}
if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) {
std::string value = std::to_string(fraction_of_gpu_memory);
SetGflag("fraction_of_gpu_memory_to_use", value.data());
}
// TODO(Shixiaowei02): Add a mandatory scheme to use the thread local // TODO(Shixiaowei02): Add a mandatory scheme to use the thread local
// allocator when multi-stream is enabled. // allocator when multi-stream is enabled.
if (config.thread_local_stream_enabled()) { if (config.thread_local_stream_enabled()) {
gflags.push_back("--allocator_strategy=thread_local"); SetGflag("allocator_strategy", "thread_local");
process_level_allocator_enabled = false; process_level_allocator_enabled = false;
} else { } else {
process_level_allocator_enabled = true; process_level_allocator_enabled = true;
} }
// support set flags from enviorment. // for inference, the following default values are better.
const phi::ExportedFlagInfoMap &env_map = phi::GetExportedFlagInfoMap(); if (std::getenv("FLAGS_conv_workspace_size_limit") == nullptr) {
std::ostringstream os; SetGflag("conv_workspace_size_limit", "32");
os << "--tryfromenv=";
for (auto &pair : env_map) {
os << pair.second.name << ",";
}
auto tryfromenv_str = os.str();
gflags.push_back(os.str().substr(0, tryfromenv_str.size() - 1));
if (framework::InitGflags(gflags)) {
VLOG(3) << "The following gpu analysis configurations only take effect "
"for the first predictor: ";
for (size_t i = 1; i < gflags.size(); ++i) {
VLOG(3) << gflags[i];
} }
} else { if (std::getenv("FLAGS_initial_cpu_memory_in_mb") == nullptr) {
LOG(WARNING) << "The one-time configuration of analysis predictor " SetGflag("initial_cpu_memory_in_mb", "0");
"failed, which may be due to native predictor called " }
"first and its configurations taken effect.";
} // support set gflags from environment.
}); std::vector<std::string> gflags;
const phi::ExportedFlagInfoMap &env_map = phi::GetExportedFlagInfoMap();
std::ostringstream os;
for (auto &pair : env_map) {
os << pair.second.name << ",";
}
std::string tryfromenv_str = os.str();
if (!tryfromenv_str.empty()) {
tryfromenv_str.pop_back();
tryfromenv_str = "--tryfromenv=" + tryfromenv_str;
gflags.push_back(tryfromenv_str);
}
if (framework::InitGflags(gflags)) {
VLOG(3)
<< "The following gpu analysis configurations only take effect "
"for the first predictor: ";
for (const auto &gflag : gflags) {
VLOG(3) << gflag;
}
} else {
LOG(WARNING) << "The one-time configuration of analysis predictor "
"failed, which may be due to native predictor called "
"first and its configurations taken effect.";
}
});
if (config.thread_local_stream_enabled() && if (config.thread_local_stream_enabled() &&
process_level_allocator_enabled) { process_level_allocator_enabled) {
PADDLE_THROW(platform::errors::Fatal( PADDLE_THROW(platform::errors::Fatal(
"When binding threads and streams, the use of " "When binding threads and streams, the use of "
"process-level allocators will result in undefined result " "process-level allocators will result in undefined result "
"errors due to memory asynchronous operations." "errors due to memory asynchronous operations."
"The thread and stream binding configuration of all " "The thread and stream binding configuration of all "
"predictors should be the same in a single process.")); "predictors should be the same in a single process."));
}
} }
} };
SetGflags(config);
VLOG(3) << "create AnalysisPredictor";
std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config)); std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
// Each config can only be used for one predictor. // Each config can only be used for one predictor.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册