未验证 提交 a1f28a48 编写于 作者: Y Yuanle Liu 提交者: GitHub

[Paddle Inference] change the default values of some gflags (#50074)

上级 3a7e470b
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <glog/logging.h> #include <glog/logging.h>
#include <algorithm> #include <algorithm>
#include <cstdlib>
#include <fstream> #include <fstream>
#include <memory> #include <memory>
#include <set> #include <set>
...@@ -1384,13 +1385,6 @@ template <> ...@@ -1384,13 +1385,6 @@ template <>
std::unique_ptr<PaddlePredictor> std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>( CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
const AnalysisConfig &config) { const AnalysisConfig &config) {
// TODO(NHZlX): Should add the link to the doc of
// paddle_infer::CreatePredictor<paddle_infer::Config>
if (config.glog_info_disabled()) {
FLAGS_logtostderr = 1;
FLAGS_minloglevel = 2; // GLOG_ERROR
}
VLOG(3) << "create AnalysisConfig";
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
config.is_valid(), config.is_valid(),
true, true,
...@@ -1403,24 +1397,40 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>( ...@@ -1403,24 +1397,40 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
std::call_once(custom_operators_registered, std::call_once(custom_operators_registered,
[]() { inference::RegisterAllCustomOperator(); }); []() { inference::RegisterAllCustomOperator(); });
auto SetGflags = [](const AnalysisConfig &config) {
auto SetGflag = [](const char *name, const char *value) {
std::string ret = ::GFLAGS_NAMESPACE::SetCommandLineOption(name, value);
PADDLE_ENFORCE_EQ(
ret.empty(),
false,
platform::errors::InvalidArgument(
"Fail to set gflag: %s, please make sure the gflag exists.",
name));
VLOG(3) << "set gflag: --" << name << "=" << value;
};
// TODO(NHZlX): Should add the link to the doc of
// paddle_infer::CreatePredictor<paddle_infer::Config>
if (config.glog_info_disabled()) {
FLAGS_logtostderr = 1;
FLAGS_minloglevel = 2; // GLOG_ERROR
}
if (config.use_gpu()) { if (config.use_gpu()) {
static std::once_flag gflags_initialized; static std::once_flag gflags_initialized;
static bool process_level_allocator_enabled; static bool process_level_allocator_enabled;
std::call_once(gflags_initialized, [&]() { std::call_once(gflags_initialized, [&]() {
std::vector<std::string> gflags;
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
config.memory_pool_init_size_mb(), config.memory_pool_init_size_mb(),
0.f, 0.f,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The size of memory pool should be greater than 0.")); "The size of memory pool should be greater than 0."));
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(config.gpu_device_id(),
config.gpu_device_id(),
0, 0,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"Invalid device id (%d). The device id should be greater than 0.", "Invalid device id (%d). The device id should be "
"greater than 0.",
config.gpu_device_id())); config.gpu_device_id()));
gflags.push_back("dummy");
float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool(); float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool();
if (fraction_of_gpu_memory > 0.95f) { if (fraction_of_gpu_memory > 0.95f) {
...@@ -1428,40 +1438,49 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>( ...@@ -1428,40 +1438,49 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
<< "Allocate too much memory for the GPU memory pool, assigned " << "Allocate too much memory for the GPU memory pool, assigned "
<< config.memory_pool_init_size_mb() << " MB"; << config.memory_pool_init_size_mb() << " MB";
LOG(ERROR) << "Try to shrink the value by setting " LOG(ERROR) << "Try to shrink the value by setting "
"AnalysisConfig::EnableGpu(...)"; "AnalysisConfig::EnableUseGpu(...)";
} }
if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) { if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) {
std::string flag = "--fraction_of_gpu_memory_to_use=" + std::string value = std::to_string(fraction_of_gpu_memory);
std::to_string(fraction_of_gpu_memory); SetGflag("fraction_of_gpu_memory_to_use", value.data());
VLOG(3) << "set flag: " << flag;
gflags.push_back(flag);
} }
// TODO(Shixiaowei02): Add a mandatory scheme to use the thread local // TODO(Shixiaowei02): Add a mandatory scheme to use the thread local
// allocator when multi-stream is enabled. // allocator when multi-stream is enabled.
if (config.thread_local_stream_enabled()) { if (config.thread_local_stream_enabled()) {
gflags.push_back("--allocator_strategy=thread_local"); SetGflag("allocator_strategy", "thread_local");
process_level_allocator_enabled = false; process_level_allocator_enabled = false;
} else { } else {
process_level_allocator_enabled = true; process_level_allocator_enabled = true;
} }
// support set flags from environment. // for inference, the following default values are better.
if (std::getenv("FLAGS_conv_workspace_size_limit") == nullptr) {
SetGflag("conv_workspace_size_limit", "32");
}
if (std::getenv("FLAGS_initial_cpu_memory_in_mb") == nullptr) {
SetGflag("initial_cpu_memory_in_mb", "0");
}
// support set gflags from environment.
std::vector<std::string> gflags;
const phi::ExportedFlagInfoMap &env_map = phi::GetExportedFlagInfoMap(); const phi::ExportedFlagInfoMap &env_map = phi::GetExportedFlagInfoMap();
std::ostringstream os; std::ostringstream os;
os << "--tryfromenv=";
for (auto &pair : env_map) { for (auto &pair : env_map) {
os << pair.second.name << ","; os << pair.second.name << ",";
} }
auto tryfromenv_str = os.str(); std::string tryfromenv_str = os.str();
gflags.push_back(os.str().substr(0, tryfromenv_str.size() - 1)); if (!tryfromenv_str.empty()) {
tryfromenv_str.pop_back();
tryfromenv_str = "--tryfromenv=" + tryfromenv_str;
gflags.push_back(tryfromenv_str);
}
if (framework::InitGflags(gflags)) { if (framework::InitGflags(gflags)) {
VLOG(3) << "The following gpu analysis configurations only take effect " VLOG(3)
<< "The following gpu analysis configurations only take effect "
"for the first predictor: "; "for the first predictor: ";
for (size_t i = 1; i < gflags.size(); ++i) { for (const auto &gflag : gflags) {
VLOG(3) << gflags[i]; VLOG(3) << gflag;
} }
} else { } else {
LOG(WARNING) << "The one-time configuration of analysis predictor " LOG(WARNING) << "The one-time configuration of analysis predictor "
...@@ -1480,6 +1499,10 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>( ...@@ -1480,6 +1499,10 @@ CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
"predictors should be the same in a single process.")); "predictors should be the same in a single process."));
} }
} }
};
SetGflags(config);
VLOG(3) << "create AnalysisPredictor";
std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config)); std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
// Each config can only be used for one predictor. // Each config can only be used for one predictor.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册