diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 89c8c7902bac9fd2e15a164f7e0dfd21945cf16e..1ec692d3d1df66d8c1df689d557b289fc2880b30 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -650,13 +650,6 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     gflags.push_back("--cudnn_deterministic=True");
   }
 
-  if (config.thread_local_stream_enabled()) {
-    gflags.push_back("--allocator_strategy=thread_local");
-    process_level_allocator_enabled = false;
-  } else {
-    process_level_allocator_enabled = true;
-  }
-
   // TODO(wilber): jetson tx2 may fail to run the model due to insufficient memory
   // under the native_best_fit strategy. Modify the default allocation strategy to
   // auto_growth. todo, find a more appropriate way to solve the problem.
@@ -664,6 +657,15 @@
   gflags.push_back("--allocator_strategy=auto_growth");
 #endif
 
+  // TODO(Shixiaowei02): Add a mandatory scheme to use the thread local
+  // allocator when multi-stream is enabled.
+  if (config.thread_local_stream_enabled()) {
+    gflags.push_back("--allocator_strategy=thread_local");
+    process_level_allocator_enabled = false;
+  } else {
+    process_level_allocator_enabled = true;
+  }
+
   if (framework::InitGflags(gflags)) {
     VLOG(3) << "The following gpu analysis configurations only take effect "
                "for the first predictor: ";
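
The patch moves the thread-local branch rather than merely commenting it: gflags parses the assembled flag list in order, so with the old placement a WITH_NV_JETSON build would push --allocator_strategy=auto_growth after --allocator_strategy=thread_local and silently override it. Pushing the thread-local flag last lets it take precedence. Below is a minimal usage sketch of the configuration path this branch serves; it assumes the public AnalysisConfig API (EnableGpuMultiStream() is the setter paired with the thread_local_stream_enabled() getter seen in the diff), and "model_dir" is a placeholder, not from the patch.

    // Sketch only: enables thread-local GPU streams so that
    // thread_local_stream_enabled() is true, which now pushes
    // --allocator_strategy=thread_local after the Jetson auto_growth default.
    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    int main() {
      paddle::AnalysisConfig config;
      config.SetModel("model_dir");         // placeholder model directory
      config.EnableUseGpu(100 /* MB */, 0); // GPU memory pool size, device id
      config.EnableGpuMultiStream();        // sets thread_local_stream_enabled()
      auto predictor = paddle::CreatePaddlePredictor(config);
      // predictor->Run(...) can then be called from worker threads, each
      // thread using its own CUDA stream and thread_local allocations.
      return 0;
    }

The TODO(Shixiaowei02) comment suggests this ordering is a stopgap: a mandatory coupling between multi-stream mode and the thread_local allocator is deferred to a later change.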