From ae576f3c68cb35e9793d3a7bbefe6cb97e02bc39 Mon Sep 17 00:00:00 2001
From: Zhaolong Xing
Date: Thu, 6 Jun 2019 14:46:50 +0800
Subject: [PATCH] fix: high RAM usage when loading the model from memory (#17788)

test=develop
---
 paddle/fluid/inference/analysis/argument.h    | 10 +++++
 .../inference/analysis/ir_pass_manager.cc     |  1 +
 .../ir_passes/tensorrt_subgraph_pass.cc       | 33 +++++++++-------
 .../ir_params_sync_among_devices_pass.cc      |  2 +-
 paddle/fluid/inference/api/analysis_config.cc | 14 ++++++-
 .../fluid/inference/api/analysis_predictor.cc |  8 ++++
 .../inference/api/paddle_analysis_config.h    | 12 +++++-
 .../inference/api/paddle_pass_builder.cc      |  1 +
 .../inference/tensorrt/convert/op_converter.h |  1 +
 paddle/fluid/inference/tensorrt/engine.h      | 39 +++++++++++++++++++
 paddle/fluid/inference/tensorrt/op_teller.cc  |  2 +-
 .../tests/api/analyzer_pyramid_dnn_tester.cc  |  4 ++
 .../tests/api/analyzer_rnn1_tester.cc         |  4 ++
 .../tests/api/analyzer_save_model_tester.cc   | 15 +++++--
 .../tests/api/analyzer_seq_pool1_tester.cc    |  4 ++
 .../fluid/inference/tests/api/tester_helper.h |  6 +--
 .../operators/tensorrt/tensorrt_engine_op.h   | 36 ++++++++---------
 .../tensorrt/tensorrt_engine_op_test.cc       |  2 +
 18 files changed, 149 insertions(+), 45 deletions(-)

diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 590baf4ee37..7bcd1f01bfe 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -63,6 +63,16 @@ struct Argument {
   using anakin_max_shape_t = std::map<std::string, std::vector<int>>;

   bool Has(const std::string& key) const { return valid_fields_.count(key); }
+  void PartiallyRelease() {
+    if (Has("model_program_path")) {
+      if (Has("model_from_memory") && model_from_memory()) {
+        model_program_path().clear();
+        model_program_path().shrink_to_fit();
+        model_params_path().clear();
+        model_params_path().shrink_to_fit();
+      }
+    }
+  }

 #define DECL_ARGUMENT_FIELD(field__, Field, type__) \
  public:                                            \

diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 17b7d42d3a1..f290e6fce49 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -87,6 +87,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       bool enable_int8 = argument->tensorrt_precision_mode() ==
                          AnalysisConfig::Precision::kInt8;

+      pass->Set("predictor_id", new int(argument->predictor_id()));
       bool use_calib_mode = argument->tensorrt_use_calib_mode();
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("use_calib_mode", new bool(use_calib_mode));

diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index 3fad263b05f..37c3fc79554 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -199,8 +199,12 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "parameters", params);

   auto use_static_engine = Get<bool>("use_static_engine");
+  // TODO(NHZlX)
+  // There are models with the same structure but different parameters,
+  // and running them in 'use_serialize' mode triggers a bug.
   auto engine_key = GenerateEngineKey(input_names_with_id, output_names_with_id,
                                       std::to_string(0));
+  auto predictor_id = Get<int>("predictor_id");

   // Get "" when there is no cached calibration table data.
   bool load_from_memory = Get<bool>("model_from_memory");
@@ -214,6 +218,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
   SetAttr(op_desc->Proto(), "use_calib_mode", use_calib_mode);
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
+  SetAttr(op_desc->Proto(), "predictor_id", predictor_id);
   std::string trt_engine_serialized_data = "";
   SetAttr(op_desc->Proto(), "engine_serialized_data",
           trt_engine_serialized_data);
@@ -233,15 +238,20 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   std::copy(params.begin(), params.end(),
             std::back_inserter(*repetitive_params));
-  bool need_serialize = (use_static_engine && !load_from_memory);
+  tensorrt::TensorRTEngine *trt_engine =
+      inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
+          .Create(engine_key + std::to_string(predictor_id),
+                  Get<int>("max_batch_size"), Get<int>("workspace_size"),
+                  enable_int8, calibrator.get(), Get<int>("gpu_device_id"));
+
+  bool need_serialize = (use_static_engine && !load_from_memory);
   if (need_serialize) {
     trt_engine_serialized_data = GetTrtEngineSerializedData(
         Get<std::string>("model_opt_cache_dir"), engine_key);
     // we can load the engine info serialized before from the disk.
     if (!trt_engine_serialized_data.empty()) {
-      SetAttr(op_desc->Proto(), "engine_serialized_data",
-              trt_engine_serialized_data);
+      trt_engine->Deserialize(trt_engine_serialized_data);
       LOG(INFO) << "Load TRT Optimized Info from "
                 << GetTrtEngineSerializedPath(
                        Get<std::string>("model_opt_cache_dir"), engine_key);
@@ -254,10 +264,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   // 2. already load serialized trt engine info.
   LOG(INFO) << "Prepare TRT engine (Optimize model structure, Select OP "
                "kernel etc). This process may cost a lot of time.";
-  std::unique_ptr<tensorrt::TensorRTEngine> trt_engine(
-      new tensorrt::TensorRTEngine(
-          Get<int>("max_batch_size"), Get<int>("workspace_size"), enable_int8,
-          calibrator.get(), Get<int>("gpu_device_id")));
+
   auto *scope = param_scope();
   framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
   std::unordered_set<std::string> param_set(params.begin(), params.end());
   inference::Singleton<inference::tensorrt::OpConverter>::Global()
       .ConvertBlockToTRTEngine(
           &block_desc_temp, *scope,
           std::vector<std::string>(input_names.begin(), input_names.end()),
-          param_set, output_mapping, trt_engine.get());
-  nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
-  trt_engine_serialized_data =
-      std::string((const char *)serialized_engine_data->data(),
-                  serialized_engine_data->size());
+          param_set, output_mapping, trt_engine);

   if (need_serialize) {
+    nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
+    trt_engine_serialized_data =
+        std::string((const char *)serialized_engine_data->data(),
+                    serialized_engine_data->size());
     SaveTrtEngineSerializedDataToFile(
         GetTrtEngineSerializedPath(Get<std::string>("model_opt_cache_dir"),
                                    engine_key),
         trt_engine_serialized_data);
   }
-  SetAttr(op_desc->Proto(), "engine_serialized_data",
-          trt_engine_serialized_data);
 }

 }  // namespace analysis
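Note (editor's addition, not part of the patch): the pass no longer copies the serialized engine into the op's `engine_serialized_data` attribute, which appears to be one source of the high memory use this commit addresses; the engine object itself is created in, and owned by, the global TRTEngineManager under the name `engine_key + predictor_id`, and is only deserialized from disk when a static engine cache is available. When the model is loaded from memory there is no on-disk model directory to cache against, so serialization is skipped in that mode. A minimal sketch of that decision, with the helper name invented purely for illustration:

    // Editor's sketch (hypothetical helper, not in the patch): the caching rule
    // used above, written out as a standalone predicate.
    bool NeedSerializeTrtEngine(bool use_static_engine, bool load_from_memory) {
      // When the model comes from memory, the optimized-engine cache on disk is
      // skipped entirely; the engine is rebuilt by ConvertBlockToTRTEngine.
      return use_static_engine && !load_from_memory;
    }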
diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
index 1f27e80cf49..fedee3ff95f 100644
--- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -69,7 +69,7 @@ void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
       // Copy the parameter data to a tmp tensor.
       TensorCopySync(*t, cpu_place, &temp_tensor);
       // Reallocation the space on GPU
-      t->mutable_data(place);
+      t->clear();
       // Copy parameter data to newly allocated GPU space.
       TensorCopySync(temp_tensor, place, t);

diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 0d25c159fd2..2dc96c87151 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -87,10 +87,12 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {

   // Model related.
   CP_MEMBER(model_dir_);
-  CP_MEMBER(prog_file_);
-  CP_MEMBER(params_file_);
   CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
                                   // params_file_ fields.
+
+  prog_file_ = std::move(other.prog_file_);
+  params_file_ = std::move(other.params_file_);
+
   // Gpu related.
   CP_MEMBER(use_gpu_);
   CP_MEMBER(device_id_);
@@ -439,4 +441,12 @@ void AnalysisConfig::EnableAnakinEngine(
   anakin_auto_config_layout_ = auto_config_layout;
   Update();
 }
+
+void AnalysisConfig::PartiallyRelease() {
+  prog_file_.clear();
+  prog_file_.shrink_to_fit();
+  params_file_.clear();
+  params_file_.shrink_to_fit();
+}
+
 }  // namespace paddle

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index adc88409b6a..5d9d5a3178a 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -444,6 +444,10 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program);
   inference_program_.reset(
       new framework::ProgramDesc(argument_.ir_analyzed_program()));
+  // The config and argument take up a lot of memory;
+  // once the predictor is fully set up, we release this storage.
+  argument_.PartiallyRelease();
+  config_.PartiallyRelease();
   LOG(INFO) << "== optimize end ==";
 }

@@ -451,6 +455,8 @@ template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
   VLOG(3) << "create AnalysisConfig";
+  PADDLE_ENFORCE(config.is_valid(),
+                 "Note: Each config can only be used for one predictor.");
   if (config.use_gpu()) {
     // 1. GPU memory
     PADDLE_ENFORCE_GE(config.memory_pool_init_size_mb(), 0.f);
@@ -480,6 +486,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   }

   std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
+  // Each config can only be used for one predictor.
+  config.SetInValid();
   auto predictor_p = dynamic_cast<AnalysisPredictor *>(predictor.get());

   if (!predictor_p->Init(nullptr)) {
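Note (editor's addition, not part of the patch): with the `is_valid_` check and the `SetInValid()` call above, an `AnalysisConfig` is consumed by `CreatePaddlePredictor` and cannot be reused to build a second predictor. A hedged usage sketch, with the model path as a placeholder and the call pattern assumed from Paddle's public inference API:

    // Sketch only: one fresh config per predictor. Paths are placeholders.
    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    std::unique_ptr<paddle::PaddlePredictor> MakePredictor(
        const std::string &model_dir) {
      paddle::AnalysisConfig config;
      config.SetModel(model_dir);  // placeholder model directory
      // CreatePaddlePredictor calls config.SetInValid() internally, so reusing
      // this config for another predictor would now fail the is_valid() check.
      return paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
    }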
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 951cb669cca..e3682d27054 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -232,6 +232,8 @@ struct AnalysisConfig {
                            bool force_update_static_cache = false);

   /** Tell whether the memory optimization is activated. */
   bool enable_memory_optim() const;
+  void SetInValid() const { is_valid_ = false; }
+  bool is_valid() const { return is_valid_; }

   friend class ::paddle::AnalysisPredictor;

@@ -239,6 +241,7 @@ struct AnalysisConfig {
    * Get a pass builder for customize the passes in IR analysis phase.
    */
   PassStrategy* pass_builder() const;
+  void PartiallyRelease();

 protected:
   // Update the config.
@@ -249,8 +252,8 @@ struct AnalysisConfig {
 protected:
   // Model pathes.
   std::string model_dir_;
-  std::string prog_file_;
-  std::string params_file_;
+  mutable std::string prog_file_;
+  mutable std::string params_file_;

   // GPU related.
   bool use_gpu_{false};
@@ -312,6 +315,11 @@ struct AnalysisConfig {
   bool use_mkldnn_quantizer_{false};
   std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;

+  // Once the config has been used by a predictor, it becomes invalid.
+  mutable bool is_valid_{true};
+  // A config can only be used with one predictor.
+  // In some cases the variables held by the config take up a lot of memory,
+  // so we release that memory once the predictor has been set up.
 };

 }  // namespace paddle
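Note (editor's addition, not part of the patch): `PartiallyRelease()` pairs `clear()` with `shrink_to_fit()` because `clear()` alone keeps a string's capacity, so the buffer that held a memory-loaded program or params blob would stay allocated. A small standalone illustration; `shrink_to_fit()` is formally a non-binding request, but in practice it returns the buffer:

    #include <iostream>
    #include <string>

    int main() {
      std::string blob(64 * 1024 * 1024, 'x');  // stand-in for a params buffer
      blob.clear();  // size becomes 0, but the capacity (allocation) remains
      std::cout << "after clear():         capacity = " << blob.capacity() << '\n';
      blob.shrink_to_fit();  // ask the implementation to give the memory back
      std::cout << "after shrink_to_fit(): capacity = " << blob.capacity() << '\n';
      return 0;
    }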
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index 9150a4ffc12..bc2c0914728 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -109,6 +109,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
         "conv_affine_channel_fuse_pass",            //
         "conv_eltwiseadd_affine_channel_fuse_pass", //
         "conv_bn_fuse_pass",                        //
+        "conv_eltwiseadd_bn_fuse_pass",             //
 #if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                            // guaranteed at least v7
         "conv_elementwise_add_act_fuse_pass",       //

diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h
index 96a722dc89a..f89b0d7efe2 100644
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -170,6 +170,7 @@ class OpConverter {
       engine->DeclareOutput(output);
     }
     engine->FreezeNetwork();
+    engine->ClearWeights();
   }

   void RreplenishLayerAndOutput(

diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 0396b084b8f..80af463d274 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -149,6 +149,12 @@ class TensorRTEngine {
   std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
       weight_map;

+  void ClearWeights() {
+    for (auto& weight_pair : weight_map) {
+      weight_pair.second.reset(nullptr);
+    }
+  }
+
 private:
   // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
   // ensure that the thread is associated with the correct device by calling
@@ -213,6 +219,39 @@ class TensorRTEngine {
 #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
   engine__->network()->add##layer__(ARGS);

+class TRTEngineManager {
+ public:
+  bool Empty() const { return engines_.size() == 0; }
+  bool Has(const std::string& name) const {
+    if (engines_.count(name) == 0) return false;
+    return engines_.at(name).get() != nullptr;
+  }
+
+  TensorRTEngine* Get(const std::string& name) const {
+    return engines_.at(name).get();
+  }
+
+  TensorRTEngine* Create(std::string name, int max_batch, int max_workspace,
+                         bool enable_int8 = false,
+                         TRTInt8Calibrator* calibrator = nullptr,
+                         int device_id = 0,
+                         nvinfer1::ILogger& logger = NaiveLogger::Global()) {
+    auto* p = new TensorRTEngine(max_batch, max_workspace, enable_int8,
+                                 calibrator, device_id, logger);
+    engines_[name].reset(p);
+    return p;
+  }
+
+  void DeleteAll() {
+    for (auto& item : engines_) {
+      item.second.reset(nullptr);
+    }
+  }
+
+ private:
+  std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
+};
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
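Note (editor's addition, not part of the patch): engines now live in this process-wide TRTEngineManager instead of inside each TensorRTEngineOp, and OpConverter frees the host-side weight copies with `ClearWeights()` once `FreezeNetwork()` has built the ICudaEngine. A minimal usage sketch, assuming the declarations added above and Paddle's `Singleton` helper; real TensorRT network setup and error handling are omitted:

    #include <string>
    #include "paddle/fluid/inference/tensorrt/engine.h"
    #include "paddle/fluid/inference/utils/singleton.h"

    using paddle::inference::Singleton;
    using paddle::inference::tensorrt::TRTEngineManager;

    void EngineLifetimeSketch(const std::string &engine_key, int predictor_id) {
      auto &manager = Singleton<TRTEngineManager>::Global();
      const std::string name = engine_key + std::to_string(predictor_id);

      if (!manager.Has(name)) {
        // Typically done by tensorrt_subgraph_pass; the manager owns the engine,
        // callers only keep the raw pointer it returns.
        manager.Create(name, /*max_batch=*/1, /*max_workspace=*/1 << 20);
      }
      auto *engine = manager.Get(name);  // reused later by TensorRTEngineOp
      (void)engine;
    }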
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 8a5aed5d43a..170ca40d659 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -31,7 +31,7 @@ struct SimpleOpTypeSetTeller : public Teller {
   std::unordered_set<std::string> teller_set{
       {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
        "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
-       "elementwise_add", "elementwise_mul", "dropout", "split", "prelu",
+       "elementwise_add", "elementwise_mul", "dropout", "prelu",
        "conv2d_transpose", "leaky_relu", "fc"}};
 };

diff --git a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
index cc31ab9588d..11a49ed2914 100644
--- a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
@@ -177,11 +177,15 @@ TEST(Analyzer_Pyramid_DNN, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);

+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
+
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back("cos_sim_2.tmp_0");
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }

diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
index 54fd3a4a4ca..620a1d1f7a3 100644
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -293,11 +293,15 @@ TEST(Analyzer_rnn1, compare_zero_copy) {
   AnalysisConfig cfg;
   SetConfig(&cfg);

+  AnalysisConfig cfg1;
+  SetConfig(&cfg1);
+
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
   std::vector<std::string> outputs_name;
   outputs_name.emplace_back("final_output.tmp_1");
   CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
+                             reinterpret_cast<PaddlePredictor::Config *>(&cfg1),
                              input_slots_all, outputs_name);
 }

diff --git a/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc b/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
index 4d99bbd36ff..977b2ec885d 100644
--- a/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_save_model_tester.cc
@@ -39,10 +39,17 @@ TEST(Analyzer, save_model) {
   mkdir(optimModelPath.c_str(), 0777);
   SaveOptimModel(&cfg, optimModelPath);

-  cfg.pass_builder()->ClearPasses();
-  int origin_num_ops = GetNumOps(cfg);
-  cfg.SetModel(optimModelPath + "/model",
-               optimModelPath + "/params");
-  int fused_num_ops = GetNumOps(cfg);
+  // Each config can only be applied to one predictor.
+  AnalysisConfig cfg2;
+  SetConfig(&cfg2);
+  cfg2.pass_builder()->ClearPasses();
+  cfg2.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int origin_num_ops = GetNumOps(cfg2);
+
+  AnalysisConfig cfg3;
+  SetConfig(&cfg3);
+  cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params");
+  int fused_num_ops = GetNumOps(cfg3);
   CHECK_LE(fused_num_ops, origin_num_ops);
 }
optimModelPath + "/params"); - int fused_num_ops = GetNumOps(cfg); + // Each config can only be applied to one predictor. + AnalysisConfig cfg2; + SetConfig(&cfg2); + cfg2.pass_builder()->ClearPasses(); + cfg2.SetModel(optimModelPath + "/model", optimModelPath + "/params"); + int origin_num_ops = GetNumOps(cfg2); + + AnalysisConfig cfg3; + SetConfig(&cfg3); + cfg3.SetModel(optimModelPath + "/model", optimModelPath + "/params"); + int fused_num_ops = GetNumOps(cfg3); CHECK_LE(fused_num_ops, origin_num_ops); } diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc index e78f04a07c5..e6f2bfad68c 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc @@ -215,11 +215,15 @@ TEST(Analyzer_seq_pool1, compare_zero_copy) { AnalysisConfig cfg; SetConfig(&cfg); + AnalysisConfig cfg1; + SetConfig(&cfg1); + std::vector> input_slots_all; SetInput(&input_slots_all); std::vector outputs_name; outputs_name.emplace_back(out_var_name); CompareAnalysisAndZeroCopy(reinterpret_cast(&cfg), + reinterpret_cast(&cfg1), input_slots_all, outputs_name); } diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 6dda9ed0ec6..eda86c3b42b 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -534,7 +534,7 @@ void CompareNativeAndAnalysis( } void CompareAnalysisAndZeroCopy( - PaddlePredictor::Config *config, + PaddlePredictor::Config *config, PaddlePredictor::Config *config1, const std::vector> &inputs, const std::vector &outputs_name) { int batch_size = FLAGS_batch_size; @@ -544,8 +544,8 @@ void CompareAnalysisAndZeroCopy( predictor->Run(inputs[0], &analysis_outputs, batch_size); // analysis + zero_copy std::vector zerocopy_outputs; - reinterpret_cast(config)->SwitchUseFeedFetchOps(false); - predictor = CreateTestPredictor(config, true); + reinterpret_cast(config1)->SwitchUseFeedFetchOps(false); + predictor = CreateTestPredictor(config1, true); ConvertPaddleTensorToZeroCopyTensor(predictor.get(), inputs[0]); predictor->ZeroCopyRun(); for (size_t i = 0; i < outputs_name.size(); i++) { diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 1c32368e9de..21cf15cb0b0 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -43,7 +43,7 @@ class TensorRTEngineOp : public framework::OperatorBase { private: std::vector input_names_; std::unordered_set param_names_; - mutable std::unique_ptr trt_engine_; + mutable TensorRTEngine *trt_engine_{nullptr}; int max_batch_size_; int workspace_size_; std::unique_ptr calibrator_; @@ -51,8 +51,8 @@ class TensorRTEngineOp : public framework::OperatorBase { bool use_calib_mode_; std::string calibration_data_; std::string engine_key_; - std::string engine_serialized_data_; bool calibration_mode_; + int predictor_id_; int device_id_; public: @@ -69,7 +69,7 @@ class TensorRTEngineOp : public framework::OperatorBase { use_calib_mode_ = Attr("use_calib_mode"); calibration_data_ = Attr("calibration_data"); engine_key_ = Attr("engine_key"); - engine_serialized_data_ = Attr("engine_serialized_data"); + predictor_id_ = Attr("predictor_id"); auto params = Attr>("parameters"); for (const auto ¶m : params) { @@ -84,16 +84,14 @@ class TensorRTEngineOp : public 
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
index b39508a34d8..efc50fc06f4 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
@@ -102,6 +102,7 @@ TEST(TensorRTEngineOp, manual) {
   engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
   engine_op_desc.SetAttr("parameters", std::vector<std::string>({}));
   engine_op_desc.SetAttr("engine_key", std::string("a_engine"));
+  engine_op_desc.SetAttr("predictor_id", 1);
   engine_op_desc.SetAttr("calibration_data", std::string(""));
   engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));
   engine_op_desc.SetAttr("use_calib_mode", static_cast<bool>(false));
@@ -201,6 +202,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
   engine_op_desc.SetAttr("parameters",
                          std::vector<std::string>({"y0", "y1", "y2", "y3"}));
   engine_op_desc.SetAttr("engine_key", std::string("b_engine"));
+  engine_op_desc.SetAttr("predictor_id", 1);
   engine_op_desc.SetAttr("calibration_data", std::string(""));
   engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));
   engine_op_desc.SetAttr("use_calib_mode", static_cast<bool>(false));
--
GitLab