diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index c8c25086db1d58089604555b6c46dac0e6a1251e..2f31b182af7293488719e41a92b2ea78709bda02 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -99,10 +99,6 @@ struct Argument {
  private:                                                                 \
   unique_ptr_t field__##_;
 
-  // Each predictor has an unique id.
-  // For now, this attr will help us to get the right
-  //     trt_engine for each trt_engine_op for each predictor when using trt.
-  DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int);
   // Model path
   DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string);
   // Model specified with program and parameters files.
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 3e5525b1ec3190340525e79b1daa587dca44c14a..16973aeb865eb5a6eaac74d304357219fa1f124a 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -81,7 +81,6 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set(
           "model_opt_cache_dir",
           new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
-      pass->Set("predictor_id", new int(argument->predictor_id()));
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
     }
 
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index 6f23330d6d05741bdcb8a22345cb5974c4a99f92..2b5ae2a840b3b3da3380107b54d5cd8723e9914d 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -209,9 +209,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "parameters", params);
 
   auto enable_int8 = Get<bool>("enable_int8");
-  int predictor_id = Get<int>("predictor_id");
   auto engine_key = GenerateEngineKey(input_names_with_id, output_names_with_id,
-                                      std::to_string(predictor_id));
+                                      std::to_string(0));
 
   // Get "" when there is no cached calibration table data.
   std::string calibration_data = GetTrtCalibTableData(
@@ -221,9 +220,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
   SetAttr(op_desc->Proto(), "engine_serialized_data", std::string(""));
-  SetAttr(op_desc->Proto(), "engine_serialized_data_path",
-          GetTrtEngineSerializedPath(Get<std::string>("model_opt_cache_dir"),
-                                     engine_key));
 
   std::unique_ptr<tensorrt::TRTInt8Calibrator> calibrator;
   if (enable_int8 && calibration_data.size() != 0) {
@@ -239,13 +235,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   std::string trt_engine_serialized_data = GetTrtEngineSerializedData(
       Get<std::string>("model_opt_cache_dir"), engine_key);
 
-  tensorrt::TensorRTEngine *trt_engine =
-      inference::Singleton<tensorrt::TRTEngineManager>::Global().Create(
-          Get<int>("max_batch_size"), Get<int>("workspace_size"), enable_int8,
-          calibrator.get(), engine_key, Get<int>("gpu_device_id"));
   if (trt_engine_serialized_data.size() == 0) {
     LOG(INFO) << "Prepare TRT engine (Optimize model structure, Select OP "
                  "kernel etc). This process may cost a lot of time.";
+    std::unique_ptr<tensorrt::TensorRTEngine> trt_engine(
+        new tensorrt::TensorRTEngine(
+            Get<int>("max_batch_size"), Get<int>("workspace_size"),
+            enable_int8, calibrator.get(), Get<int>("gpu_device_id")));
     auto *scope = param_scope();
     framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
     std::unordered_set<std::string> param_set(params.begin(), params.end());
@@ -253,7 +249,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
         .ConvertBlockToTRTEngine(
             &block_desc_temp, *scope,
             std::vector<std::string>(input_names.begin(), input_names.end()),
-            param_set, output_mapping, trt_engine);
+            param_set, output_mapping, trt_engine.get());
     nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
     trt_engine_serialized_data =
         std::string((const char *)serialized_engine_data->data(),
@@ -263,11 +259,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
                                          engine_key),
                               trt_engine_serialized_data);
   } else {
-    LOG(INFO) << "Load TRT Engine from optimized serialized data : "
+    LOG(INFO) << "Load TRT Optimized Info from "
              << GetTrtEngineSerializedPath(
                     Get<std::string>("model_opt_cache_dir"), engine_key);
-    trt_engine->Deserialize(trt_engine_serialized_data);
   }
+
   SetAttr(op_desc->Proto(), "engine_serialized_data",
           trt_engine_serialized_data);
 }
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index b78da7787715e1378a2878c85f414f8efd090814..467d4411376381df950bb582f9c73410284a5e2d 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -345,7 +345,6 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
       config_.static_memory_optim_force_update_);
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
-  argument_.SetPredictorID(predictor_id_);
   if (!config_.model_dir().empty()) {
     argument_.SetModelDir(config_.model_dir());
   } else {
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 7ad361616bf3797eaa088112460a4bef37e55642..b9d0fdc51ce3231d4710e99752886e6ef8141ae0 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -44,9 +44,7 @@ using framework::NaiveExecutor;
  */
 class AnalysisPredictor : public PaddlePredictor {
  public:
-  explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
-    predictor_id_ = inference::GetUniqueId();
-  }
+  explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {}
   ~AnalysisPredictor();
 
   bool Init(const std::shared_ptr<framework::Scope> &parent_scope,
@@ -146,7 +144,6 @@ class AnalysisPredictor : public PaddlePredictor {
   const size_t max_shape_collect_count_{1000};
   int need_collect_var_shapes_{-1};  // -1 for default, 0 for false, 1 for true.
   std::vector<std::map<std::string, std::vector<int>>> batch_var_shapes_;
-  int predictor_id_;
 
  private:
   // Some status here that help to determine the status inside the predictor.
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 6abc9a1f082dfea6955fa2d0750a95d61b98edcc..657dfd9355f9e3167a123b1f71655869d030a3df 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -199,43 +199,6 @@ class TensorRTEngine {
 #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
   engine__->network()->add##layer__(ARGS);
 
-/*
- * Helper to control the TensorRT engine's creation and deletion.
- */
-class TRTEngineManager {
- public:
-  bool HasEngine(const std::string& name) const {
-    if (engines_.count(name) == 0) return false;
-    return engines_.at(name).get() != nullptr;
-  }
-
-  // Get an engine called `name`.
-  TensorRTEngine* Get(const std::string& name) const {
-    return engines_.at(name).get();
-  }
-
-  // Create or get an engine called `name`
-  TensorRTEngine* Create(int max_batch, int max_workspace, bool enable_int8,
-                         TRTInt8Calibrator* calibrator,
-                         const std::string& engine_name, int device_id = 0) {
-    std::unique_lock<std::mutex> lk(mut_);
-    auto* p = new TensorRTEngine(max_batch, max_workspace, enable_int8,
-                                 calibrator, device_id);
-    engines_[engine_name].reset(p);
-    return p;
-  }
-
-  void DeleteALL() {
-    for (auto& item : engines_) {
-      item.second.reset(nullptr);
-    }
-  }
-
- private:
-  std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
-  std::mutex mut_;
-};
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
index 03992f88b5bc34115a9d2fc1bfb9b7e0e6e7babb..061dd30497de2d515afdf4e06f0e7112f2885065 100644
--- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
+++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
@@ -31,7 +31,8 @@ namespace inference {
 namespace tensorrt {
 namespace plugin {
 
-class PluginFactoryTensorRT : public nvinfer1::IPluginFactory {
+class PluginFactoryTensorRT : public nvinfer1::IPluginFactory,
+                              public DeleteHelper {
  public:
   // Deserialization method
   PluginTensorRT* createPlugin(const char* layer_name, const void* serial_data,
diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h
index 55ca681c788b6549cc37df4141ed2be7b7b14f35..1cae4ccae4cc593785d9b3b0e87523e740eef4ff 100644
--- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h
+++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h
@@ -24,6 +24,13 @@ namespace inference {
 namespace tensorrt {
 namespace plugin {
 
+// Some TRT base classes lack a virtual destructor.
+// We use a helper class to fix this.
+struct DeleteHelper {
+ protected:
+  virtual ~DeleteHelper() {}
+};
+
 template <typename T>
 inline void SerializeValue(void** buffer, T const& value);
 
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index cb6412115b324f61a4a6b1eabfd4879a20617655..3f98b0a93406fc2ac5dec2663f4d80a923501bef 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -41,7 +41,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
 private:
   std::vector<std::string> input_names_;
   std::unordered_set<std::string> param_names_;
-  mutable TensorRTEngine *trt_engine_;
+  mutable std::unique_ptr<TensorRTEngine> trt_engine_;
   int max_batch_size_;
   int workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;
@@ -64,7 +64,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
     calibration_data_ = Attr<std::string>("calibration_data");
     engine_key_ = Attr<std::string>("engine_key");
     engine_serialized_data_ = Attr<std::string>("engine_serialized_data");
-    trt_engine_ = nullptr;
 
     auto params = Attr<std::vector<std::string>>("parameters");
     for (const auto &param : params) {
@@ -78,16 +77,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
     if (enable_int8_ && calibration_data_.size()) {
       calibrator_.reset(new TRTInt8Calibrator(calibration_data_));
     }
-
-    // we will create an engine here.
-    if (!calibration_mode_) {
-      if (inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
-              .HasEngine(engine_key_)) {
-        trt_engine_ = inference::Singleton<
-                          inference::tensorrt::TRTEngineManager>::Global()
-                          .Get(engine_key_);
-      }
-    }
   }
 
 protected:
@@ -231,15 +220,17 @@ class TensorRTEngineOp : public framework::OperatorBase {
 
   TensorRTEngine *GetEngine(const framework::Scope &scope,
                             const platform::Place &dev_place) const {
-    if (trt_engine_ == nullptr) {
-      trt_engine_ =
-          inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
-              .Create(max_batch_size_, workspace_size_, enable_int8_,
-                      calibrator_.get(), engine_key_,
-                      boost::get<platform::CUDAPlace>(dev_place).device);
-      PrepareTRTEngine(scope, trt_engine_);
+    if (trt_engine_.get() == nullptr) {
+      trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
+          max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
+          boost::get<platform::CUDAPlace>(dev_place).device));
+      if (engine_serialized_data_.size() > 0) {
+        trt_engine_->Deserialize(engine_serialized_data_);
+      } else {
+        PrepareTRTEngine(scope, trt_engine_.get());
+      }
     }
-    return trt_engine_;
+    return trt_engine_.get();
   }
 
   void PrepareTRTEngine(const framework::Scope &scope,
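
For readers of this diff, below is a minimal standalone sketch of the engine lifecycle the change moves to: the subgraph pass serializes the engine it builds into the op's "engine_serialized_data" attribute, and each TensorRTEngineOp lazily deserializes that blob into an engine it owns through a unique_ptr, so the process-wide TRTEngineManager singleton and the per-predictor id are no longer needed. The FakeEngine, OpAttrs, RunSubgraphPass, and FakeEngineOp names are illustrative stand-ins under these assumptions, not Paddle's or TensorRT's real classes; the real code paths are TensorRtSubgraphPass::CreateTensorRTOp and TensorRTEngineOp::GetEngine as shown above.

// Standalone sketch (hypothetical types, not Paddle's API): serialize-into-
// attribute on the pass side, lazy per-op deserialization on the op side.
#include <iostream>
#include <memory>
#include <string>
#include <utility>

// Stand-in for tensorrt::TensorRTEngine.
class FakeEngine {
 public:
  void Build(const std::string& subgraph) { plan_ = "plan(" + subgraph + ")"; }
  std::string Serialize() const { return plan_; }
  void Deserialize(const std::string& blob) { plan_ = blob; }
  void Run() const { std::cout << "running " << plan_ << "\n"; }

 private:
  std::string plan_;
};

// Stand-in for the attributes the pass writes onto the TensorRT op desc.
struct OpAttrs {
  std::string engine_serialized_data;  // filled by the pass (may stay empty)
  std::string subgraph = "conv+relu";  // what the op would convert otherwise
};

// Pass side: build the engine locally, serialize it into the op attribute.
OpAttrs RunSubgraphPass() {
  OpAttrs attrs;
  FakeEngine engine;  // local object, no global registry keyed by engine_key
  engine.Build(attrs.subgraph);
  attrs.engine_serialized_data = engine.Serialize();
  return attrs;
}

// Op side: owns its engine and creates it lazily on first use.
class FakeEngineOp {
 public:
  explicit FakeEngineOp(OpAttrs attrs) : attrs_(std::move(attrs)) {}

  void Run() const {
    if (!engine_) {
      engine_.reset(new FakeEngine());
      if (!attrs_.engine_serialized_data.empty()) {
        engine_->Deserialize(attrs_.engine_serialized_data);  // fast path
      } else {
        engine_->Build(attrs_.subgraph);  // fallback: build from the subgraph
      }
    }
    engine_->Run();
  }

 private:
  OpAttrs attrs_;
  mutable std::unique_ptr<FakeEngine> engine_;  // per-op, per-predictor engine
};

int main() {
  FakeEngineOp op(RunSubgraphPass());
  op.Run();  // first call deserializes; later calls reuse the same engine
  op.Run();
  return 0;
}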