From 0ed63b2108fdbfb683140765dd5a378697593659 Mon Sep 17 00:00:00 2001
From: nhzlx
Date: Tue, 26 Feb 2019 06:11:09 +0000
Subject: [PATCH] 6. delete useless predictor id

test=develop
---
 paddle/fluid/inference/analysis/argument.h    |  4 --
 .../inference/analysis/ir_pass_manager.cc     |  1 -
 .../ir_passes/tensorrt_subgraph_pass.cc       | 20 ++++------
 .../fluid/inference/api/analysis_predictor.cc |  1 -
 .../fluid/inference/api/analysis_predictor.h  |  5 +--
 paddle/fluid/inference/tensorrt/engine.h      | 37 -------------------
 .../tensorrt/plugin/trt_plugin_factory.h      |  3 +-
 .../tensorrt/plugin/trt_plugin_utils.h        |  7 ++++
 .../operators/tensorrt/tensorrt_engine_op.h   | 31 ++++++----------
 9 files changed, 29 insertions(+), 80 deletions(-)

diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index c8c25086db..2f31b182af 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -99,10 +99,6 @@ struct Argument {
  private:                      \
   unique_ptr_t field__##_;
 
-  // Each predictor has an unique id.
-  // For now, this attr will help us to get the right
-  // trt_engine for each trt_engine_op for each predictor when using trt.
-  DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int);
   // Model path
   DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string);
   // Model specified with program and parameters files.
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 2b3653bce4..3fc125d8e1 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -81,7 +81,6 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set(
           "model_opt_cache_dir",
           new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
-      pass->Set("predictor_id", new int(argument->predictor_id()));
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
     }
 
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index 6f23330d6d..2b5ae2a840 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -209,9 +209,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "parameters", params);
 
   auto enable_int8 = Get<bool>("enable_int8");
-  int predictor_id = Get<int>("predictor_id");
   auto engine_key = GenerateEngineKey(input_names_with_id, output_names_with_id,
-                                      std::to_string(predictor_id));
+                                      std::to_string(0));
 
   // Get "" when there is no cached calibration table data.
   std::string calibration_data = GetTrtCalibTableData(
@@ -221,9 +220,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
   SetAttr(op_desc->Proto(), "engine_serialized_data", std::string(""));
-  SetAttr(op_desc->Proto(), "engine_serialized_data_path",
-          GetTrtEngineSerializedPath(Get<std::string>("model_opt_cache_dir"),
-                                     engine_key));
 
   std::unique_ptr<tensorrt::TRTInt8Calibrator> calibrator;
   if (enable_int8 && calibration_data.size() != 0) {
@@ -239,13 +235,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   std::string trt_engine_serialized_data = GetTrtEngineSerializedData(
       Get<std::string>("model_opt_cache_dir"), engine_key);
 
-  tensorrt::TensorRTEngine *trt_engine =
-      inference::Singleton<inference::tensorrt::TRTEngineManager>::Global().Create(
-          Get<int>("max_batch_size"), Get<int>("workspace_size"), enable_int8,
-          calibrator.get(), engine_key, Get<int>("gpu_device_id"));
   if (trt_engine_serialized_data.size() == 0) {
     LOG(INFO) << "Prepare TRT engine (Optimize model structure, Select OP "
                  "kernel etc). This process may cost a lot of time.";
+    std::unique_ptr<tensorrt::TensorRTEngine> trt_engine(
+        new tensorrt::TensorRTEngine(
+            Get<int>("max_batch_size"), Get<int>("workspace_size"),
+            enable_int8, calibrator.get(), Get<int>("gpu_device_id")));
     auto *scope = param_scope();
     framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
     std::unordered_set<std::string> param_set(params.begin(), params.end());
@@ -253,7 +249,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
         .ConvertBlockToTRTEngine(
             &block_desc_temp, *scope,
             std::vector<std::string>(input_names.begin(), input_names.end()),
-            param_set, output_mapping, trt_engine);
+            param_set, output_mapping, trt_engine.get());
     nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
     trt_engine_serialized_data =
         std::string((const char *)serialized_engine_data->data(),
@@ -263,11 +259,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
                                    engine_key),
         trt_engine_serialized_data);
   } else {
-    LOG(INFO) << "Load TRT Engine from optimized serialized data : "
+    LOG(INFO) << "Load TRT Optimized Info from "
              << GetTrtEngineSerializedPath(
                     Get<std::string>("model_opt_cache_dir"), engine_key);
-    trt_engine->Deserialize(trt_engine_serialized_data);
   }
+
   SetAttr(op_desc->Proto(), "engine_serialized_data",
           trt_engine_serialized_data);
 }
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 7149f16b36..da2e9803f0 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -342,7 +342,6 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
       config_.static_memory_optim_force_update_);
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
-  argument_.SetPredictorID(predictor_id_);
   if (!config_.model_dir().empty()) {
     argument_.SetModelDir(config_.model_dir());
   } else {
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 732ea8061b..9ff9174305 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -44,9 +44,7 @@ using framework::NaiveExecutor;
  */
 class AnalysisPredictor : public PaddlePredictor {
  public:
-  explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
-    predictor_id_ = inference::GetUniqueId();
-  }
+  explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {}
   ~AnalysisPredictor();
 
   bool Init(const std::shared_ptr<framework::Scope> &parent_scope,
@@ -146,7 +144,6 @@ class AnalysisPredictor : public PaddlePredictor {
   const size_t max_shape_collect_count_{1000};
   int need_collect_var_shapes_{-1};  // -1 for default, 0 for false, 1 for true.
   std::vector<std::map<std::string, std::vector<int>>> batch_var_shapes_;
-  int predictor_id_;
 
  private:
   // Some status here that help to determine the status inside the predictor.
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 6abc9a1f08..657dfd9355 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -199,43 +199,6 @@ class TensorRTEngine {
 #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ARGS...) \
   engine__->network()->add##layer__(ARGS);
 
-/*
- * Helper to control the TensorRT engine's creation and deletion.
- */
-class TRTEngineManager {
- public:
-  bool HasEngine(const std::string& name) const {
-    if (engines_.count(name) == 0) return false;
-    return engines_.at(name).get() != nullptr;
-  }
-
-  // Get an engine called `name`.
-  TensorRTEngine* Get(const std::string& name) const {
-    return engines_.at(name).get();
-  }
-
-  // Create or get an engine called `name`
-  TensorRTEngine* Create(int max_batch, int max_workspace, bool enable_int8,
-                         TRTInt8Calibrator* calibrator,
-                         const std::string& engine_name, int device_id = 0) {
-    std::unique_lock<std::mutex> lk(mut_);
-    auto* p = new TensorRTEngine(max_batch, max_workspace, enable_int8,
-                                 calibrator, device_id);
-    engines_[engine_name].reset(p);
-    return p;
-  }
-
-  void DeleteALL() {
-    for (auto& item : engines_) {
-      item.second.reset(nullptr);
-    }
-  }
-
- private:
-  std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
-  std::mutex mut_;
-};
-
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
index 03992f88b5..061dd30497 100644
--- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
+++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h
@@ -31,7 +31,8 @@ namespace inference {
 namespace tensorrt {
 namespace plugin {
 
-class PluginFactoryTensorRT : public nvinfer1::IPluginFactory {
+class PluginFactoryTensorRT : public nvinfer1::IPluginFactory,
+                              public DeleteHelper {
  public:
   // Deserialization method
   PluginTensorRT* createPlugin(const char* layer_name, const void* serial_data,
diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h
index 55ca681c78..1cae4ccae4 100644
--- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h
+++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h
@@ -24,6 +24,13 @@ namespace inference {
 namespace tensorrt {
 namespace plugin {
 
+// Some TRT base classes lack a virtual destructor.
+// We use a helper class to fix this.
+struct DeleteHelper {
+ protected:
+  virtual ~DeleteHelper() {}
+};
+
 template <typename T>
 inline void SerializeValue(void** buffer, T const& value);
 
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index cb6412115b..3f98b0a934 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -41,7 +41,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
  private:
   std::vector<std::string> input_names_;
   std::unordered_set<std::string> param_names_;
-  mutable TensorRTEngine *trt_engine_;
+  mutable std::unique_ptr<TensorRTEngine> trt_engine_;
   int max_batch_size_;
   int workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;
@@ -64,7 +64,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
     calibration_data_ = Attr<std::string>("calibration_data");
     engine_key_ = Attr<std::string>("engine_key");
     engine_serialized_data_ = Attr<std::string>("engine_serialized_data");
-    trt_engine_ = nullptr;
 
     auto params = Attr<std::vector<std::string>>("parameters");
     for (const auto &param : params) {
@@ -78,16 +77,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
     if (enable_int8_ && calibration_data_.size()) {
       calibrator_.reset(new TRTInt8Calibrator(calibration_data_));
     }
-
-    // we will create an engine here.
-    if (!calibration_mode_) {
-      if (inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
-              .HasEngine(engine_key_)) {
-        trt_engine_ = inference::Singleton<
-                          inference::tensorrt::TRTEngineManager>::Global()
-                          .Get(engine_key_);
-      }
-    }
   }
 
  protected:
@@ -231,15 +220,17 @@ class TensorRTEngineOp : public framework::OperatorBase {
 
   TensorRTEngine *GetEngine(const framework::Scope &scope,
                             const platform::Place &dev_place) const {
-    if (trt_engine_ == nullptr) {
-      trt_engine_ =
-          inference::Singleton<inference::tensorrt::TRTEngineManager>::Global()
-              .Create(max_batch_size_, workspace_size_, enable_int8_,
-                      calibrator_.get(), engine_key_,
-                      boost::get<platform::CUDAPlace>(dev_place).device);
-      PrepareTRTEngine(scope, trt_engine_);
+    if (trt_engine_.get() == nullptr) {
+      trt_engine_.reset(new inference::tensorrt::TensorRTEngine(
+          max_batch_size_, workspace_size_, enable_int8_, calibrator_.get(),
+          boost::get<platform::CUDAPlace>(dev_place).device));
+      if (engine_serialized_data_.size() > 0) {
+        trt_engine_->Deserialize(engine_serialized_data_);
+      } else {
+        PrepareTRTEngine(scope, trt_engine_.get());
+      }
     }
-    return trt_engine_;
+    return trt_engine_.get();
   }
 
   void PrepareTRTEngine(const framework::Scope &scope,
-- 
GitLab
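
For readers of the patch, the sketch below restates the ownership model it introduces: the subgraph pass serializes the engine it builds into the op's "engine_serialized_data" attribute, and each TensorRTEngineOp then owns its engine through a std::unique_ptr, deserializing the cached plan (or building one via PrepareTRTEngine) on first use instead of looking it up in the global TRTEngineManager. Engine, BuildFromScratch, and EngineOp here are simplified stand-ins for illustration only, not Paddle or TensorRT APIs.

#include <memory>
#include <string>
#include <utility>

// Stand-in for tensorrt::TensorRTEngine: it can either be restored from a
// serialized plan or built from scratch (the expensive path).
struct Engine {
  void Deserialize(const std::string& plan) { /* restore the cached plan */ }
  void BuildFromScratch() { /* optimize the network; slow */ }
};

// Stand-in for TensorRTEngineOp after this patch: the op owns the engine and
// creates it lazily on the first call, so no global registry is needed.
class EngineOp {
 public:
  explicit EngineOp(std::string serialized)
      : serialized_(std::move(serialized)) {}

  Engine* GetEngine() const {
    if (engine_ == nullptr) {                // first call: create the engine
      engine_.reset(new Engine);
      if (!serialized_.empty()) {
        engine_->Deserialize(serialized_);   // reuse the cached plan
      } else {
        engine_->BuildFromScratch();         // no cache yet: build and pay the cost once
      }
    }
    return engine_.get();                    // later calls reuse the same engine
  }

 private:
  std::string serialized_;                   // "engine_serialized_data" attribute
  mutable std::unique_ptr<Engine> engine_;   // owned per op, freed with the op
};

Tying the engine's lifetime to the op that uses it (and therefore to its predictor) is what makes both the predictor id and the engine_key-based lookup in TRTEngineManager unnecessary.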