From 6e0cf610125a7a74b2729fcc296f26f56e805c33 Mon Sep 17 00:00:00 2001
From: Tian Zheng
Date: Tue, 23 May 2023 13:55:31 +0800
Subject: [PATCH] Fix trt runtime destroy issue (#53937)

---
 paddle/fluid/inference/tensorrt/engine.cc | 20 ++++++++++----------
 paddle/fluid/inference/tensorrt/engine.h  |  1 +
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 5844626c373..4cd36bd0451 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -370,9 +370,9 @@ void TensorRTEngine::FreezeNetwork() {
 #else
   ihost_memory_.reset(infer_builder_->buildSerializedNetwork(
       *network(), *infer_builder_config_));
-  infer_ptr<nvinfer1::IRuntime> runtime(createInferRuntime(&logger_));
-  infer_engine_.reset(runtime->deserializeCudaEngine(ihost_memory_->data(),
-                                                     ihost_memory_->size()));
+  infer_runtime_.reset(createInferRuntime(&logger_));
+  infer_engine_.reset(infer_runtime_->deserializeCudaEngine(
+      ihost_memory_->data(), ihost_memory_->size()));
 #endif
 
   PADDLE_ENFORCE_NOT_NULL(
@@ -559,31 +559,31 @@ std::unordered_map<std::string, nvinfer1::ITensor *>
 
 void TensorRTEngine::Deserialize(const std::string &engine_serialized_data) {
   freshDeviceId();
-  infer_ptr<nvinfer1::IRuntime> runtime(createInferRuntime(&logger_));
+  infer_runtime_.reset(createInferRuntime(&logger_));
 
   if (use_dla_) {
     if (precision_ != phi::DataType::INT8 &&
         precision_ != phi::DataType::FLOAT16) {
       LOG(WARNING) << "TensorRT DLA must be used with int8 or fp16, but you "
                       "set float32, so DLA is not used.";
-    } else if (runtime->getNbDLACores() == 0) {
+    } else if (infer_runtime_->getNbDLACores() == 0) {
       LOG(WARNING)
           << "TensorRT DLA is set by config, but your device does not have "
              "DLA, so DLA is not used.";
     } else {
-      if (dla_core_ < 0 || dla_core_ >= runtime->getNbDLACores()) {
+      if (dla_core_ < 0 || dla_core_ >= infer_runtime_->getNbDLACores()) {
         dla_core_ = 0;
         LOG(WARNING) << "Invalid DLACore, must be 0 < DLACore < "
-                     << runtime->getNbDLACores() << ", but got " << dla_core_
-                     << ", so use use 0 as default.";
+                     << infer_runtime_->getNbDLACores() << ", but got "
+                     << dla_core_ << ", so use use 0 as default.";
       }
-      runtime->setDLACore(dla_core_);
+      infer_runtime_->setDLACore(dla_core_);
       LOG(INFO) << "TensorRT DLA enabled in Deserialize(), DLACore "
                 << dla_core_;
     }
   }
 
-  infer_engine_.reset(runtime->deserializeCudaEngine(
+  infer_engine_.reset(infer_runtime_->deserializeCudaEngine(
       engine_serialized_data.c_str(), engine_serialized_data.size()));
 
   PADDLE_ENFORCE_NOT_NULL(
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index d203b50a0d6..1f69bbfba09 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -811,6 +811,7 @@ class TensorRTEngine {
   // TensorRT related internal members
   infer_ptr<nvinfer1::IBuilder> infer_builder_;
   infer_ptr<nvinfer1::INetworkDefinition> infer_network_;
+  infer_ptr<nvinfer1::IRuntime> infer_runtime_;
   infer_ptr<nvinfer1::ICudaEngine> infer_engine_;
   std::unordered_map<PredictorID, infer_ptr<nvinfer1::IExecutionContext>>
       infer_context_;
-- 
GitLab
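
Reviewer note (not part of the commit): the bug fixed here is an object-lifetime
issue. TensorRT requires the nvinfer1::IRuntime that deserialized an engine to
outlive that engine. Before this patch the runtime was a function-local
infer_ptr, so it was destroyed when FreezeNetwork() or Deserialize() returned
while infer_engine_ still referenced it. Promoting the runtime to a class member
declared before infer_engine_ lets C++'s reverse-declaration-order destruction
tear the engine down first. Below is a minimal standalone sketch of the same
pattern, assuming TensorRT 8+ (where delete replaces destroy(), so plain
std::unique_ptr works); the class name EngineHolder and logger are hypothetical
stand-ins, not Paddle code.

// engine_lifetime_sketch.cc -- illustrative only, not part of the patch.
#include <NvInfer.h>

#include <cstdio>
#include <memory>
#include <string>

// Minimal logger; TensorRT requires an ILogger to create a runtime.
class StderrLogger : public nvinfer1::ILogger {
  void log(Severity severity, const char* msg) noexcept override {
    if (severity <= Severity::kWARNING) fprintf(stderr, "[TRT] %s\n", msg);
  }
};

class EngineHolder {  // hypothetical stand-in for TensorRTEngine
 public:
  void Deserialize(const std::string& blob) {
    // Before the fix: the runtime lived in a local smart pointer and died on
    // return, leaving infer_engine_ referring to a destroyed runtime.
    // After the fix: the runtime is a member and outlives the engine.
    infer_runtime_.reset(nvinfer1::createInferRuntime(logger_));
    infer_engine_.reset(
        infer_runtime_->deserializeCudaEngine(blob.data(), blob.size()));
  }

 private:
  StderrLogger logger_;
  // Declaration order matters: members are destroyed in reverse order, so
  // infer_engine_ (declared last) is destroyed before infer_runtime_.
  std::unique_ptr<nvinfer1::IRuntime> infer_runtime_;
  std::unique_ptr<nvinfer1::ICudaEngine> infer_engine_;
};

The same ordering argument explains why the patch inserts infer_runtime_ above
infer_engine_ in engine.h rather than appending it at the end of the member
list.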