diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index 8a4afa325559d042a66c7ed25d85242841d91612..a3f3921db4ed2abbe80d6db54c39b3b43ac38d09 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -42,6 +42,8 @@ limitations under the License. */ #include "paddle/phi/core/stream.h" #include "paddle/utils/any.h" +DECLARE_bool(trt_ibuilder_cache); + namespace paddle { namespace inference { namespace tensorrt { @@ -679,16 +681,6 @@ class TensorRTEngine { std::vector> owned_plugin_v2ioext_; // TensorRT related internal members - template - struct Destroyer { - void operator()(T* x) { - if (x) { - x->destroy(); - } - } - }; - template - using infer_ptr = std::unique_ptr>; infer_ptr infer_builder_; infer_ptr infer_network_; infer_ptr infer_engine_; @@ -733,6 +725,15 @@ class TRTEngineManager { using AllocationPtr = phi::Allocator::AllocationPtr; public: + TRTEngineManager() { + // createInferBuilder loads trt kernels and takes a few seconds + // But as long as one IBuilder lives, trt kernel will not be unloaded + // Hence, a persistent IBuilder to avoid TensorRT unload/reload kernels + if (FLAGS_trt_ibuilder_cache) { + holder_.reset(createInferBuilder(&NaiveLogger::Global())); + } + } + bool Empty() const { std::lock_guard lock(mutex_); return engines_.size() == 0; @@ -855,15 +856,7 @@ class TRTEngineManager { size_t max_ctx_mem_size_{0}; std::unordered_map context_memorys_; std::unordered_map> engines_; - // createInferBuilder loads trt kernels and take a few second - // But as long as one IBuilder lives, trt kernel will not be unloaded - // Hence, a persistent IBuilder to avoid TensorRT unload/reload kernels - std::unique_ptr> - holder{createInferBuilder(&NaiveLogger::Global()), [](auto* ptr) { - if (ptr) { - ptr->destroy(); - } - }}; + infer_ptr holder_; }; } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/helper.h b/paddle/fluid/inference/tensorrt/helper.h 
index 3d6bb6da3d7585cca4ec3ce2601d66505b7e816c..1effa5c671b79a6a4601ee94d193bed0d79bfd1e 100644 --- a/paddle/fluid/inference/tensorrt/helper.h +++ b/paddle/fluid/inference/tensorrt/helper.h @@ -95,6 +95,17 @@ static std::tuple GetTrtCompileVersion() { NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR, NV_TENSORRT_PATCH}; } +template +struct Destroyer { + void operator()(T* x) { + if (x) { + x->destroy(); + } + } +}; +template +using infer_ptr = std::unique_ptr>; + // A logger for create TensorRT infer builder. class NaiveLogger : public nvinfer1::ILogger { public: diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc index 6850d91b8dd4519453096363634fdc6ca92a979d..5f7058fdc1079c3ae0f8be7fe78cc5a56e8b5287 100644 --- a/paddle/phi/core/flags.cc +++ b/paddle/phi/core/flags.cc @@ -1165,3 +1165,16 @@ PADDLE_DEFINE_EXPORTED_bool(enable_cudnn_frontend, false, ""); */ PADDLE_DEFINE_EXPORTED_int32(cudnn_cache_saturation_count, 1, ""); #endif // PADDLE_WITH_CUDNN_FRONTEND + +/** + * CI related FLAG + * Name: trt_ibuilder_cache + * Since Version: 2.5.0 + * Value Range: bool, default=false + * Example: + * Note: This FLAG is only enabled when CI is running. If True, a persistent + * IBuilder is added to avoid TensorRT unload/reload kernels. + */ +PADDLE_DEFINE_EXPORTED_bool(trt_ibuilder_cache, + false, + "Add a persistent ibuilder."); diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 1587ff83fb8918b06e8aee403c184e33e4364a24..6b3a3c4487e3a0f78634f5cde682c27b962660f4 100644 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -1403,6 +1403,7 @@ EOF set -x # set trt_convert ut to run 15% cases. export TEST_NUM_PERCENT_CASES=0.15 + export FLAGS_trt_ibuilder_cache=1 precison_cases="" bash $PADDLE_ROOT/tools/check_added_ut.sh if [ ${PRECISION_TEST:-OFF} == "ON" ]; then @@ -2547,6 +2548,7 @@ EOF set -x # set trt_convert ut to run 15% cases. 
export TEST_NUM_PERCENT_CASES=0.15 + export FLAGS_trt_ibuilder_cache=1 precison_cases="" bash $PADDLE_ROOT/tools/check_added_ut.sh #check change of pr_unnitests and dev_unnitests