未验证 提交 fcd6d675 编写于 作者: X xiaoxiaohehe001 提交者: GitHub

[Paddle Inference] Add ci flags for a persistent IBuilder. (#49538)

上级 66d516ff
...@@ -42,6 +42,8 @@ limitations under the License. */ ...@@ -42,6 +42,8 @@ limitations under the License. */
#include "paddle/phi/core/stream.h" #include "paddle/phi/core/stream.h"
#include "paddle/utils/any.h" #include "paddle/utils/any.h"
DECLARE_bool(trt_ibuilder_cache);
namespace paddle { namespace paddle {
namespace inference { namespace inference {
namespace tensorrt { namespace tensorrt {
...@@ -679,16 +681,6 @@ class TensorRTEngine { ...@@ -679,16 +681,6 @@ class TensorRTEngine {
std::vector<std::unique_ptr<nvinfer1::IPluginV2IOExt>> owned_plugin_v2ioext_; std::vector<std::unique_ptr<nvinfer1::IPluginV2IOExt>> owned_plugin_v2ioext_;
// TensorRT related internal members // TensorRT related internal members
// Custom deleter for TensorRT objects, which are released via their
// destroy() member rather than operator delete.
template <typename T>
struct Destroyer {
  void operator()(T* ptr) {
    if (ptr != nullptr) {
      ptr->destroy();
    }
  }
};

// Owning pointer for TensorRT objects; releases them through destroy().
template <typename T>
using infer_ptr = std::unique_ptr<T, Destroyer<T>>;
infer_ptr<nvinfer1::IBuilder> infer_builder_; infer_ptr<nvinfer1::IBuilder> infer_builder_;
infer_ptr<nvinfer1::INetworkDefinition> infer_network_; infer_ptr<nvinfer1::INetworkDefinition> infer_network_;
infer_ptr<nvinfer1::ICudaEngine> infer_engine_; infer_ptr<nvinfer1::ICudaEngine> infer_engine_;
...@@ -733,6 +725,15 @@ class TRTEngineManager { ...@@ -733,6 +725,15 @@ class TRTEngineManager {
using AllocationPtr = phi::Allocator::AllocationPtr; using AllocationPtr = phi::Allocator::AllocationPtr;
public: public:
TRTEngineManager() {
  // createInferBuilder() loads the TensorRT kernels, which takes a few
  // seconds. As long as at least one IBuilder instance stays alive, TensorRT
  // keeps those kernels loaded; holding a persistent IBuilder here avoids
  // repeated unload/reload cycles across engine creations.
  // NOTE(review): FLAGS_trt_ibuilder_cache is intended for CI runs only —
  // see the flag's definition for details.
  if (FLAGS_trt_ibuilder_cache) {
    holder_.reset(createInferBuilder(&NaiveLogger::Global()));
  }
}
bool Empty() const { bool Empty() const {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
return engines_.size() == 0; return engines_.size() == 0;
...@@ -855,15 +856,7 @@ class TRTEngineManager { ...@@ -855,15 +856,7 @@ class TRTEngineManager {
size_t max_ctx_mem_size_{0}; size_t max_ctx_mem_size_{0};
std::unordered_map<PredictorID, AllocationPtr> context_memorys_; std::unordered_map<PredictorID, AllocationPtr> context_memorys_;
std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_; std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
// createInferBuilder loads trt kernels and take a few second infer_ptr<nvinfer1::IBuilder> holder_;
// But as long as one IBuilder lives, trt kernel will not be unloaded
// Hence, a persistent IBuilder to avoid TensorRT unload/reload kernels
std::unique_ptr<nvinfer1::IBuilder, std::function<void(nvinfer1::IBuilder*)>>
holder{createInferBuilder(&NaiveLogger::Global()), [](auto* ptr) {
if (ptr) {
ptr->destroy();
}
}};
}; };
} // namespace tensorrt } // namespace tensorrt
......
...@@ -95,6 +95,17 @@ static std::tuple<int, int, int> GetTrtCompileVersion() { ...@@ -95,6 +95,17 @@ static std::tuple<int, int, int> GetTrtCompileVersion() {
NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR, NV_TENSORRT_PATCH}; NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR, NV_TENSORRT_PATCH};
} }
// Deleter functor used by infer_ptr: TensorRT objects are torn down by
// calling destroy() on them instead of deleting the pointer.
template <typename T>
struct Destroyer {
  void operator()(T* obj) {
    if (obj == nullptr) return;
    obj->destroy();
  }
};

// RAII handle for TensorRT objects (unique ownership, destroy()-based
// release).
template <typename T>
using infer_ptr = std::unique_ptr<T, Destroyer<T>>;
// A logger for create TensorRT infer builder. // A logger for create TensorRT infer builder.
class NaiveLogger : public nvinfer1::ILogger { class NaiveLogger : public nvinfer1::ILogger {
public: public:
......
...@@ -1165,3 +1165,16 @@ PADDLE_DEFINE_EXPORTED_bool(enable_cudnn_frontend, false, ""); ...@@ -1165,3 +1165,16 @@ PADDLE_DEFINE_EXPORTED_bool(enable_cudnn_frontend, false, "");
*/ */
PADDLE_DEFINE_EXPORTED_int32(cudnn_cache_saturation_count, 1, ""); PADDLE_DEFINE_EXPORTED_int32(cudnn_cache_saturation_count, 1, "");
#endif // PADDLE_WITH_CUDNN_FRONTEND #endif // PADDLE_WITH_CUDNN_FRONTEND
/**
 * CI related FLAG
 * Name: trt_ibuilder_cache
 * Since Version: 2.5.0
 * Value Range: bool, default=false
 * Example: export FLAGS_trt_ibuilder_cache=1
 * Note: This FLAG is only enabled when CI is running. If True, a persistent
 * IBuilder is kept alive so that TensorRT does not unload and reload its
 * kernels between engine builds (createInferBuilder is slow on first call).
 */
PADDLE_DEFINE_EXPORTED_bool(trt_ibuilder_cache,
                            false,
                            "Add a persistent ibuilder.");
...@@ -1403,6 +1403,7 @@ EOF ...@@ -1403,6 +1403,7 @@ EOF
set -x set -x
# set trt_convert ut to run 15% cases. # set trt_convert ut to run 15% cases.
export TEST_NUM_PERCENT_CASES=0.15 export TEST_NUM_PERCENT_CASES=0.15
export FLAGS_trt_ibuilder_cache=1
precison_cases="" precison_cases=""
bash $PADDLE_ROOT/tools/check_added_ut.sh bash $PADDLE_ROOT/tools/check_added_ut.sh
if [ ${PRECISION_TEST:-OFF} == "ON" ]; then if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
...@@ -2547,6 +2548,7 @@ EOF ...@@ -2547,6 +2548,7 @@ EOF
set -x set -x
# set trt_convert ut to run 15% cases. # set trt_convert ut to run 15% cases.
export TEST_NUM_PERCENT_CASES=0.15 export TEST_NUM_PERCENT_CASES=0.15
export FLAGS_trt_ibuilder_cache=1
precison_cases="" precison_cases=""
bash $PADDLE_ROOT/tools/check_added_ut.sh bash $PADDLE_ROOT/tools/check_added_ut.sh
#check change of pr_unnitests and dev_unnitests #check change of pr_unnitests and dev_unnitests
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册