Unverified commit fcd6d675, authored by xiaoxiaohehe001, committed by GitHub

[Paddle Inference] Add ci flags for a persistent IBuilder. (#49538)

Parent: 66d516ff
......@@ -42,6 +42,8 @@ limitations under the License. */
#include "paddle/phi/core/stream.h"
#include "paddle/utils/any.h"
DECLARE_bool(trt_ibuilder_cache);
namespace paddle {
namespace inference {
namespace tensorrt {
......@@ -679,16 +681,6 @@ class TensorRTEngine {
std::vector<std::unique_ptr<nvinfer1::IPluginV2IOExt>> owned_plugin_v2ioext_;
// TensorRT related internal members
template <typename T>
struct Destroyer {
void operator()(T* x) {
if (x) {
x->destroy();
}
}
};
template <typename T>
using infer_ptr = std::unique_ptr<T, Destroyer<T>>;
infer_ptr<nvinfer1::IBuilder> infer_builder_;
infer_ptr<nvinfer1::INetworkDefinition> infer_network_;
infer_ptr<nvinfer1::ICudaEngine> infer_engine_;
......@@ -733,6 +725,15 @@ class TRTEngineManager {
using AllocationPtr = phi::Allocator::AllocationPtr;
public:
TRTEngineManager() {
// createInferBuilder loads the TRT kernels and takes a few seconds,
// but as long as one IBuilder lives, the TRT kernels will not be unloaded.
// Hence, keep a persistent IBuilder to avoid TensorRT unloading/reloading kernels.
if (FLAGS_trt_ibuilder_cache) {
holder_.reset(createInferBuilder(&NaiveLogger::Global()));
}
}
bool Empty() const {
std::lock_guard<std::mutex> lock(mutex_);
return engines_.size() == 0;
......@@ -855,15 +856,7 @@ class TRTEngineManager {
size_t max_ctx_mem_size_{0};
std::unordered_map<PredictorID, AllocationPtr> context_memorys_;
std::unordered_map<std::string, std::unique_ptr<TensorRTEngine>> engines_;
// createInferBuilder loads trt kernels and take a few second
// But as long as one IBuilder lives, trt kernel will not be unloaded
// Hence, a persistent IBuilder to avoid TensorRT unload/reload kernels
std::unique_ptr<nvinfer1::IBuilder, std::function<void(nvinfer1::IBuilder*)>>
holder{createInferBuilder(&NaiveLogger::Global()), [](auto* ptr) {
if (ptr) {
ptr->destroy();
}
}};
infer_ptr<nvinfer1::IBuilder> holder_;
};
} // namespace tensorrt
......
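For context, here is a minimal sketch (outside Paddle; `SilentLogger`, `DestroyBuilder`, `BuilderPtr`, and `MakePersistentBuilder` are illustrative names, not part of this change) of what the conditional `holder_` member buys: as long as one `IBuilder` stays alive, TensorRT keeps its kernels loaded, so later `createInferBuilder` calls skip the multi-second load.

```cpp
// Sketch only; assumes TensorRT headers are available and uses the
// destroy()-based release style seen elsewhere in this diff.
#include <NvInfer.h>
#include <memory>

class SilentLogger : public nvinfer1::ILogger {
 public:
  void log(Severity severity, const char* msg) noexcept override {
    (void)severity;  // A real logger (e.g. NaiveLogger) would forward messages.
    (void)msg;
  }
};

// Deleter matching the IBuilder lifetime management used in this diff.
void DestroyBuilder(nvinfer1::IBuilder* b) {
  if (b) b->destroy();
}

using BuilderPtr =
    std::unique_ptr<nvinfer1::IBuilder, void (*)(nvinfer1::IBuilder*)>;

// Created once (e.g. in a manager's constructor, gated by a flag) and kept
// for the process lifetime so TensorRT does not unload/reload its kernels.
BuilderPtr MakePersistentBuilder(nvinfer1::ILogger& logger) {
  return BuilderPtr(nvinfer1::createInferBuilder(logger), &DestroyBuilder);
}
```

In the change itself the builder held by `holder_` is only created when `FLAGS_trt_ibuilder_cache` is on, so regular inference runs are unaffected.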
......@@ -95,6 +95,17 @@ static std::tuple<int, int, int> GetTrtCompileVersion() {
NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR, NV_TENSORRT_PATCH};
}
template <typename T>
struct Destroyer {
void operator()(T* x) {
if (x) {
x->destroy();
}
}
};
template <typename T>
using infer_ptr = std::unique_ptr<T, Destroyer<T>>;
// A logger for creating the TensorRT infer builder.
class NaiveLogger : public nvinfer1::ILogger {
public:
......
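A hedged usage sketch of the `Destroyer`/`infer_ptr` pair that now lives in helper.h; `BuildScratch` is illustrative only and assumes a `destroy()`-based (pre-8.x style) TensorRT API, matching the rest of this diff.

```cpp
// Sketch; Destroyer/infer_ptr are repeated here verbatim so the snippet
// stands alone, mirroring the definitions added to helper.h above.
#include <NvInfer.h>
#include <memory>

template <typename T>
struct Destroyer {
  void operator()(T* x) {
    if (x) {
      x->destroy();
    }
  }
};
template <typename T>
using infer_ptr = std::unique_ptr<T, Destroyer<T>>;

// One templated deleter covers every TensorRT interface exposing destroy(),
// so builder, network and config are released automatically, in reverse
// order of construction (builder last), when the scope exits.
void BuildScratch(nvinfer1::ILogger& logger) {
  infer_ptr<nvinfer1::IBuilder> builder(nvinfer1::createInferBuilder(logger));
  infer_ptr<nvinfer1::INetworkDefinition> network(
      builder->createNetworkV2(0U));
  infer_ptr<nvinfer1::IBuilderConfig> config(builder->createBuilderConfig());
  // ... add layers to `network`, tune `config`, build an engine ...
}
```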
......@@ -1165,3 +1165,16 @@ PADDLE_DEFINE_EXPORTED_bool(enable_cudnn_frontend, false, "");
*/
PADDLE_DEFINE_EXPORTED_int32(cudnn_cache_saturation_count, 1, "");
#endif // PADDLE_WITH_CUDNN_FRONTEND
/**
* CI related FLAG
* Name: trt_ibuilder_cache
* Since Version: 2.5.0
* Value Range: bool, default=false
* Example:
 * Note: This FLAG is only enabled when CI is running. If True, a persistent
 * IBuilder is kept alive to avoid TensorRT unloading/reloading kernels.
*/
PADDLE_DEFINE_EXPORTED_bool(trt_ibuilder_cache,
false,
"Add a persistent ibuilder.");
......@@ -1403,6 +1403,7 @@ EOF
set -x
# set trt_convert ut to run 15% of cases.
export TEST_NUM_PERCENT_CASES=0.15
export FLAGS_trt_ibuilder_cache=1
precison_cases=""
bash $PADDLE_ROOT/tools/check_added_ut.sh
if [ ${PRECISION_TEST:-OFF} == "ON" ]; then
......@@ -2547,6 +2548,7 @@ EOF
set -x
# set trt_convert ut to run 15% of cases.
export TEST_NUM_PERCENT_CASES=0.15
export FLAGS_trt_ibuilder_cache=1
precison_cases=""
bash $PADDLE_ROOT/tools/check_added_ut.sh
#check change of pr_unittests and dev_unittests
......