Unverified commit 2abf4326, authored by ming1753, committed by GitHub

Add MarkTrtEngineOutputs API (#56188)

* [paddle-TRT] support mark output

* [fix bug] hook function was only called once across different predictors

* add api test
Parent df445c1c
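For reference, a minimal usage sketch of the new API, adapted from the C++ test added in this PR (the model directory and tensor names are placeholders taken from that test; the include path may vary by install layout):

#include "paddle/include/paddle_inference_api.h"  // include path depends on the install layout

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./resnet50");  // directory containing __model__ and the params file
  config.EnableUseGpu(100, 0);    // 100 MB initial GPU memory pool, device 0
  config.EnableTensorRtEngine(1 << 30,  // workspace size
                              1,        // max batch size
                              5,        // min subgraph size
                              paddle::AnalysisConfig::Precision::kFloat32,
                              false,    // use_static
                              false);   // use_calib_mode
  // Mark intermediate tensors so they are kept as outputs of the TensorRT engine.
  config.MarkTrtEngineOutputs({"fc_0.tmp_0", "fc_0.tmp_1"});
  auto predictor = paddle::CreatePaddlePredictor(config);
  // ... feed inputs and call predictor->Run(...) as in the test added below ...
  return 0;
}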
......@@ -240,6 +240,10 @@ struct Argument {
DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
DECL_ARGUMENT_FIELD(trt_mark_output, TRTMarkOutput, bool);
DECL_ARGUMENT_FIELD(trt_output_tensor_names,
TRTOutputTensorNames,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(tensorrt_disabled_ops,
TensorRtDisabledOPs,
std::vector<std::string>);
......
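A rough, non-verbatim sketch of what DECL_ARGUMENT_FIELD expands to (the real macro in argument.h also tracks field validity); it only shows the getter/setter shape that the later hunks rely on when they call argument->trt_mark_output() and argument_->SetTRTMarkOutput(...):

// Simplified illustration only, not the actual Paddle macro.
#define DECL_ARGUMENT_FIELD(field__, Field, type__)    \
 public:                                               \
  type__& field__() { return field__##_; }             \
  void Set##Field(const type__& x) { field__##_ = x; } \
                                                       \
 private:                                              \
  type__ field__##_;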
......@@ -160,6 +160,10 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
pass->Set("min_subgraph_size",
new int(argument->tensorrt_min_subgraph_size()));
pass->Set("mark_output", new bool(argument->trt_mark_output()));
pass->Set(
"output_tensor_names",
new std::vector<std::string>(argument->trt_output_tensor_names()));
pass->Set("program",
new framework::ProgramDesc *(&argument->main_program()));
pass->Set("predictor_id", new int(argument->predictor_id()));
......
......@@ -371,6 +371,40 @@ std::string TensorRtSubgraphPass::CreateTensorRTOp(
// record the origin output data type
std::vector<int> origin_outputs_dtype;
std::map<std::string, int> map_origin_outputs_dtype;
// Whether to mark outputs
auto mark_output = Get<bool>("mark_output");
auto output_tensor_name =
Get<std::vector<std::string>>("output_tensor_names");
VLOG(1) << "mark Output: " << mark_output;
if (mark_output) {
VLOG(1) << "begin to mark output ...";
for (auto node : subgraph) {
if (node->NodeType() == Node::Type::kOperation) {
if (node->Op()->Outputs().count("Xshape")) continue;
for (auto *x : node->outputs) {
if (std::count(parameters.begin(), parameters.end(), x->Name()) > 0)
continue;
if (!output_tensor_name.empty() &&
std::count(output_tensor_name.begin(),
output_tensor_name.end(),
x->Name())) {
VLOG(1) << "output " << x->Name() << " has been marked";
std::string output_name_withid =
x->Name() + std::to_string(x->id());
output_names.insert(x->Name());
output_names_with_id.insert(output_name_withid);
origin_name_output_rank[x->Name()] = x->Var()->GetShape().size();
trt_outputs.insert(x);
map_origin_outputs_dtype[x->Name()] =
static_cast<int>(x->Var()->GetDataType());
}
}
}
}
}
for (auto *x : node->outputs) {
output_names.insert(x->Name());
output_names_with_id.insert(x->Name() + std::to_string(x->id()));
......
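The marking criteria above can be read as a predicate over each op output in the candidate subgraph: ops that produce an "Xshape" output are skipped entirely, outputs that are graph parameters are skipped, and a tensor is marked only when the user-supplied list is non-empty and contains its name. The helper below is a hypothetical restatement of that per-name condition (ShouldMarkAsTrtOutput is not a function in this PR), written out for readability:

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical helper mirroring the condition added in the pass above.
bool ShouldMarkAsTrtOutput(const std::string& name,
                           const std::vector<std::string>& parameters,
                           const std::vector<std::string>& output_tensor_names) {
  // Weights/parameters are never treated as engine outputs.
  if (std::count(parameters.begin(), parameters.end(), name) > 0) return false;
  // Only names explicitly passed to MarkTrtEngineOutputs are marked.
  return !output_tensor_names.empty() &&
         std::count(output_tensor_names.begin(), output_tensor_names.end(),
                    name) > 0;
}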
......@@ -459,6 +459,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(tensorrt_max_batchsize_);
CP_MEMBER(tensorrt_min_subgraph_size_);
CP_MEMBER(tensorrt_precision_mode_);
CP_MEMBER(trt_mark_output_);
CP_MEMBER(trt_output_tensor_names_);
CP_MEMBER(trt_disabled_ops_);
CP_MEMBER(trt_use_dla_);
CP_MEMBER(trt_dla_core_);
......@@ -757,6 +759,12 @@ void AnalysisConfig::EnableTensorRtEngine(int64_t workspace_size,
#endif
}
void AnalysisConfig::MarkTrtEngineOutputs(
const std::vector<std::string> &output_tensor_names) {
trt_mark_output_ = true;
trt_output_tensor_names_ = output_tensor_names;
}
void AnalysisConfig::EnableTensorRTMemoryOptim(bool engine_memory_sharing,
int sharing_identifier) {
PADDLE_ENFORCE_EQ(
......@@ -1050,6 +1058,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << tensorrt_workspace_size_;
ss << tensorrt_max_batchsize_;
ss << tensorrt_min_subgraph_size_;
ss << trt_mark_output_;
ss << use_dlnne_;
ss << dlnne_min_subgraph_size_;
......@@ -1331,6 +1340,7 @@ std::string AnalysisConfig::Summary() {
}
os.InsertRow({"trt_engine_memory_sharing",
trt_engine_memory_sharing_ ? "true" : "false"});
os.InsertRow({"trt_mark_output", trt_mark_output_ ? "true" : "false"});
#endif
}
}
......
......@@ -1386,6 +1386,8 @@ void AnalysisPredictor::PrepareArgument() {
argument_->SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_);
argument_->SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);
argument_->SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_);
argument_->SetTRTMarkOutput(config_.trt_mark_output_);
argument_->SetTRTOutputTensorNames(config_.trt_output_tensor_names_);
argument_->SetTensorRtDisabledOPs(config_.trt_disabled_ops_);
argument_->SetTensorRtUseDLA(config_.trt_use_dla_);
argument_->SetTensorRtDLACore(config_.trt_dla_core_);
......@@ -2695,8 +2697,7 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
}
void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) {
static std::once_flag register_input_hook_flag;
std::call_once(register_input_hook_flag, [this] {
std::call_once(register_input_hook_flag_, [this] {
executor_->RegisterInputHook(
[this](framework::OperatorBase *op, framework::Scope *scope) {
for (auto &input : op->Inputs()) {
......@@ -2719,8 +2720,7 @@ void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) {
void AnalysisPredictor::RegisterOutputHook(
const OutputTensorHookFunc &hookfunc) {
static std::once_flag register_output_hook_flag;
std::call_once(register_output_hook_flag, [this] {
std::call_once(register_output_hook_flag_, [this] {
executor_->RegisterOutputHook(
[this](framework::OperatorBase *op, framework::Scope *scope) {
for (auto &output : op->Outputs()) {
......
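The hook-registration change above replaces function-local static std::once_flag objects, which are shared by every AnalysisPredictor in the process (so only the first predictor ever registered its hooks), with the per-instance member flags declared in the next hunk. A standalone sketch of the difference, using hypothetical class names rather than Paddle code:

#include <iostream>
#include <mutex>

// Hypothetical types illustrating the fixed bug; not Paddle code.
struct SharedFlagPredictor {
  void RegisterHook() {
    static std::once_flag flag;  // one flag for the whole process
    std::call_once(flag, [] { std::cout << "hook registered (shared)\n"; });
  }
};

struct MemberFlagPredictor {
  std::once_flag flag_;  // one flag per predictor instance
  void RegisterHook() {
    std::call_once(flag_, [] { std::cout << "hook registered (per instance)\n"; });
  }
};

int main() {
  SharedFlagPredictor a, b;
  a.RegisterHook();  // runs the callback
  b.RegisterHook();  // does nothing: the static flag was already consumed by `a`
  MemberFlagPredictor c, d;
  c.RegisterHook();  // runs the callback
  d.RegisterHook();  // also runs: each instance owns its own flag
  return 0;
}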
......@@ -571,6 +571,8 @@ class AnalysisPredictor : public PaddlePredictor {
std::map<size_t, std::string> idx2feeds_;
std::vector<framework::OpDesc *> fetches_;
std::map<size_t, std::string> idx2fetches_;
std::once_flag register_input_hook_flag_;
std::once_flag register_output_hook_flag_;
phi::DataType model_precision_{phi::DataType::FLOAT32};
......
......@@ -690,6 +690,13 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool tensorrt_engine_enabled() const { return use_tensorrt_; }
///
/// \brief Mark tensors as outputs of the TensorRT engine so that their
/// intermediate results can be obtained after inference.
///
/// \param output_tensor_names The names of the tensors to be marked as
/// TensorRT engine outputs.
///
void MarkTrtEngineOutputs(
const std::vector<std::string>& output_tensor_names = {});
///
/// \brief Turn on the TensorRT memory optimization.
///
/// \param engine_memory_sharing Whether to enable TensorRT memory
......@@ -1204,6 +1211,8 @@ struct PD_INFER_DECL AnalysisConfig {
bool trt_use_cuda_graph_{false};
bool trt_use_varseqlen_{false};
bool trt_with_interleaved_{false};
bool trt_mark_output_{false};
std::vector<std::string> trt_output_tensor_names_{};
std::string tensorrt_transformer_posid_{""};
std::string tensorrt_transformer_maskid_{""};
bool trt_use_dla_{false};
......
......@@ -871,6 +871,9 @@ void BindAnalysisConfig(py::module *m) {
py::arg("disable_trt_plugin_fp16") = false)
.def("tensorrt_dynamic_shape_enabled",
&AnalysisConfig::tensorrt_dynamic_shape_enabled)
.def("mark_trt_engine_outputs",
&AnalysisConfig::MarkTrtEngineOutputs,
py::arg("output_tensor_names") = std::vector<std::string>({}))
.def("enable_tensorrt_varseqlen", &AnalysisConfig::EnableVarseqlen)
.def("tensorrt_varseqlen_enabled",
&AnalysisConfig::tensorrt_varseqlen_enabled)
......
......@@ -978,6 +978,14 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST)
paddle_inference_shared
ARGS
--infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
inference_analysis_test(
trt_mark_trt_engine_outputs_test
SRCS
trt_mark_trt_engine_outputs_test.cc
EXTRA_DEPS
paddle_inference_shared
ARGS
--infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
inference_analysis_test(
trt_fc_prelu_test
SRCS
......@@ -1370,6 +1378,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST)
set_tests_properties(test_trt_dynamic_shape_ernie_fp16_ser_deser
PROPERTIES TIMEOUT 300)
set_tests_properties(test_trt_dynamic_shape_ernie PROPERTIES TIMEOUT 480)
set_tests_properties(trt_mark_trt_engine_outputs_test PROPERTIES TIMEOUT
300)
endif()
if(WITH_MKLDNN)
......
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "test/cpp/inference/api/trt_test_helper.h"
namespace paddle {
namespace inference {
TEST(TensorRT, mark_trt_engine_outputs) {
std::string model_dir = FLAGS_infer_model + "/resnet50";
AnalysisConfig config;
config.SetModel(model_dir);
config.EnableUseGpu(100, 0);
config.EnableTensorRtEngine(
1 << 30, 1, 5, AnalysisConfig::Precision::kFloat32, false, false);
// The names of the tensors that need to be marked; the default is an empty
// list (mark all).
std::vector<std::string> markOutput = {"fc_0.tmp_0", "fc_0.tmp_1"};
config.MarkTrtEngineOutputs(markOutput);
std::vector<std::vector<PaddleTensor>> inputs_all;
auto predictor = CreatePaddlePredictor(config);
SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
std::vector<PaddleTensor> outputs;
for (auto &input : inputs_all) {
ASSERT_TRUE(predictor->Run(input, &outputs));
predictor->ClearIntermediateTensor();
}
}
} // namespace inference
} // namespace paddle