Unverified · Commit 179d4264 · authored by ming1753 · committed by GitHub

Modify MarkTrtEngineOutputs API (#56858)

* Modify MarkTrtEngineOutputs API

Parent: 8aa1772c
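The hunks below thread one new flag end to end: Argument field → pass attribute → subgraph pass → AnalysisConfig → predictor → C++ header → Python binding → test. A minimal usage sketch of the resulting API (the tensor names here are hypothetical; the with-id key follows the name + node-id convention visible in the pass code below):

    AnalysisConfig config;
    config.EnableUseGpu(100, 0);
    config.EnableTensorRtEngine(
        1 << 30, 1, 5, AnalysisConfig::Precision::kFloat32, false, false);

    // Default mode: match marked outputs by plain tensor name.
    config.MarkTrtEngineOutputs({"fc_0.tmp_0"});

    // New in this commit: match by tensor name + graph-node id, e.g.
    // "fc_0.tmp_0" produced by node 1 becomes "fc_0.tmp_01". Useful when
    // several graph nodes produce tensors with the same name.
    config.MarkTrtEngineOutputs({"fc_0.tmp_01"}, /*mark_output_with_id=*/true);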
@@ -241,6 +241,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int64_t);
   DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
   DECL_ARGUMENT_FIELD(trt_mark_output, TRTMarkOutput, bool);
+  DECL_ARGUMENT_FIELD(trt_mark_output_with_id, TRTMarkOutputWithId, bool);
   DECL_ARGUMENT_FIELD(trt_output_tensor_names,
                       TRTOutputTensorNames,
                       std::vector<std::string>);
......
@@ -163,6 +163,8 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("min_subgraph_size",
                 new int(argument->tensorrt_min_subgraph_size()));
       pass->Set("mark_output", new bool(argument->trt_mark_output()));
+      pass->Set("mark_output_with_id",
+                new bool(argument->trt_mark_output_with_id()));
       pass->Set(
           "output_tensor_names",
           new std::vector<std::string>(argument->trt_output_tensor_names()));
......
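The Set/Get pair is Paddle's pass-attribute plumbing: CreatePasses heap-allocates each value and registers it under a string key, and the subgraph pass reads it back with Get<T>(key). A rough self-contained sketch of that mechanism, assuming simple shared-ownership type erasure (Paddle's real ir::Pass differs in detail):

    #include <map>
    #include <memory>
    #include <string>

    // Type-erased attribute store illustrating pass->Set(...) / Get<T>(...).
    class PassAttrSketch {
     public:
      template <typename T>
      void Set(const std::string &key, T *attr) {
        // Take ownership of the raw pointer; the deleter restores the type.
        attrs_[key] = std::shared_ptr<void>(
            attr, [](void *p) { delete static_cast<T *>(p); });
      }

      template <typename T>
      T &Get(const std::string &key) const {
        return *static_cast<T *>(attrs_.at(key).get());
      }

     private:
      std::map<std::string, std::shared_ptr<void>> attrs_;
    };

    // Usage, mirroring the hunk above:
    //   PassAttrSketch pass;
    //   pass.Set("mark_output_with_id", new bool(true));
    //   bool with_id = pass.Get<bool>("mark_output_with_id");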
@@ -376,29 +376,30 @@ std::string TensorRtSubgraphPass::CreateTensorRTOp(
   std::vector<int> origin_outputs_dtype;
   std::map<std::string, int> map_origin_outputs_dtype;
-  // Whether to mark Outpus
+  // Mark TensorRT output nodes as trt outputs
   auto mark_output = Get<bool>("mark_output");
   auto output_tensor_name =
       Get<std::vector<std::string>>("output_tensor_names");
-  VLOG(1) << "mark Output: " << mark_output;
-  if (mark_output == 1) {
+  auto mark_output_with_id = Get<bool>("mark_output_with_id");
+  if (mark_output) {
     VLOG(1) << "begin to mark output ...";
     for (auto node : subgraph) {
       if (node->NodeType() == Node::Type::kOperation) {
-        if (node->Op()->Outputs().count("Xshape")) continue;
         for (auto *x : node->outputs) {
           if (std::count(parameters.begin(), parameters.end(), x->Name()) > 0)
             continue;
-          if (!output_tensor_name.empty() &&
-              std::count(output_tensor_name.begin(),
-                         output_tensor_name.end(),
-                         x->Name())) {
-            VLOG(1) << "output " << x->Name() << " has been marked";
-            std::string output_name_withid =
-                x->Name() + std::to_string(x->id());
+          std::string name_with_id = x->Name() + std::to_string(x->id());
+          if (((!mark_output_with_id && std::count(output_tensor_name.begin(),
+                                                   output_tensor_name.end(),
+                                                   x->Name()) > 0) ||
+               (mark_output_with_id && std::count(output_tensor_name.begin(),
+                                                  output_tensor_name.end(),
+                                                  name_with_id) > 0)) &&
+              !x->outputs.empty()) {
+            VLOG(3) << "output " << x->Name() << " has been marked";
             output_names.insert(x->Name());
-            output_names_with_id.insert(output_name_withid);
+            output_names_with_id.insert(name_with_id);
             origin_name_output_rank[x->Name()] = x->Var()->GetShape().size();
             trt_outputs.insert(x);
             map_origin_outputs_dtype[x->Name()] =
......
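The new condition packs both matching modes into one predicate (and additionally skips dangling tensors via !x->outputs.empty(), which also covers the removed Xshape special case). Written out as a standalone helper, a sketch of the logic, not the committed code:

    #include <algorithm>
    #include <string>
    #include <vector>

    // Returns true if a graph node's output tensor should be marked as a
    // TensorRT engine output under the given matching mode.
    bool MatchesMarkedOutput(const std::string &name,
                             int node_id,
                             const std::vector<std::string> &marked_names,
                             bool match_with_id) {
      // With match_with_id, the user-supplied list is expected to contain
      // name + node-id keys (e.g. "fc_0.tmp_0" from node 1 -> "fc_0.tmp_01"),
      // which disambiguates graphs where several nodes share a tensor name.
      const std::string key =
          match_with_id ? name + std::to_string(node_id) : name;
      return std::count(marked_names.begin(), marked_names.end(), key) > 0;
    }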
@@ -461,6 +461,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(tensorrt_min_subgraph_size_);
   CP_MEMBER(tensorrt_precision_mode_);
   CP_MEMBER(trt_mark_output_);
+  CP_MEMBER(trt_mark_output_with_id_);
   CP_MEMBER(trt_output_tensor_names_);
   CP_MEMBER(trt_disabled_ops_);
   CP_MEMBER(trt_use_dla_);
@@ -762,8 +763,10 @@ void AnalysisConfig::EnableTensorRtEngine(int64_t workspace_size,
 }
 
 void AnalysisConfig::MarkTrtEngineOutputs(
-    const std::vector<std::string> &output_tensor_names) {
+    const std::vector<std::string> &output_tensor_names,
+    const bool mark_output_with_id) {
   trt_mark_output_ = true;
+  trt_mark_output_with_id_ = mark_output_with_id;
   trt_output_tensor_names_ = output_tensor_names;
 }
......
@@ -1392,6 +1392,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_->SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);
   argument_->SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_);
   argument_->SetTRTMarkOutput(config_.trt_mark_output_);
+  argument_->SetTRTMarkOutputWithId(config_.trt_mark_output_with_id_);
   argument_->SetTRTOutputTensorNames(config_.trt_output_tensor_names_);
   argument_->SetTensorRtDisabledOPs(config_.trt_disabled_ops_);
   argument_->SetTensorRtUseDLA(config_.trt_use_dla_);
......
@@ -695,7 +695,8 @@ struct PD_INFER_DECL AnalysisConfig {
   /// \param output_tensor_names The name of the Tensor that needs to be marked
   ///
   void MarkTrtEngineOutputs(
-      const std::vector<std::string>& output_tensor_names = {});
+      const std::vector<std::string>& output_tensor_names = {},
+      const bool trt_mark_output_with_id = false);
   ///
   /// \brief Turn on the TensorRT memory optimization.
   ///
@@ -1237,6 +1238,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool trt_use_varseqlen_{false};
   bool trt_with_interleaved_{false};
   bool trt_mark_output_{false};
+  bool trt_mark_output_with_id_{false};
   std::vector<std::string> trt_output_tensor_names_{};
   std::string tensorrt_transformer_posid_{""};
   std::string tensorrt_transformer_maskid_{""};
......
@@ -896,7 +896,8 @@ void BindAnalysisConfig(py::module *m) {
            &AnalysisConfig::tensorrt_dynamic_shape_enabled)
       .def("mark_trt_engine_outputs",
            &AnalysisConfig::MarkTrtEngineOutputs,
-           py::arg("output_tensor_names") = std::vector<std::string>({}))
+           py::arg("output_tensor_names") = std::vector<std::string>({}),
+           py::arg("mark_output_with_id") = false)
       .def("enable_tensorrt_varseqlen", &AnalysisConfig::EnableVarseqlen)
       .def("tensorrt_varseqlen_enabled",
            &AnalysisConfig::tensorrt_varseqlen_enabled)
......
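The py::arg default is what keeps the Python-side call backward compatible: existing calls that pass only output_tensor_names still bind unchanged. A minimal self-contained pybind11 sketch of the same pattern (module and class names here are hypothetical, not Paddle's):

    #include <pybind11/pybind11.h>
    #include <pybind11/stl.h>  // std::vector<std::string> conversions
    #include <string>
    #include <vector>

    namespace py = pybind11;

    struct Config {
      void MarkOutputs(const std::vector<std::string> &names,
                       bool with_id = false) {
        names_ = names;
        with_id_ = with_id;
      }
      std::vector<std::string> names_;
      bool with_id_{false};
    };

    PYBIND11_MODULE(demo, m) {
      py::class_<Config>(m, "Config")
          .def(py::init<>())
          .def("mark_outputs",
               &Config::MarkOutputs,
               py::arg("names") = std::vector<std::string>({}),
               py::arg("with_id") = false);  // old call sites keep working
    }

    // Python: cfg.mark_outputs(["t"]) and cfg.mark_outputs(["t"], with_id=True)
    // both resolve against the one binding.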
@@ -24,9 +24,11 @@ TEST(TensorRT, mark_trt_engine_outputs) {
   config.EnableUseGpu(100, 0);
   config.EnableTensorRtEngine(
       1 << 30, 1, 5, AnalysisConfig::Precision::kFloat32, false, false);
-  // The name of the tensor that needs to be marked, the default is empty (all
-  // marks)
-  std::vector<std::string> markOutput = {"fc_0.tmp_0", "fc_0.tmp_1"};
+  // The name of the tensor that needs to be marked
+  std::vector<std::string> markOutput = {"pool2d_0.tmp_0",
+                                         "elementwise_add_0.tmp_0",
+                                         "conv2d_5.tmp_0",
+                                         "batch_norm_6.tmp_2"};
   config.MarkTrtEngineOutputs(markOutput);
   std::vector<std::vector<PaddleTensor>> inputs_all;
......