Unverified commit ea2e2495, authored by Zhang Jun, committed by GitHub

[inference] Use output var name to mark the NVTX flag (#49825)

* Add the output var name to the NVTX mark

* Only networks created without kEXPLICIT_BATCH (implicit batch) can call setMaxBatchSize
Parent commit: 56dbe426
@@ -62,7 +62,8 @@ void NaiveExecutor::Run() {
             << op->DebugStringEx(scope_) << " on scope " << scope_;
     op->SetIsCalledByExecutor(false);
 #ifdef PADDLE_WITH_INFERENCE_NVTX
-    platform::CudaNvtxRangePush(op->Type(), platform::NvtxRangeColor::Green);
+    platform::CudaNvtxRangePush(op->Type() + "|" + op->OutputVars(true).front(),
+                                platform::NvtxRangeColor::Green);
 #endif
     // According to reuse table, we share the out tensor's holder.
......
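For context, the change above names each NVTX range "<op type>|<first output var>" instead of just the op type, so two ops of the same type (e.g. two conv2d ops) can be told apart in an Nsight Systems timeline. Below is a minimal, hedged sketch of how such a named, colored range can be opened and closed with the NVTX C API; PushNvtxRange/PopNvtxRange are hypothetical helpers for illustration only, not Paddle's actual platform::CudaNvtxRangePush implementation.

#include <nvToolsExt.h>  // NVTX C API; header path may differ by CUDA toolkit version
#include <string>

// Open a named, colored NVTX range so the op shows up in the profiler timeline.
void PushNvtxRange(const std::string& name, uint32_t argb_color) {
  nvtxEventAttributes_t attr = {};
  attr.version = NVTX_VERSION;
  attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  attr.colorType = NVTX_COLOR_ARGB;
  attr.color = argb_color;
  attr.messageType = NVTX_MESSAGE_TYPE_ASCII;
  attr.message.ascii = name.c_str();
  nvtxRangePushEx(&attr);
}

// Close the innermost open NVTX range.
void PopNvtxRange() { nvtxRangePop(); }

// Usage matching the new naming scheme from the diff (names are illustrative):
//   PushNvtxRange("conv2d|conv2d_0.tmp_0", 0xFF00FF00 /* green */);
//   ... launch the op's kernels ...
//   PopNvtxRange();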
@@ -148,7 +148,9 @@ void TensorRTEngine::FreezeNetwork() {
                           platform::errors::InvalidArgument(
                               "Call InitNetwork first to initialize network."));
   // build engine.
-  infer_builder_->setMaxBatchSize(max_batch_);
+  if (!with_dynamic_shape_) {
+    infer_builder_->setMaxBatchSize(max_batch_);
+  }
 #if IS_TRT_VERSION_GE(8300)
   infer_builder_config_->setMemoryPoolLimit(
       nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_);
......
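The guard above reflects a TensorRT API rule: setMaxBatchSize() only applies to implicit-batch networks, while a network created with the kEXPLICIT_BATCH flag (which dynamic shapes require) takes its batch size from the tensor shapes and optimization profiles. The following is a standalone sketch of that distinction using the TensorRT C++ builder API; it is not Paddle code, and the literal batch size 32 is just an example value.

#include "NvInfer.h"

// Sketch: choose between explicit-batch and implicit-batch network creation.
void BuildNetwork(nvinfer1::IBuilder* builder, bool with_dynamic_shape) {
  if (with_dynamic_shape) {
    // Explicit batch: the batch dimension is part of the tensor shapes, so the
    // batch size comes from input dims / optimization profiles, not the builder.
    const uint32_t flags =
        1U << static_cast<uint32_t>(
            nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(flags);
    (void)network;  // mark dynamic dims as -1 and add an IOptimizationProfile
  } else {
    // Implicit batch: the batch dimension is not in the network, so the builder
    // must be told the maximum batch size (deprecated in newer TensorRT releases).
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);
    (void)network;
    builder->setMaxBatchSize(32);
  }
}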