未验证 提交 ea2e2495 编写于 作者: Z Zhang Jun 提交者: GitHub

[inference] Use output var name to mark the NVTX flag (#49825)

* Add the output variable name to the NVTX range mark

* Only networks created without kEXPLICIT_BATCH can call setMaxBatchSize
上级 56dbe426
......@@ -62,7 +62,8 @@ void NaiveExecutor::Run() {
<< op->DebugStringEx(scope_) << " on scope " << scope_;
op->SetIsCalledByExecutor(false);
#ifdef PADDLE_WITH_INFERENCE_NVTX
platform::CudaNvtxRangePush(op->Type(), platform::NvtxRangeColor::Green);
platform::CudaNvtxRangePush(op->Type() + "|" + op->OutputVars(true).front(),
platform::NvtxRangeColor::Green);
#endif
// According to reuse table, we share the out tensor's holder.
......
......@@ -148,7 +148,9 @@ void TensorRTEngine::FreezeNetwork() {
platform::errors::InvalidArgument(
"Call InitNetwork first to initialize network."));
// build engine.
infer_builder_->setMaxBatchSize(max_batch_);
if (!with_dynamic_shape_) {
infer_builder_->setMaxBatchSize(max_batch_);
}
#if IS_TRT_VERSION_GE(8300)
infer_builder_config_->setMemoryPoolLimit(
nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册