diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc index c8fc0bedfd3dc6a548187adc6393b86fccfeb5fb..7bcf2dd1eeb17e802c5647df31945284ae08fa95 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc @@ -18,7 +18,7 @@ namespace paddle { namespace inference { namespace tensorrt { -bool if_skip_merging_optimize(TensorRTEngine* engine_, +bool to_skip_merging_optimize(TensorRTEngine* engine_, const std::vector& filters, const std::vector& strides, const std::vector& paddings, @@ -101,7 +101,7 @@ class Conv2dOpConverter : public OpConverter { engine_->SetITensor(output_name, layer->getOutput(0)); if (test_mode || - if_skip_merging_optimize(engine_, {filter_h, filter_w}, strides, + to_skip_merging_optimize(engine_, {filter_h, filter_w}, strides, paddings, op_desc.Input("Input").front())) { engine_->DeclareOutput(output_name); } diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 14e9e14d33d637ee68e37593cc48721e5169499f..9e0f95844761db7571c5313726d34685a9aa66b2 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -133,6 +133,10 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset, buffer_sizes_[name] = 0; } +bool TensorRTEngine::HasDeclared(const std::string &name) { + return buffer_sizes_.count(name) > 0; +} + void TensorRTEngine::DeclareOutput(const std::string &name) { PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate output name %s", name); diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index e828d2077d7ec9dbeae9e75202f0aa1c26b35b60..d9d3827321127631c0af6e5cfd2dfdd640cee146 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -91,6 +91,8 @@ class TensorRTEngine : public EngineBase { const 
std::string& name); // Set the itensor_map_[name] as the network's output, and set its name. void DeclareOutput(const std::string& name); + // Returns true if `name` already has an entry in buffer_sizes_ (e.g. set by DeclareOutput). + bool HasDeclared(const std::string& name); // GPU memory address for an ITensor with specific name. One can operate on // these memory directly for acceleration, for example, output the converted diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc index bf320a0cbc2fff5f973c48768281e26d0fde232b..a5635f911aa245c510efa07939e54dbdd8c1c2d9 100644 --- a/paddle/fluid/inference/tests/api/trt_models_tester.cc +++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc @@ -96,11 +96,16 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) { } } -TEST(trt_models_test, main) { - std::vector infer_models = {"mobilenet", "resnet50", - "resnext50"}; - for (auto &model_dir : infer_models) { - CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + model_dir); - } +TEST(trt_models_test, mobilenet) { + CompareTensorRTWithFluid(1, FLAGS_dirname + "/mobilenet"); +} + +TEST(trt_models_test, resnet50) { + CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnet50"); } + +TEST(trt_models_test, resnext50) { + CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnext50"); +} + } // namespace paddle diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h index d4ba0f9c33c91811647f9d19a332f139c16b0eb2..673f86da76ee0712b4d941f5b33594f89926b973 100644 --- a/paddle/fluid/operators/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt_engine_op.h @@ -223,7 +223,9 @@ class TensorRTEngineKernel : public framework::OpKernel { // Add outputs for (auto& output : output_maps) { - engine->DeclareOutput(output); + if (!engine->HasDeclared(output)) {  // DeclareOutput enforces unique names; skip already-declared ones + engine->DeclareOutput(output); + } } engine->FreezeNetwork();