diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc index 0a37d3968c39d2c244bbd82161afddf6330e421d..7bcf2dd1eeb17e802c5647df31945284ae08fa95 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc @@ -18,6 +18,21 @@ namespace paddle { namespace inference { namespace tensorrt { +bool to_skip_merging_optimize(TensorRTEngine* engine_, + const std::vector& filters, + const std::vector& strides, + const std::vector& paddings, + std::string input_name) { + if (engine_->itensor_quote_num[input_name] > 0) { + return true; + } + if (filters[0] == 1 && filters[1] == 1 && strides[0] == 1 && + strides[1] == 1 && paddings[0] == 0 && paddings[1] == 0) + engine_->itensor_quote_num[input_name] += 1; + + return false; +} + class Conv2dOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, @@ -31,6 +46,7 @@ class Conv2dOpConverter : public OpConverter { PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1); auto* X = engine_->GetITensor(op_desc.Input("Input").front()); + // Declare weights auto* Y_v = scope.FindVar(op_desc.Input("Filter").front()); PADDLE_ENFORCE_NOT_NULL(Y_v); @@ -83,7 +99,10 @@ class Conv2dOpConverter : public OpConverter { std::move(weight_tensor); layer->getOutput(0)->setName(output_name.c_str()); engine_->SetITensor(output_name, layer->getOutput(0)); - if (test_mode) { + + if (test_mode || + to_skip_merging_optimize(engine_, {filter_h, filter_w}, strides, + paddings, op_desc.Input("Input").front())) { engine_->DeclareOutput(output_name); } } diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 14e9e14d33d637ee68e37593cc48721e5169499f..9e0f95844761db7571c5313726d34685a9aa66b2 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -133,6 +133,10 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset, buffer_sizes_[name] = 0; } +bool TensorRTEngine::HasDeclared(const std::string &name) { + return buffer_sizes_.count(name) > 0; +} + void TensorRTEngine::DeclareOutput(const std::string &name) { PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate output name %s", name); diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index bd3ba4cea6551a7f6651e311e2649de191a6faa1..d9d3827321127631c0af6e5cfd2dfdd640cee146 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -91,6 +91,8 @@ class TensorRTEngine : public EngineBase { const std::string& name); // Set the itensor_map_[name] as the network's output, and set its name. void DeclareOutput(const std::string& name); + // Check if the ITensor has been declared + bool HasDeclared(const std::string& name); // GPU memory address for an ITensor with specific name. One can operate on // these memory directly for acceleration, for example, output the converted @@ -132,6 +134,16 @@ class TensorRTEngine : public EngineBase { std::unordered_map> weight_map; + // TODO: (NHZLX) + // In the normal case, the paddle-trt exists bug when runing the googlenet. + // When there are more than two convolutions of 1 * 1 with the same input, the + // paddle-tensorrt will do the merging optimization, which fuse those conv + // into + // one conv, and then trigger bug. So, We should use strategy to avoid this + // optimization for the time being. This bug will be fixed in the future. + std::unordered_map + itensor_quote_num; + private: // the max batch size int max_batch_; diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc index 91111f2af56065bbf57ba3a41bddd55ecced1060..75840a9c437d956da4f542a38b2532ea20ee96c5 100644 --- a/paddle/fluid/inference/tests/api/trt_models_tester.cc +++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc @@ -93,11 +93,16 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) { } } -TEST(trt_models_test, main) { - std::vector infer_models = {"mobilenet", "resnet50", - "resnext50"}; - for (auto &model_dir : infer_models) { - CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + model_dir); - } +TEST(trt_models_test, mobilenet) { + CompareTensorRTWithFluid(1, FLAGS_dirname + "/mobilenet"); +} + +TEST(trt_models_test, resnet50) { + CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnet50"); } + +TEST(trt_models_test, resnext50) { + CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnext50"); +} + } // namespace paddle diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h index d4ba0f9c33c91811647f9d19a332f139c16b0eb2..673f86da76ee0712b4d941f5b33594f89926b973 100644 --- a/paddle/fluid/operators/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt_engine_op.h @@ -223,7 +223,9 @@ class TensorRTEngineKernel : public framework::OpKernel { // Add outputs for (auto& output : output_maps) { - engine->DeclareOutput(output); + if (!engine->HasDeclared(output)) { + engine->DeclareOutput(output); + } } engine->FreezeNetwork();