Merge pull request #13927 from NHZlX/fix_googlenet_bug_with_rule

Fix googlenet bug with rule

Merge pull request #13927 from NHZlX/fix_googlenet_bug_with_rule
Fix googlenet bug with rule
65b61db1 · Zhaolong Xing · GitHub · ea8984c9 · 5700fafd · 65b61db1
5 changed files
--- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
@@ -18,6 +18,21 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {

+bool to_skip_merging_optimize(TensorRTEngine* engine_,
+                              const std::vector<int>& filters,
+                              const std::vector<int>& strides,
+                              const std::vector<int>& paddings,
+                              std::string input_name) {
+  if (engine_->itensor_quote_num[input_name] > 0) {
+    return true;
+  }
+  if (filters[0] == 1 && filters[1] == 1 && strides[0] == 1 &&
+      strides[1] == 1 && paddings[0] == 0 && paddings[1] == 0)
+    engine_->itensor_quote_num[input_name] += 1;
+
+  return false;
+}
+
 class Conv2dOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
@@ -31,6 +46,7 @@ class Conv2dOpConverter : public OpConverter {
    PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);

    auto* X = engine_->GetITensor(op_desc.Input("Input").front());
+
    // Declare weights
    auto* Y_v = scope.FindVar(op_desc.Input("Filter").front());
    PADDLE_ENFORCE_NOT_NULL(Y_v);
@@ -83,7 +99,10 @@ class Conv2dOpConverter : public OpConverter {
        std::move(weight_tensor);
    layer->getOutput(0)->setName(output_name.c_str());
    engine_->SetITensor(output_name, layer->getOutput(0));
-    if (test_mode) {
+
+    if (test_mode ||
+        to_skip_merging_optimize(engine_, {filter_h, filter_w}, strides,
+                                 paddings, op_desc.Input("Input").front())) {
      engine_->DeclareOutput(output_name);
    }
  }

--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -133,6 +133,10 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
  buffer_sizes_[name] = 0;
 }

+bool TensorRTEngine::HasDeclared(const std::string &name) {
+  return buffer_sizes_.count(name) > 0;
+}
+
 void TensorRTEngine::DeclareOutput(const std::string &name) {
  PADDLE_ENFORCE_EQ(0, buffer_sizes_.count(name), "duplicate output name %s",
                    name);

--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -91,6 +91,8 @@ class TensorRTEngine : public EngineBase {
                     const std::string& name);
  // Set the itensor_map_[name] as the network's output, and set its name.
  void DeclareOutput(const std::string& name);
+  // Check if the ITensor has been declared
+  bool HasDeclared(const std::string& name);

  // GPU memory address for an ITensor with specific name. One can operate on
  // these memory directly for acceleration, for example, output the converted
@@ -132,6 +134,16 @@ class TensorRTEngine : public EngineBase {
  std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
      weight_map;

+  // TODO: (NHZLX)
+  // In the normal case, paddle-trt has a bug when running GoogLeNet. When
+  // there are more than two 1 * 1 convolutions with the same input,
+  // paddle-tensorrt applies a merging optimization that fuses those convs
+  // into one conv, which then triggers the bug. So we use a strategy to avoid
+  // this optimization for the time being. This bug will be fixed in the
+  // future.
+  std::unordered_map<std::string /*name*/, int /*ITensor_quote_num*/>
+      itensor_quote_num;
+
 private:
  // the max batch size
  int max_batch_;

--- a/paddle/fluid/inference/tests/api/trt_models_tester.cc
+++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc
@@ -93,11 +93,16 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
  }
 }

-TEST(trt_models_test, main) {
-  std::vector<std::string> infer_models = {"mobilenet", "resnet50",
-                                           "resnext50"};
-  for (auto &model_dir : infer_models) {
-    CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + model_dir);
-  }
+TEST(trt_models_test, mobilenet) {
+  CompareTensorRTWithFluid(1, FLAGS_dirname + "/mobilenet");
+}
+
+TEST(trt_models_test, resnet50) {
+  CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnet50");
 }
+
+TEST(trt_models_test, resnext50) {
+  CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnext50");
+}
+
 }  // namespace paddle
--- a/paddle/fluid/operators/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt_engine_op.h
@@ -223,7 +223,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {

    // Add outputs
    for (auto& output : output_maps) {
-      engine->DeclareOutput(output);
+      if (!engine->HasDeclared(output)) {
+        engine->DeclareOutput(output);
+      }
    }

    engine->FreezeNetwork();