From 849a6874ad6d3b2a0a25237728ffcd0a15de06de Mon Sep 17 00:00:00 2001
From: nhzlx <zlx_hg@163.com>
Date: Tue, 16 Oct 2018 16:22:05 +0000
Subject: [PATCH] fix googlenet bug with relu

---
 .../inference/tensorrt/convert/conv2d_op.cc   | 21 ++++++++++++++++++-
 paddle/fluid/inference/tensorrt/engine.h      | 10 +++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
index 0a37d3968c3..c8fc0bedfd3 100644
--- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
@@ -18,6 +18,21 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {
 
+bool if_skip_merging_optimize(TensorRTEngine* engine_,  // true => caller declares the conv output
+                              const std::vector<int>& filters,   // {filter_h, filter_w}
+                              const std::vector<int>& strides,   // indices 0 and 1 are read
+                              const std::vector<int>& paddings,  // indices 0 and 1 are read
+                              std::string input_name) {  // key into itensor_quote_num
+  if (engine_->itensor_quote_num[input_name] > 0) {  // input already recorded by an earlier call
+    return true;
+  }
+  if (filters[0] == 1 && filters[1] == 1 && strides[0] == 1 &&  // 1 * 1 filter with
+      strides[1] == 1 && paddings[0] == 0 && paddings[1] == 0)  // stride 1 and no padding
+    engine_->itensor_quote_num[input_name] += 1;  // record it so later calls return true
+
+  return false;  // first use of this input (or not a 1 * 1 conv): nothing to skip
+}
+
 class Conv2dOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
@@ -31,6 +46,7 @@ class Conv2dOpConverter : public OpConverter {
     PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
 
     auto* X = engine_->GetITensor(op_desc.Input("Input").front());
+
     // Declare weights
     auto* Y_v = scope.FindVar(op_desc.Input("Filter").front());
     PADDLE_ENFORCE_NOT_NULL(Y_v);
@@ -83,7 +99,10 @@ class Conv2dOpConverter : public OpConverter {
         std::move(weight_tensor);
     layer->getOutput(0)->setName(output_name.c_str());
     engine_->SetITensor(output_name, layer->getOutput(0));
-    if (test_mode) {
+
+    if (test_mode ||
+        if_skip_merging_optimize(engine_, {filter_h, filter_w}, strides,
+                                 paddings, op_desc.Input("Input").front())) {
       engine_->DeclareOutput(output_name);
     }
   }
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index bd3ba4cea65..e828d2077d7 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -132,6 +132,16 @@ class TensorRTEngine : public EngineBase {
   std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
       weight_map;
 
+  // TODO(NHZLX):
+  // In the normal case, paddle-trt hits a bug when running GoogLeNet. When two
+  // or more 1 * 1 convolutions share the same input, paddle-tensorrt performs
+  // a merging optimization that fuses those convs into one conv, which then
+  // triggers the bug. So, for the time being, we use a strategy (tracked by
+  // the map below, keyed by input ITensor name) to avoid this optimization.
+  // This bug will be fixed in the future.
+  std::unordered_map<std::string /*name*/, int /*ITensor_quote_num*/>
+      itensor_quote_num;
+
  private:
   // the max batch size
   int max_batch_;
-- 
GitLab