From 74812d1c90eba8f9bd231312a5832eb32217bdcb Mon Sep 17 00:00:00 2001
From: Pei Yang
Date: Sat, 21 Sep 2019 09:17:44 +0800
Subject: [PATCH] Fix bugs: Paddle-TRT repeatedly sets weight_map and
 over-deletes repetitive_params (#19825)

* fix trt bugs when sharing params, test=develop

* add unittest for cascade_rcnn

---
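Note: this patch fixes two related bugs that surface when several ops share one
parameter, as in the Cascade R-CNN heads exercised by the new unit test. First,
converters used to write engine_->weight_map[name] = tensor directly, so
converting a shared parameter a second time either tripped the old
PADDLE_ENFORCE(!weight_map.count(name)) check in GetWeightCPUData or silently
overwrote an entry whose memory TensorRT might still reference. Second, the
subgraph pass reported every parameter it consumed in repetitive_params, so the
fluid copy of a weight still needed elsewhere could be deleted. The sketch
below is not code from the patch; it is a minimal, compilable reproduction of
the suffix scheme that SetWeights/GetWeightCPUData now use, with
std::unique_ptr<int> standing in for framework::Tensor:

    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>
    #include <utility>

    // Toy weight_map: every insertion appends a fresh integer suffix to the
    // key, so inserting the same logical name twice yields two distinct
    // entries instead of an enforce failure or a silent overwrite.
    std::map<std::string, std::unique_ptr<int>> weight_map;

    void SetWeights(const std::string& w_name, std::unique_ptr<int> w_tensor) {
      static int suffix_counter = 0;
      weight_map[w_name + std::to_string(suffix_counter)] = std::move(w_tensor);
      suffix_counter += 1;
    }

    int main() {
      SetWeights("conv1_w", std::unique_ptr<int>(new int(1)));
      SetWeights("conv1_w", std::unique_ptr<int>(new int(2)));
      // Keys are now "conv1_w0" and "conv1_w1": no collision.
      std::cout << weight_map.size() << std::endl;  // prints 2
      return 0;
    }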
 .../ir_passes/tensorrt_subgraph_pass.cc       | 15 +++--
 .../tensorrt/convert/batch_norm_op.cc         |  9 ++-
 .../inference/tensorrt/convert/dropout_op.cc  |  4 +-
 .../fluid/inference/tensorrt/convert/fc_op.cc |  4 +-
 .../tensorrt/convert/leaky_relu_op.cc         |  2 +-
 .../inference/tensorrt/convert/prelu_op.cc    |  4 +-
 paddle/fluid/inference/tensorrt/engine.cc     | 34 +++++-----
 paddle/fluid/inference/tensorrt/engine.h      | 20 ++++--
 .../fluid/inference/tests/api/CMakeLists.txt  |  5 +-
 .../tests/api/trt_cascade_rcnn_test.cc        | 62 +++++++++++++++++++
 10 files changed, 124 insertions(+), 35 deletions(-)
 create mode 100644 paddle/fluid/inference/tests/api/trt_cascade_rcnn_test.cc

diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index 1d1e4570e1..bbe68a7bab 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -102,7 +102,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   // const framework::BlockDesc& main_block = program_desc->Block(0);
   framework::BlockDesc *new_block = program_desc->AppendBlock(main_block);

-  // An fake block desc.
+  // A fake block desc.
   framework::proto::BlockDesc block_proto;
   framework::BlockDesc block_desc(nullptr, &block_proto);
   block_desc.Proto()->set_parent_idx(-1);
@@ -118,20 +118,27 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   }

   // Then, we will use the input_names_with_id and output_names_with_id to
-  // generate the eigine key.
+  // generate the engine key.
   // So, We use set instead of unordered_set here to ensure that the engine key
   // is unique.
   std::set<std::string> input_names;
   std::set<std::string> input_names_with_id;
   std::vector<std::string> params;
+  // If we delete the fluid copy of params shared by more than one op, there
+  // will be problems, so we filter them out.
+  std::vector<std::string> params_not_shared;

-  // The node->inputs containes input tensors and parameters.
+  // The node->inputs contains input tensors and parameters.
   for (auto *x : node->inputs) {
     input_names.insert(x->Name());
     input_names_with_id.insert(x->Name() + std::to_string(x->id()));
     if (std::count(graph_params.begin(), graph_params.end(), x->Name()) > 0) {
       params.push_back(x->Name());
     }
+    if (std::count(graph_params.begin(), graph_params.end(), x->Name()) > 0 &&
+        x->outputs.size() <= 1) {
+      params_not_shared.push_back(x->Name());
+    }
   }

   std::set<std::string> output_names;
@@ -241,7 +248,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
     return;
   }

-  std::copy(params.begin(), params.end(),
+  std::copy(params_not_shared.begin(), params_not_shared.end(),
             std::back_inserter(*repetitive_params));

   tensorrt::TensorRTEngine *trt_engine =
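A note on the filter above: params keeps the full list the engine op is built
with, while params_not_shared only keeps parameters whose variable node feeds
at most one op; only the latter are handed to repetitive_params for deletion
from the fluid scope. Below is a compilable toy version of that fan-out test,
with Node as a hypothetical stand-in for paddle's ir::Node:

    #include <string>
    #include <vector>

    // Hypothetical stand-in for ir::Node; only the fields the filter needs.
    struct Node {
      std::string name;
      std::vector<Node*> outputs;  // ops that consume this variable
    };

    // A parameter's fluid copy may be deleted only if at most one op reads
    // it; a parameter consumed by several ops keeps its fluid copy alive.
    std::vector<std::string> FilterNotShared(const std::vector<Node*>& params) {
      std::vector<std::string> not_shared;
      for (const Node* p : params) {
        if (p->outputs.size() <= 1) not_shared.push_back(p->name);
      }
      return not_shared;
    }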
diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
index d948868464..25f0d866dc 100644
--- a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
@@ -116,11 +116,10 @@
                     scale_weights.get(), power_weights.get());

     auto output_name = op_desc.Output("Y").front();
-    engine_->weight_map[op_desc.Input("Bias").front()] =
-        std::move(combile_bias_tensor);
-    engine_->weight_map[op_desc.Input("Scale").front()] =
-        std::move(combile_scale_tensor);
-
+    engine_->SetWeights(op_desc.Input("Bias").front(),
+                        std::move(combile_bias_tensor));
+    engine_->SetWeights(op_desc.Input("Scale").front(),
+                        std::move(combile_scale_tensor));
     RreplenishLayerAndOutput(layer, "pool2d", {output_name}, test_mode);
   }
 };
diff --git a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
index 510b622f46..cd28c6d98a 100644
--- a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
@@ -66,8 +66,8 @@
                                  nvinfer1::ScaleMode::kUNIFORM,
                                  shift_weights.get(), scale_weights.get(),
                                  power_weights.get());
-    engine_->weight_map[op_desc.Output("Out").front() + "_dropout"] =
-        std::move(weight_tensor);
+    engine_->SetWeights(op_desc.Output("Out").front() + "_dropout",
+                        std::move(weight_tensor));
     auto output_name = op_desc.Output("Out")[0];

     RreplenishLayerAndOutput(layer, "dropout", {output_name}, test_mode);
diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
index fb7b89b189..ea108d6a07 100644
--- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -72,7 +72,7 @@
     PADDLE_ENFORCE_NOT_NULL(Y_v);
     auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
     // This may trigger a GPU->CPU copy, because TRT's weight can only be
-    // assigned from CPU memory, that can't be avoided.
+    // assigned from CPU memory, which can't be avoided.
     float* weight_data = nullptr;
     bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
     if (enable_int8) {
@@ -131,7 +131,7 @@
                                        *const_cast<nvinfer1::ITensor*>(X),
                                        n_output, tmp_weight.get(), bias.get());

-    engine_->weight_map[op_desc.Input(w_name).front()] = std::move(tmp);
+    engine_->SetWeights(op_desc.Input(w_name).front(), std::move(tmp));
     auto output_name = op_desc.Output("Out").front();

     RreplenishLayerAndOutput(layer, "fc", {output_name}, test_mode);
diff --git a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
index 2a46938cb1..f3c714009f 100644
--- a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc
@@ -81,7 +81,7 @@
   std::string alpha_name = op_desc.Output("Out")[0] + "_alpha";
   PADDLE_ENFORCE(engine_->weight_map.find(alpha_name) ==
                  engine_->weight_map.end());
-  engine_->weight_map[alpha_name] = std::move(alpha_tensor);
+  engine_->SetWeights(alpha_name, std::move(alpha_tensor));
 #endif
   auto output_name = op_desc.Output("Out")[0];
   RreplenishLayerAndOutput(output_layer, "leaky_relu", {output_name},
diff --git a/paddle/fluid/inference/tensorrt/convert/prelu_op.cc b/paddle/fluid/inference/tensorrt/convert/prelu_op.cc
index 01bcd03e52..d327a74366 100644
--- a/paddle/fluid/inference/tensorrt/convert/prelu_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/prelu_op.cc
@@ -55,8 +55,8 @@
     nvinfer1::IPluginLayer* layer =
         engine_->AddPlugin(&input, input_num, plugin);
     // keep alpha tensor to avoid release it's memory
-    engine_->weight_map[op_desc.Input("Alpha")[0]] =
-        std::move(alpha_tensor_temp);
+    engine_->SetWeights(op_desc.Input("Alpha")[0],
+                        std::move(alpha_tensor_temp));
     auto output_name = op_desc.Output("Out")[0];

     RreplenishLayerAndOutput(layer, "prelu", {output_name}, test_mode);
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 4a26417375..f806069b47 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -70,7 +70,7 @@ void TensorRTEngine::FreezeNetwork() {
   }
 #else
   if (enable_fp16)
-    LOG(INFO) << "Using FP16 in Paddle-trt must ensure that the version of TRT "
+    LOG(INFO) << "Using FP16 in Paddle-TRT must ensure that the version of TRT "
                  "is at least 5."
                  "So, use FP32 to run.";
 #endif
@@ -146,8 +146,8 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer, int offset,
   PADDLE_ENFORCE(!output->isNetworkInput());
   infer_network_->markOutput(*output);
   PADDLE_ENFORCE(output->isNetworkOutput());
-  // output buffers' size can only be decided latter, set zero here to mark this
-  // and will reset latter.
+  // output buffers' size can only be decided later, set zero here to mark this
+  // and will reset later.
   buffer_sizes_[name] = 0;
 }

@@ -164,8 +164,8 @@ void TensorRTEngine::DeclareOutput(const std::string &name) {
   output->setName(name.c_str());
   PADDLE_ENFORCE(!output->isNetworkInput());
   infer_network_->markOutput(*output);
-  // output buffers' size can only be decided latter, set zero here to mark this
-  // and will reset latter.
+  // output buffers' size can only be decided later, set zero here to mark this
+  // and will reset later.
   buffer_sizes_[name] = 0;
 }

@@ -190,20 +190,26 @@ float *TensorRTEngine::GetWeightCPUData(const std::string &name,
                                         framework::Tensor *weight_tensor,
                                         bool enable_int8,
                                         const std::vector<float> &scale) {
+  static int name_suffix_counter = 0;
+  std::string name_suffix = std::to_string(name_suffix_counter);
+  std::string name_with_suffix = name + name_suffix;
   auto w_dims = weight_tensor->dims();
   platform::CPUPlace cpu_place;
-  PADDLE_ENFORCE(!weight_map.count(name),
-                 "During TRT Op converter: We set weight %s with the same name "
-                 "twice into the weight_map",
-                 name);
-  weight_map[name].reset(new framework::Tensor());
-  weight_map[name]->Resize(weight_tensor->dims());
-  TensorCopySync(*weight_tensor, cpu_place, weight_map[name].get());
-  float *weight_data = weight_map[name]->mutable_data<float>(cpu_place);
+  PADDLE_ENFORCE_EQ(
+      weight_map.count(name_with_suffix), 0,
+      "During TRT Op converter: We set weight %s with the same name "
+      "twice into the weight_map",
+      name_with_suffix);
+  weight_map[name_with_suffix].reset(new framework::Tensor());
+  weight_map[name_with_suffix]->Resize(weight_tensor->dims());
+  TensorCopySync(*weight_tensor, cpu_place, weight_map[name_with_suffix].get());
+  float *weight_data =
+      weight_map[name_with_suffix]->mutable_data<float>(cpu_place);
+  name_suffix_counter += 1;
   if (enable_int8) {
     // when the op is fc, scale's size should be 1
-    // when the op is conv, the scale's size should be w_dims[0]
+    // when the op is conv, scale's size should be w_dims[0]
     bool valid_scale_size =
         (scale.size() == 1 ||
          scale.size() == static_cast<size_t>(w_dims[0]));
     PADDLE_ENFORCE(valid_scale_size, "TRT int8 quant: invalid scale size");

diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 19ec11017a..c1d950035c 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -15,10 +15,12 @@ limitations under the License. */
 #pragma once

 #include <NvInfer.h>
+#include <map>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
+#include <utility>
 #include <vector>
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
@@ -39,7 +41,7 @@ class TRTInt8Calibrator;
  * TensorRT Engine.
  *
  * There are two alternative ways to use it, one is to build from a paddle
- * protobuf model, another way is to manully construct the network.
+ * protobuf model, another way is to manually construct the network.
  */
 class TensorRTEngine {
   using DescType = ::paddle::framework::proto::BlockDesc;
@@ -89,11 +91,11 @@
     infer_builder_.reset(createInferBuilder(&logger_));
     infer_network_.reset(infer_builder_->createNetwork());
   }
-  // After finishing adding ops, freeze this network and creates the executation
+  // After finishing adding ops, freeze this network and create the execution
   // environment.
   void FreezeNetwork();

-  // Add an input and set its name, data type and dimention.
+  // Add an input and set its name, data type and dimension.
   nvinfer1::ITensor* DeclareInput(const std::string& name,
                                   nvinfer1::DataType dtype,
                                   const nvinfer1::Dims& dim);
@@ -151,6 +153,16 @@
   std::unordered_map<std::string /*name*/, std::unique_ptr<framework::Tensor>>
       weight_map;

+  // When setting weight_map, a self-increasing suffix is needed for the names
+  // so as to avoid repeatedly setting weights with the same name.
+  void SetWeights(std::string w_name,
+                  std::unique_ptr<framework::Tensor> w_tensor) {
+    static int suffix_counter = 0;
+    std::string suffix = std::to_string(suffix_counter);
+    weight_map[w_name + suffix] = std::move(w_tensor);
+    suffix_counter += 1;
+  }
+
   void ClearWeights() {
     for (auto& weight_pair : weight_map) {
       weight_pair.second.reset(nullptr);
@@ -209,7 +221,7 @@
   ((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
     NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) >= version)

-// Add an layer__ into engine__ with args ARGS.
+// Add a layer__ into engine__ with args ARGS.
 // For example:
 //
 // Reference
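The weight_map above exists for lifetime, not lookup: TensorRT's Weights
structs hold raw host pointers into these CPU tensors until the engine is
built, so every copy must stay alive in the map (the prelu converter's "keep
alpha tensor" comment says the same). With the counter suffix, repeated
conversions of a shared weight each get their own entry. Below is a trimmed,
self-contained sketch of the caching path in GetWeightCPUData, with
std::vector<float> standing in for framework::Tensor and TensorCopySync
reduced to a plain host copy:

    #include <map>
    #include <memory>
    #include <string>
    #include <vector>

    std::map<std::string, std::unique_ptr<std::vector<float>>> weight_map;

    // Copy a weight into the CPU-side cache under a unique suffixed key and
    // hand back a raw pointer that stays valid as long as the map entry does.
    float* GetWeightCPUData(const std::string& name,
                            const std::vector<float>& device_weight) {
      static int name_suffix_counter = 0;
      const std::string key = name + std::to_string(name_suffix_counter);
      name_suffix_counter += 1;
      // Stand-in for the GPU->CPU TensorCopySync in the real implementation.
      weight_map[key].reset(new std::vector<float>(device_weight));
      return weight_map[key]->data();
    }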
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 997e7f44d9..87e0fe7126 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -269,7 +269,7 @@ download_model_and_data(${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_dat
 inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc)

 if(WITH_GPU AND TENSORRT_FOUND)
-  set(TRT_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/trt_tests_models")
+  set(TRT_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/trt_models")
   if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR})
     inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz")
   endif()
@@ -285,4 +285,7 @@ if(WITH_GPU AND TENSORRT_FOUND)
   inference_analysis_test(trt_fc_prelu_test SRCS trt_fc_prelu_test.cc
           EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
           ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
+  inference_analysis_test(trt_cascade_rcnn_test SRCS trt_cascade_rcnn_test.cc
+          EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+          ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
 endif()
diff --git a/paddle/fluid/inference/tests/api/trt_cascade_rcnn_test.cc b/paddle/fluid/inference/tests/api/trt_cascade_rcnn_test.cc
new file mode 100644
index 0000000000..35be7db560
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/trt_cascade_rcnn_test.cc
@@ -0,0 +1,62 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
+
+namespace paddle {
+namespace inference {
+
+TEST(TensorRT, cascade_rcnn) {
+  std::string model_dir = FLAGS_infer_model + "/cascade_rcnn";
+  AnalysisConfig config;
+  int batch_size = 1;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(model_dir + "/model", model_dir + "/params");
+  config.SwitchUseFeedFetchOps(false);
+  config.EnableTensorRtEngine(1 << 30, batch_size, 40,
+                              AnalysisConfig::Precision::kFloat32, false);
+
+  auto predictor = CreatePaddlePredictor(config);
+
+  int channels = 3;
+  int height = 640;
+  int width = 640;
+  int input_num = batch_size * channels * height * width;
+  float *input = new float[input_num];
+  for (int i = 0; i < input_num; ++i) input[i] = 1.0f;  // fill input with ones
+
+  float *im_shape = new float[3];
+  im_shape[0] = 3.0;
+  im_shape[1] = 640.0;
+  im_shape[2] = 640.0;
+
+  auto input_names = predictor->GetInputNames();
+
+  auto input_t = predictor->GetInputTensor(input_names[0]);
+  input_t->Reshape({batch_size, channels, height, width});
+  input_t->copy_from_cpu(input);
+
+  auto input_t1 = predictor->GetInputTensor(input_names[1]);
+  input_t1->Reshape({batch_size, 3});
+  input_t1->copy_from_cpu(im_shape);
+
+  ASSERT_TRUE(predictor->ZeroCopyRun());
+}
+
+}  // namespace inference
+}  // namespace paddle
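The test stops at ASSERT_TRUE(predictor->ZeroCopyRun()); it only verifies that
the cascade_rcnn model, whose heads share parameters, converts and runs
end-to-end under Paddle-TRT. If output values were wanted as well, the
ZeroCopy output API used by the sibling TRT tests in this directory would
apply; the helper below is a hypothetical sketch, not part of the patch:

    #include <string>
    #include <vector>

    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    // Hypothetical helper: copy the predictor's first output back to the host
    // after ZeroCopyRun(), using the Paddle 1.x ZeroCopyTensor API.
    std::vector<float> FetchFirstOutput(paddle::PaddlePredictor* predictor) {
      auto output_names = predictor->GetOutputNames();
      auto output_t = predictor->GetOutputTensor(output_names[0]);
      int out_num = 1;
      for (int d : output_t->shape()) out_num *= d;
      std::vector<float> out_data(out_num);
      output_t->copy_to_cpu(out_data.data());
      return out_data;
    }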