From c3a69111c571edbca0dad313d58a31baf2eb5d69 Mon Sep 17 00:00:00 2001 From: zhoutianzi666 <39978853+zhoutianzi666@users.noreply.github.com> Date: Tue, 8 Nov 2022 14:07:48 +0800 Subject: [PATCH] [Paddle Inference] allow fold fill_constant && allow nms3 into trt in int8 model (#47551) * allow fold fill_constant && allow nms3 into trt in int8 model * use unordered_map * fix CI failing --- paddle/fluid/framework/ir/constant_folding_pass.cc | 5 ++--- paddle/fluid/inference/tensorrt/op_teller.cc | 2 ++ .../tests/api/analyzer_seq_pool1_fuse_statis_tester.cc | 2 +- .../ir/inference/test_trt_convert_expand_v2.py | 10 ++++++---- .../ir/inference/test_trt_convert_fill_constant.py | 4 ++-- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/framework/ir/constant_folding_pass.cc b/paddle/fluid/framework/ir/constant_folding_pass.cc index 31f070de2c..cd069e474e 100644 --- a/paddle/fluid/framework/ir/constant_folding_pass.cc +++ b/paddle/fluid/framework/ir/constant_folding_pass.cc @@ -64,8 +64,7 @@ void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const { platform::errors::Fatal( "scope must not be null when applying constant floding.")); - // Now, I don't want to fold fill_constant op in Paddle-TRT - std::vector blacklist{"fill_constant", "feed"}; + std::vector blacklist{"feed"}; auto op_node_sorted = framework::ir::TopologyVarientSort( *graph, static_cast(0)); @@ -78,7 +77,7 @@ void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const { bool input_persis = true; // map is used to record how many time a name string occures in the whole // graph's nodes - std::map map; + std::unordered_map map; for (auto in_node : op_node->inputs) { map[in_node->Name()] = 0; if (!in_node->Var()->Persistable()) { diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index f6901d4d78..6741ac8bc2 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -42,6 +42,8 
@@ struct SimpleOpTypeSetTeller : public Teller { teller_set.insert("group_norm"); teller_set.insert("multiclass_nms3"); teller_set.insert("multiclass_nms"); + int8_teller_set.insert("multiclass_nms3"); + int8_teller_set.insert("multiclass_nms"); #endif #if IS_TRT_VERSION_GE(7000) teller_set.insert("tile"); diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc index c01e966d53..185b37a7cc 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc @@ -40,7 +40,7 @@ TEST(Analyzer_seq_pool1_fuse_statis, fuse_statis) { EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 0); EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2); LOG(INFO) << "num_ops: " << num_ops; - EXPECT_EQ(num_ops, 183); + EXPECT_EQ(num_ops, 181); } } // namespace seq_pool1_tester diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py index 6eb0228103..bfde710287 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py @@ -246,9 +246,10 @@ class TrtConvertExpandV2Test2(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 3), 1e-5 + # fill_constant will be folded by constant folding pass! 
+ yield self.create_inference_config(), (0, 3), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (1, 3), 1e-3 + yield self.create_inference_config(), (0, 3), 1e-3 def add_skip_trt_case(self): pass @@ -389,9 +390,10 @@ class TrtConvertExpandV2Test3(TrtLayerAutoScanTest): # for dynamic_shape generate_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (4, 3), 1e-5 + # fill_constant will be folded by constant folding pass! + yield self.create_inference_config(), (0, 3), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (4, 3), 1e-3 + yield self.create_inference_config(), (0, 3), 1e-3 def add_skip_trt_case(self): pass diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fill_constant.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fill_constant.py index 6e22f5db13..87efe0ee0e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fill_constant.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fill_constant.py @@ -21,7 +21,7 @@ from functools import partial from typing import Any, Dict, List -class TrtConvertSplitTest(TrtLayerAutoScanTest): +class TrtConvertFillConstantTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -36,7 +36,7 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest): return np.array([4]).astype(np.int32) for shape in [[2, 3, 4]]: - for num_input in [0, 1, 2, 3]: + for num_input in [0, 1, 2]: for dtype in [5, 2, 3]: for str_value in ["2", "23", "-1"]: self.num_input = num_input -- GitLab