Unverified commit e8789c11, authored by Peihan, committed by GitHub

support ernie-int8 test and prune op attribute test (#35890)

* support ernie-int8 test and prune op attribute test

* remove using and use namespace

* remove macro and use shell instead

* Revert "remove macro and use shell instead"

This reverts commit 615964b149d7de7825b341936b42be22a4bc0091.

* fix grammar error

* fix shell error
Parent 7ebbcbbc
...@@ -93,20 +93,42 @@ if (USE_TENSORRT AND WITH_GPU)
file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS) file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}") "${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
endif()
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
message(SEND_ERROR "Failed to detect TensorRT version.")
endif()
string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ")
include_directories("${TENSORRT_INCLUDE_DIR}") include_directories("${TENSORRT_INCLUDE_DIR}")
link_directories("${TENSORRT_LIB_DIR}") link_directories("${TENSORRT_LIB_DIR}")
add_compile_definitions(NV_TENSORRT_MAJOR=${TENSORRT_MAJOR_VERSION})
add_compile_definitions(NV_TENSORRT_MINOR=${TENSORRT_MINOR_VERSION})
add_compile_definitions(NV_TENSORRT_PATCH=${TENSORRT_PATCH_VERSION})
add_compile_definitions(NV_TENSORRT_BUILD=${TENSORRT_BUILD_VERSION})
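# Example (illustrative values): with TensorRT 7.2.3.4 installed, the detected
# values would be TENSORRT_MAJOR_VERSION=7, TENSORRT_MINOR_VERSION=2,
# TENSORRT_PATCH_VERSION=3 and TENSORRT_BUILD_VERSION=4, so the test targets are
# built with -DNV_TENSORRT_MAJOR=7 -DNV_TENSORRT_MINOR=2 -DNV_TENSORRT_PATCH=3
# -DNV_TENSORRT_BUILD=4, which the IS_TRT_VERSION_* macros in test_suite.h consume.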
endif()
if(WITH_MKL)
...
...@@ -115,6 +115,20 @@ for model_name in $unknown_download_list; do
download $url_prefix $model_name
done
# ernie int8 quant with matmul
unknown_nlp_download_list='quant_post_model_xnli_predict_matmul'
for model_name in $unknown_nlp_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/unknown/nlp"
download $url_prefix $model_name
done
# MobileNetV1 with pruned op attributes
dev_class_download_list='MobileNetV1'
for model_name in $dev_class_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2021-09-16/class"
download $url_prefix $model_name
done
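# NOTE: judging from the --modeldir flags passed below, each downloaded model is
# expected to unpack to $DATA_DIR/<model_name>/<model_name>/.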
function compile_test() {
mkdir -p ${build_dir}
cd ${build_dir}
...@@ -255,6 +269,31 @@ if [ $? -ne 0 ]; then
EXIT_CODE=8
fi
printf "${YELLOW} start test_ernie_xnli_int8 ${NC} \n";
compile_test "test_ernie_xnli_int8"
ernie_qat_model="quant_post_model_xnli_predict_matmul"
${exe_dir}/test_ernie_xnli_int8 \
--modeldir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model \
--datadir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/xnli_var_len \
--truth_data=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/truth_data \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_ernie_xnli_int8.xml
if [ $? -ne 0 ]; then
echo "${RED} test_ernie_xnli_int8 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_mobilnetv1 ${NC} \n";
compile_test "test_mobilnetv1"
${exe_dir}/test_mobilnetv1 \
--modeldir=$DATA_DIR/MobileNetV1/MobileNetV1 \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_mobilnetv1.xml
if [ $? -ne 0 ]; then
echo "${RED} test_mobilnetv1 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
set +x
test_suites=$(echo ${test_suite_list} | sed 's/:/ /g')
...
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "test_helper.h" // NOLINT
#include "test_suite.h" // NOLINT
DEFINE_string(modeldir, "", "Directory of the inference model.");
DEFINE_string(datadir, "", "Path to the dataset file.");
DEFINE_string(truth_data, "", "Path to the ground-truth result file.");
namespace paddle_infer {
std::shared_ptr<Predictor> InitPredictor() {
Config config;
config.SetModel(FLAGS_modeldir + "/__model__",
FLAGS_modeldir + "/__params__");
config.EnableUseGpu(1000, 0);
// Enable memory optimization.
config.EnableMemoryOptim();
int max_batch = 32;
int max_single_seq_len = 128;
int opt_single_seq_len = 64;
int min_batch_seq_len = 1;
int max_batch_seq_len = 512;
int opt_batch_seq_len = 256;
std::string input_name0 = "eval_placeholder_0";
std::string input_name1 = "eval_placeholder_1";
std::string input_name2 = "eval_placeholder_2";
std::string input_name3 = "eval_placeholder_3";
std::vector<int> min_shape = {min_batch_seq_len};
std::vector<int> max_shape = {max_batch_seq_len};
std::vector<int> opt_shape = {opt_batch_seq_len};
// Set the input's min, max, opt shape
std::map<std::string, std::vector<int>> min_input_shape = {
{input_name0, min_shape},
{input_name1, min_shape},
{input_name2, {1}},
{input_name3, {1, min_batch_seq_len, 1}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{input_name0, max_shape},
{input_name1, max_shape},
{input_name2, {max_batch + 1}},
{input_name3, {1, max_single_seq_len, 1}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{input_name0, opt_shape},
{input_name1, opt_shape},
{input_name2, {max_batch + 1}},
{input_name3, {1, opt_single_seq_len, 1}}};
// build the TensorRT engine with int8 precision
config.EnableTensorRtEngine(1 << 30, 1, 5, Config::Precision::kInt8, false,
false);
// ernie varlen must be used with dynamic shape
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
// ernie varlen must be used with oss
config.EnableTensorRtOSS();
return CreatePredictor(config);
}
// Parse tensor from string
template <typename T>
std::vector<T> ParseTensor(const std::string &field) {
std::string mat_str = field;
std::vector<T> mat;
paddle::test::Split(mat_str, ' ', &mat);
return mat;
}
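// Example: ParseTensor<int32_t>("1 12 45") returns {1, 12, 45}; each input
// line below is split on ';' into such space-separated fields.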
void run(Predictor *predictor, std::vector<float> *out_data) {
clock_t start, end;
start = clock();
CHECK(predictor->Run());
end = clock();
auto output_names = predictor->GetOutputNames();
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_t->shape();
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());
out_data->resize(out_num);
output_t->CopyToCpu(out_data->data());
return;
}
auto PrepareOutput(std::string input_file) -> std::deque<float> {
std::ifstream fin(input_file);
std::string line;
std::vector<std::string> buffer;
while (std::getline(fin, line)) {
buffer.emplace_back(line);
}
std::deque<float> resDeque(buffer.size());
std::transform(buffer.begin(), buffer.end(), resDeque.begin(),
[](const std::string &val) { return std::stof(val); });
return resDeque;
} // PrepareOutput
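// The truth_data file is assumed to hold one ground-truth label per line
// (parsed with std::stof) and is consumed front-to-back as the samples run.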
TEST(tensorrt_tester_ernie_xnli, oss_varlen_truth_data_int8) {
auto resDeque = PrepareOutput(FLAGS_truth_data);
auto predictor = InitPredictor();
ASSERT_FALSE(FLAGS_datadir.empty());
std::ifstream fin(FLAGS_datadir);
std::string line;
int lineno = 0;
const int max_seq_len = 128;
const int run_batch = 1;
int correct_num = 0;
while (std::getline(fin, line)) {
std::vector<std::string> fields;
paddle::test::Split(line, ';', &fields);
auto src_ids = ParseTensor<int32_t>(fields[0]);
auto sent_ids = ParseTensor<int32_t>(fields[1]);
auto pos_ids = ParseTensor<int64_t>(fields[2]);
int run_seq_len = src_ids.size();
int32_t i3[2] = {0, run_seq_len};
int32_t i4[max_seq_len] = {0};
auto input_names = predictor->GetInputNames();
// first input
auto input_t1 = predictor->GetInputHandle(input_names[0]);
input_t1->Reshape({run_seq_len});
input_t1->CopyFromCpu(src_ids.data());
// second input
auto input_t2 = predictor->GetInputHandle(input_names[1]);
input_t2->Reshape({run_seq_len});
input_t2->CopyFromCpu(sent_ids.data());
// third input
auto input_t3 = predictor->GetInputHandle(input_names[2]);
input_t3->Reshape({run_batch + 1});
input_t3->CopyFromCpu(i3);
// fourth input
auto input_t4 = predictor->GetInputHandle(input_names[3]);
input_t4->Reshape({1, max_seq_len, 1});
input_t4->CopyFromCpu(i4);
std::vector<float> out_data;
run(predictor.get(), &out_data);
lineno++;
int maxPosition =
max_element(out_data.begin(), out_data.end()) - out_data.begin();
if (maxPosition == resDeque[0]) {
correct_num += 1;
}
resDeque.pop_front();
VLOG(2) << "predict result: " << maxPosition;
for (auto r : out_data) {
VLOG(2) << r;
}
}
ASSERT_GT(correct_num,
4741);  // 5010 inputs in total; the int8 result should exceed 4741
LOG(INFO) << "=== finish oss test ===";
}
} // namespace paddle_infer
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
::google::ParseCommandLineFlags(&argc, &argv, true);
#if IS_TRT_VERSION_GE(7200)
return RUN_ALL_TESTS();
#endif
return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sstream>
#include <string>
#include <vector>
namespace paddle {
namespace test {
// split string to vector<string> by sep
static void split(const std::string &str, char sep,
std::vector<std::string> *pieces, bool ignore_null = true) {
pieces->clear();
if (str.empty()) {
if (!ignore_null) {
pieces->push_back(str);
}
return;
}
size_t pos = 0;
size_t next = str.find(sep, pos);
while (next != std::string::npos) {
pieces->push_back(str.substr(pos, next - pos));
pos = next + 1;
next = str.find(sep, pos);
}
if (!str.substr(pos).empty()) {
pieces->push_back(str.substr(pos));
}
}
template <typename T>
void GetValueFromStream(std::stringstream *ss, T *t) {
(*ss) >> (*t);
}
template <>
void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
*t = ss->str();
}
// Split a string into a vector of T, using sep as the delimiter
template <typename T>
void Split(const std::string &line, char sep, std::vector<T> *v) {
std::stringstream ss;
T t;
for (auto c : line) {
if (c != sep) {
ss << c;
} else {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}
if (!ss.str().empty()) {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}
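// Example: Split<int32_t>("1 2 3", ' ', &v) yields v == {1, 2, 3}, and
// Split<std::string>("a;b", ';', &v) yields v == {"a", "b"}.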
} // namespace test
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "test_helper.h" // NOLINT
#include "test_suite.h" // NOLINT
DEFINE_string(modeldir, "", "Directory of the inference model.");
namespace paddle_infer {
paddle::test::Record PrepareInput(int batch_size, int shape_size = 224) {
// init input data
int channel = 3;
int width = shape_size; // w = 224
int height = shape_size; // h = 224
paddle::test::Record image_Record;
int input_num = batch_size * channel * width * height;
std::vector<float> input_data(input_num, 1);
image_Record.data = input_data;
image_Record.shape = std::vector<int>{batch_size, channel, width, height};
image_Record.type = paddle::PaddleDType::FLOAT32;
return image_Record;
}
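// Example: PrepareInput(2, 448) returns a Record with shape {2, 3, 448, 448}
// filled with 1.0f, matching the input fed to the "x" tensor below.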
TEST(tensorrt_tester_mobilenetv1, tuned_dynamic_trt_fp32_bz2) {
bool tuned_shape = true;
std::string shape_range_info = FLAGS_modeldir + "/shape_range_info.pbtxt";
LOG(INFO) << "tensorrt tuned info saved to " << shape_range_info;
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["x"] = PrepareInput(2, 448);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data,
truth_output_data;
if (tuned_shape) {
// NOTE: shape_range_info is written out when the tuning predictor is
// destructed.
// prepare ground truth config
paddle_infer::Config tune_config;
tune_config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
tune_config.SwitchIrOptim(false);
tune_config.EnableUseGpu(1000, 0);
tune_config.CollectShapeRangeInfo(shape_range_info);
auto predictor_tune = paddle_infer::CreatePredictor(tune_config);
SingleThreadPrediction(predictor_tune.get(), &my_input_data_map,
&truth_output_data, 1);
}
// prepare inference config
paddle_infer::Config config;
config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config.EnableUseGpu(1000, 0);
config.EnableTensorRtEngine(
1 << 20, 2, 5, paddle_infer::PrecisionType::kFloat32, false, false);
config.EnableTunedTensorRtDynamicShape(shape_range_info, true);
LOG(INFO) << config.Summary();
paddle_infer::services::PredictorPool pred_pool(config, 1);
SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
&infer_output_data);
// check outputs
CompareRecord(&truth_output_data, &infer_output_data);
VLOG(1) << "finish test";
}
} // namespace paddle_infer
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
::google::ParseCommandLineFlags(&argc, &argv, true);
return RUN_ALL_TESTS();
}
...@@ -14,6 +14,7 @@
#pragma once
#include <math.h>
#include <algorithm>
#include <deque>
#include <fstream>
#include <future>
#include <iostream>
...@@ -31,6 +32,18 @@
namespace paddle {
namespace test {
#define IS_TRT_VERSION_GE(version) \
((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) >= version)
#define IS_TRT_VERSION_LT(version) \
((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) < version)
#define TRT_VERSION \
NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD
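// Example (illustrative values): TensorRT 7.2.3.4 gives
// TRT_VERSION = 7 * 1000 + 2 * 100 + 3 * 10 + 4 = 7234, so
// IS_TRT_VERSION_GE(7200) is true and IS_TRT_VERSION_LT(7200) is false.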
class Record {
public:
std::vector<float> data;
...@@ -96,7 +109,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
switch (output_tensor->type()) {
case paddle::PaddleDType::INT64: {
VLOG(1) << "output_tensor dtype: int64";
std::vector<int64_t> out_data;
output_Record.type = paddle::PaddleDType::INT64;
out_data.resize(out_num);
...@@ -108,7 +121,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
break;
}
case paddle::PaddleDType::FLOAT32: {
VLOG(1) << "output_tensor dtype: float32";
std::vector<float> out_data;
output_Record.type = paddle::PaddleDType::FLOAT32;
out_data.resize(out_num);
...@@ -119,7 +132,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
break;
}
case paddle::PaddleDType::INT32: {
VLOG(1) << "output_tensor dtype: int32";
std::vector<int32_t> out_data;
output_Record.type = paddle::PaddleDType::INT32;
out_data.resize(out_num);
...@@ -139,10 +152,12 @@ void CompareRecord(std::map<std::string, Record> *truth_output_data,
float epislon = 1e-5) {
for (const auto & [ key, value ] : *infer_output_data) {
auto truth_record = (*truth_output_data)[key];
VLOG(1) << "output name: " << key;
size_t numel = value.data.size() / sizeof(float);
EXPECT_EQ(value.data.size(), truth_record.data.size());
for (size_t i = 0; i < numel; ++i) {
VLOG(1) << "compare: " << value.data.data()[i] << ",\t"
<< truth_record.data.data()[i];
ASSERT_LT(fabs(value.data.data()[i] - truth_record.data.data()[i]),
epislon);
}
...