Unverified commit c6e6cdc9, authored by Tao Luo, committed by GitHub

Merge pull request #16681 from luotao1/release/1.4_int8_cache

test_analyzer_int8 tests use default pass order
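In short, the INT8 analyzer tests stop hard-coding a pass list through `pass_builder()->SetPasses(...)` and instead rely on the default `CpuPassStrategy` order, which now runs `runtime_context_cache_pass` near the front. A minimal sketch of the resulting test configuration, assuming the public `AnalysisConfig` API of this release (the `model_dir` argument is a placeholder):

```cpp
#include <string>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

// Sketch only: mirrors the simplified SetConfig() in the diff below.
void SetInt8TestConfig(paddle::AnalysisConfig *cfg, const std::string &model_dir) {
  cfg->SetModel(model_dir);
  cfg->SetProgFile("__model__");
  cfg->DisableGpu();
  cfg->SwitchIrOptim();
  cfg->SwitchSpecifyInputNames();
  cfg->EnableMKLDNN();
  // No pass_builder()->SetPasses(...) call: the default CpuPassStrategy
  // order, including the relocated runtime_context_cache_pass, is used.
}
```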
......@@ -87,6 +87,7 @@ const std::vector<std::string> kAnakinSubgraphPasses({
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
passes_.assign({
"infer_clean_graph_pass", //
"runtime_context_cache_pass", //
// "identity_scale_op_clean_pass", //
"conv_affine_channel_fuse_pass", //
"conv_eltwiseadd_affine_channel_fuse_pass", //
......@@ -96,7 +97,6 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add_act_fuse_pass", //
"conv_elementwise_add2_act_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
"runtime_context_cache_pass", //
#endif //
"transpose_flatten_concat_fuse_pass",
});
......@@ -117,6 +117,10 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
// not be damaged by smaller ones.
passes_.assign({
"infer_clean_graph_pass", //
// TODO(luotao): runtime_context_cache_pass should be located in the
// front, see https://github.com/PaddlePaddle/Paddle/issues/16609,
// will enhance this pass later.
"runtime_context_cache_pass", //
"attention_lstm_fuse_pass", //
"seqpool_concat_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", //
......@@ -132,8 +136,6 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
"conv_bn_fuse_pass", //
"conv_eltwiseadd_bn_fuse_pass", //
"is_test_pass", //
"identity_scale_op_clean_pass", //
"runtime_context_cache_pass", //
});
use_gpu_ = false;
}
......
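Tests (or users) that still need a non-default order do not have to replace the whole list; the builder can be edited incrementally. A hedged sketch, assuming the `DeletePass`/`AppendPass` members of the pass builder returned by `AnalysisConfig::pass_builder()` behave as in this release:

```cpp
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void MoveRuntimeContextCacheToEnd(paddle::AnalysisConfig *cfg) {
  // Restore the pre-PR ordering for one config only: remove the pass from
  // its default (front) position and re-append it at the end.
  cfg->pass_builder()->DeletePass("runtime_context_cache_pass");
  cfg->pass_builder()->AppendPass("runtime_context_cache_pass");
}
```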
......@@ -23,18 +23,11 @@ namespace analysis {
void SetConfig(AnalysisConfig *cfg) {
cfg->SetModel(FLAGS_infer_model);
cfg->SetProgFile("__model__");
cfg->DisableGpu();
cfg->SwitchIrOptim();
cfg->SwitchSpecifyInputNames(false);
cfg->SwitchSpecifyInputNames();
cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
cfg->EnableMKLDNN();
cfg->pass_builder()->SetPasses(
{"infer_clean_graph_pass", "mkldnn_placement_pass",
"depthwise_conv_mkldnn_pass", "conv_bn_fuse_pass",
"conv_eltwiseadd_bn_fuse_pass", "conv_bias_mkldnn_fuse_pass",
"conv_elementwise_add_mkldnn_fuse_pass", "conv_relu_mkldnn_fuse_pass",
"fc_fuse_pass", "is_test_pass"});
}
template <typename T>
......@@ -84,13 +77,13 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
std::to_string(num_images) + " is bigger than all test data size.");
PaddleTensor images;
images.name = "input";
images.name = "image";
images.shape = {num_images, 3, 224, 224};
images.dtype = PaddleDType::FLOAT32;
images.data.Resize(sizeof(float) * num_images * 3 * 224 * 224);
PaddleTensor labels;
labels.name = "labels";
labels.name = "label";
labels.shape = {num_images, 1};
labels.dtype = PaddleDType::INT64;
labels.data.Resize(sizeof(int64_t) * num_images);
......@@ -132,7 +125,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224;
TensorReader<float> image_reader(file, images_offset_in_file,
image_batch_shape, "input");
image_batch_shape, "image");
TensorReader<int64_t> label_reader(file, labels_offset_in_file,
label_batch_shape, "label");
......
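The renames above ("input" → "image", "labels" → "label") follow from the config now calling `SwitchSpecifyInputNames()`: feed tensors are matched to the model's input variables by name, so the warm-up and input tensors must carry the variables' real names. A minimal sketch of one such named warm-up batch (the batch size of 1 is a placeholder, not taken from the PR):

```cpp
#include <vector>
#include "paddle/fluid/inference/api/paddle_api.h"

std::vector<paddle::PaddleTensor> MakeWarmupBatch() {
  paddle::PaddleTensor image;
  image.name = "image";  // was "input" before this change
  image.shape = {1, 3, 224, 224};
  image.dtype = paddle::PaddleDType::FLOAT32;
  image.data.Resize(sizeof(float) * 1 * 3 * 224 * 224);

  paddle::PaddleTensor label;
  label.name = "label";  // was "labels" before this change
  label.shape = {1, 1};
  label.dtype = paddle::PaddleDType::INT64;
  label.data.Resize(sizeof(int64_t) * 1);

  return {image, label};
}
```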
......@@ -316,7 +316,8 @@ void PredictionRun(PaddlePredictor *predictor,
int num_threads, int tid) {
int num_times = FLAGS_repeat;
int iterations = inputs.size(); // process the whole dataset ...
if (FLAGS_iterations > 0 && FLAGS_iterations < inputs.size())
if (FLAGS_iterations > 0 &&
FLAGS_iterations < static_cast<int64_t>(inputs.size()))
iterations =
FLAGS_iterations; // ... unless the number of iterations is set
outputs->resize(iterations);
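The added `static_cast<int64_t>` removes a signed/unsigned comparison between `FLAGS_iterations` and `inputs.size()`. Restated on its own, an illustrative equivalent (assuming `FLAGS_iterations` is declared with `DEFINE_int64`, as the cast suggests):

```cpp
// Default to the whole dataset, then cap at FLAGS_iterations if it is set.
int iterations = static_cast<int>(inputs.size());
if (FLAGS_iterations > 0 &&
    FLAGS_iterations < static_cast<int64_t>(inputs.size())) {
  iterations = static_cast<int>(FLAGS_iterations);
}
outputs->resize(iterations);
```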
......@@ -329,14 +330,14 @@ void PredictionRun(PaddlePredictor *predictor,
#endif
if (!FLAGS_zero_copy) {
run_timer.tic();
for (size_t i = 0; i < iterations; i++) {
for (int i = 0; i < iterations; i++) {
for (int j = 0; j < num_times; j++) {
predictor->Run(inputs[i], &(*outputs)[i], FLAGS_batch_size);
}
}
elapsed_time = run_timer.toc();
} else {
for (size_t i = 0; i < iterations; i++) {
for (int i = 0; i < iterations; i++) {
ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[i]);
run_timer.tic();
for (int j = 0; j < num_times; j++) {
......@@ -366,9 +367,8 @@ void TestOneThreadPrediction(
const std::vector<std::vector<PaddleTensor>> &inputs,
std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
auto predictor = CreateTestPredictor(config, use_analysis);
PredictionWarmUp(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads,
0);
PredictionRun(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads, 0);
PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
PredictionRun(predictor.get(), inputs, outputs, 1, 0);
}
void TestMultiThreadPrediction(
......
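The last hunk makes the single-thread helper warm up and run with `num_threads = 1` and `tid = 0` instead of `FLAGS_paddle_num_threads`; math-library parallelism is already set on the config via `SetCpuMathLibraryNumThreads`. A hedged sketch of a driver for this entry point; the `PaddlePredictor::Config` cast and the exact helper signatures are assumptions based on sibling tests, not part of this PR:

```cpp
// Hypothetical single-threaded run using the helpers shown above.
void RunInt8SingleThread() {
  paddle::AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<paddle::PaddleTensor>> inputs;
  SetInput(&inputs);  // assumed to fill batches from the test data file

  std::vector<std::vector<paddle::PaddleTensor>> outputs;
  TestOneThreadPrediction(
      reinterpret_cast<const paddle::PaddlePredictor::Config *>(&cfg),
      inputs, &outputs);  // use_analysis defaults to true
}
```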