Commit e507e7b5

Authored on Sep 17, 2020 by ysh329; committed by 开心的小妮 on Sep 17, 2020.

cherry-pick from #4348. test=develop

Parent: e439268f
Showing 4 changed files with 130 additions and 37 deletions (+130 -37):

lite/core/optimizer.h                                 (+1,  -4)
lite/core/profile/precision_profiler.h                (+44, -9)
lite/core/program.cc                                  (+3,  -1)
lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc   (+82, -23)
lite/core/optimizer.h (+1, -4)

@@ -143,10 +143,7 @@ class Optimizer {
           "mlu_postprocess_pass",
-#ifndef LITE_WITH_PRECISION_PROFILE
-          "memory_optimize_pass"
-#endif
-      }};
+          "memory_optimize_pass"}};
   if (passes.size() == 1) {
     // multi_stream_analysis_pass must be in the front of
lite/core/profile/precision_profiler.h (+44, -9)

@@ -23,6 +23,8 @@
 #include <time.h>
 #include <cmath>
 #include <cstdlib>
+#include <map>
+#include <memory>
 #include <string>
 #include <vector>

@@ -127,7 +129,14 @@ class PrecisionProfiler {
     std::string inst_precison_str = GetInstPrecision(inst);
   }

-  PrecisionProfiler() { MkDirRecur(log_dir_); }
+  PrecisionProfiler() {
+    MkDirRecur(log_dir_);
+    const char* write_to_file_raw =
+        std::getenv("PADDLELITE_PRECISION_WRITE_TO_FILE");
+    write_result_to_file_ = (write_to_file_raw && atoi(write_to_file_raw) > 0)
+                                ? atoi(write_to_file_raw) > 0
+                                : false;
+  }

   std::string GetSummaryHeader() {
     using std::setw;

@@ -154,6 +163,18 @@ class PrecisionProfiler {
     return ss.str();
   }

+  std::string GetSummaryTail() {
+    STL::stringstream ss;
+    ss << "[note]" << std::endl;
+    ss << "1. `ave_grow_rate`: show the sequence value of tensor when std_dev "
+          "& mean are same."
+       << std::endl;
+    ss << "2. Enable write each output tensor to file: `export "
+          "PADDLELITE_PRECISION_WRITE_TO_FILE=1` on ADB command line."
+       << std::endl;
+    return ss.str();
+  }
+
   template <typename T>
   double compute_mean(const T* in, const size_t length) {
     double sum = 0.;

@@ -199,6 +220,17 @@ class PrecisionProfiler {
     return false;
   }

+  std::string rename_out_for_mem_reuse_pass(const std::string& old_name) {
+    if (out_tensor_names_map.find(old_name) == out_tensor_names_map.end()) {
+      out_tensor_names_map[old_name] = 1;
+    } else {
+      ++out_tensor_names_map[old_name];
+    }
+    std::string new_name =
+        old_name + "_" + std::to_string(out_tensor_names_map[old_name]);
+    return new_name;
+  }
+
   void compute_tensor_precision_info(const Tensor* in,
                                      TargetType target_type,
                                      PrecisionType precision_type,

@@ -350,13 +382,12 @@ class PrecisionProfiler {
     using std::left;
     using std::fixed;
     STL::stringstream ss;
-    bool write_result_to_file = true;
     VLOG(1) << ">> Running kernel: " << inst->op()->op_info()->Repr()
             << " registered on " << TargetToStr(inst->kernel()->target()) << "/"
             << PrecisionToStr(inst->kernel()->precision()) << "/"
             << DataLayoutToStr(inst->kernel()->layout())
-            << ", write_result_to_file:" << write_result_to_file;
+            << ", write_result_to_file_:" << write_result_to_file_;
     std::string kernel_repr = inst->op()->op_info()->Repr();
     std::string kernel_place = TargetToStr(inst->kernel()->target()) + "/" +

@@ -383,6 +414,7 @@ class PrecisionProfiler {
       std::string mean_str{"unused"};
       std::string std_dev_str{"unused"};
       std::string ave_grow_rate_str{"unused"};
+      std::string new_out_name = rename_out_for_mem_reuse_pass(out_name);
       if (!is_unused(tout)) {
         compute_tensor_precision_info(tout,

@@ -392,14 +424,14 @@ class PrecisionProfiler {
                                       &mean,
                                       &std_dev,
                                       &ave_grow_rate,
-                                      out_name,
-                                      write_result_to_file);
+                                      new_out_name,
+                                      write_result_to_file_);
         mean_str = std::to_string(mean);
         std_dev_str = std::to_string(std_dev);
         ave_grow_rate_str = std::to_string(ave_grow_rate);
       }
       std::string kernel_info = op_name + ":" + kernel_place;
-      std::string output_arg_info = out_name + ":" +
+      std::string output_arg_info = new_out_name + ":" +
                                     TargetToStr(type->target()) + "/" +
                                     PrecisionToStr(type->precision()) + "/" +
                                     DataLayoutToStr(type->layout());

@@ -420,6 +452,7 @@ class PrecisionProfiler {
       std::string mean_str{"unused"};
       std::string std_dev_str{"unused"};
       std::string ave_grow_rate_str{"unused"};
+      std::string new_out_name = rename_out_for_mem_reuse_pass(out_name);
       if (!is_unused(tout)) {
         compute_tensor_precision_info(tout,

@@ -429,14 +462,14 @@ class PrecisionProfiler {
                                       &mean,
                                       &std_dev,
                                       &ave_grow_rate,
-                                      out_name,
-                                      write_result_to_file);
+                                      new_out_name,
+                                      write_result_to_file_);
         mean_str = std::to_string(mean);
         std_dev_str = std::to_string(std_dev);
         ave_grow_rate_str = std::to_string(ave_grow_rate);
       }
       std::string kernel_info = op_name + ":" + kernel_place;
-      std::string output_arg_info = out_name + ":" +
+      std::string output_arg_info = new_out_name + ":" +
                                     TargetToStr(type->target()) + "/" +
                                     PrecisionToStr(type->precision()) + "/" +
                                     DataLayoutToStr(type->layout());

@@ -458,6 +491,8 @@ class PrecisionProfiler {
   std::string log_dir_{"/storage/emulated/0/PaddleLite_" + get_date_str() +
                        "/"};
   std::string summary_log_dir_{log_dir_ + "precision_summary.log"};
+  std::map<std::string, size_t> out_tensor_names_map;
+  bool write_result_to_file_{false};
 };
 }  // namespace profile
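To make the new profiler behaviour easier to follow, here is a minimal standalone sketch (my own illustration, not code from this commit) of the two pieces of logic added above: the PADDLELITE_PRECISION_WRITE_TO_FILE environment toggle read in the constructor, and the occurrence-counter renaming that rename_out_for_mem_reuse_pass applies so output tensors sharing a name after memory reuse still get distinct entries in the precision log. ReadWriteToFileFlag and RenameForMemReuse are hypothetical names used only here.

// Standalone sketch (not Paddle-Lite code): mirrors the two behaviours added
// to PrecisionProfiler above. The helper names are hypothetical.
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>

// Same idea as write_result_to_file_: only true when the environment variable
// exists and parses to a positive integer.
bool ReadWriteToFileFlag() {
  const char* raw = std::getenv("PADDLELITE_PRECISION_WRITE_TO_FILE");
  return raw != nullptr && std::atoi(raw) > 0;
}

// Same idea as rename_out_for_mem_reuse_pass: append an occurrence counter so
// repeated tensor names stay distinguishable ("x" -> "x_1", "x_2", ...).
// The map value is value-initialized to 0, so ++ yields 1 on first use.
std::string RenameForMemReuse(std::map<std::string, size_t>* seen,
                              const std::string& old_name) {
  return old_name + "_" + std::to_string(++(*seen)[old_name]);
}

int main() {
  std::map<std::string, size_t> seen;
  std::cout << RenameForMemReuse(&seen, "conv2d_0.tmp_0") << "\n";  // ..._1
  std::cout << RenameForMemReuse(&seen, "conv2d_0.tmp_0") << "\n";  // ..._2
  std::cout << "write_to_file=" << ReadWriteToFileFlag() << "\n";   // 0 or 1
  return 0;
}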
lite/core/program.cc (+3, -1)

@@ -284,7 +284,9 @@ void RuntimeProgram::Run() {
   LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kDispatch, false, 1);
 #endif
 #ifdef LITE_WITH_PRECISION_PROFILE
-  LOG(INFO) << "\n" << precision_profiler_summary;
+  LOG(INFO) << "\n"
+            << precision_profiler_summary
+            << inst_precision_profiler.GetSummaryTail();
 #endif
 }
lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc (+82, -23)

@@ -29,6 +29,21 @@ int64_t ShapeProduction(const shape_t& shape) {
   return res;
 }

+std::string ShapePrint(const std::vector<shape_t>& shapes) {
+  std::string shapes_str{""};
+  for (size_t shape_idx = 0; shape_idx < shapes.size(); ++shape_idx) {
+    auto shape = shapes[shape_idx];
+    std::string shape_str;
+    for (auto i : shape) {
+      shape_str += std::to_string(i) + ",";
+    }
+    shapes_str += shape_str;
+    shapes_str +=
+        (shape_idx != 0 && shape_idx == shapes.size() - 1) ? "" : " : ";
+  }
+  return shapes_str;
+}
+
 std::string ShapePrint(const shape_t& shape) {
   std::string shape_str{""};
   for (auto i : shape) {

@@ -37,6 +52,37 @@ std::string ShapePrint(const shape_t& shape) {
   return shape_str;
 }

+std::vector<std::string> split_string(const std::string& str_in) {
+  std::vector<std::string> str_out;
+  std::string tmp_str = str_in;
+  while (!tmp_str.empty()) {
+    size_t next_offset = tmp_str.find(":");
+    str_out.push_back(tmp_str.substr(0, next_offset));
+    if (next_offset == std::string::npos) {
+      break;
+    } else {
+      tmp_str = tmp_str.substr(next_offset + 1);
+    }
+  }
+  return str_out;
+}
+
+std::vector<int64_t> get_shape(const std::string& str_shape) {
+  std::vector<int64_t> shape;
+  std::string tmp_str = str_shape;
+  while (!tmp_str.empty()) {
+    int dim = atoi(tmp_str.data());
+    shape.push_back(dim);
+    size_t next_offset = tmp_str.find(",");
+    if (next_offset == std::string::npos) {
+      break;
+    } else {
+      tmp_str = tmp_str.substr(next_offset + 1);
+    }
+  }
+  return shape;
+}
+
 template <typename T>
 double compute_mean(const T* in, const size_t length) {
   double sum = 0.;

@@ -70,7 +116,7 @@ inline double GetCurrentUS() {
 }

 void RunModel(std::string model_dir,
-              const shape_t& input_shape,
+              const std::vector<shape_t>& input_shapes,
               size_t repeats,
               size_t warmup,
               size_t print_output_elem,

@@ -111,12 +157,19 @@ void RunModel(std::string model_dir,
       CreatePaddlePredictor<MobileConfig>(config);

   // 3. Prepare input data
-  std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0)));
-  input_tensor->Resize(
-      {input_shape[0], input_shape[1], input_shape[2], input_shape[3]});
-  auto* data = input_tensor->mutable_data<float>();
-  for (int i = 0; i < ShapeProduction(input_tensor->shape()); ++i) {
-    data[i] = 1;
+  std::cout << "input_shapes.size():" << input_shapes.size() << std::endl;
+  for (int j = 0; j < input_shapes.size(); ++j) {
+    auto input_tensor = predictor->GetInput(j);
+    input_tensor->Resize(input_shapes[j]);
+    auto input_data = input_tensor->mutable_data<float>();
+    int input_num = 1;
+    for (int i = 0; i < input_shapes[j].size(); ++i) {
+      input_num *= input_shapes[j][i];
+    }
+    for (int i = 0; i < input_num; ++i) {
+      input_data[i] = 1.f;
+    }
   }

   // 4. Run predictor

@@ -142,7 +195,7 @@ void RunModel(std::string model_dir,
   }
   avg_duration = sum_duration / static_cast<float>(repeats);
   std::cout << "\n======= benchmark summary =======\n"
-            << "input_shape(NCHW):" << ShapePrint(input_shape) << "\n"
+            << "input_shape(s) (NCHW):" << ShapePrint(input_shapes) << "\n"
             << "model_dir:" << model_dir << "\n"
             << "warmup:" << warmup << "\n"
             << "repeats:" << repeats << "\n"

@@ -184,18 +237,19 @@ void RunModel(std::string model_dir,
 }

 int main(int argc, char** argv) {
-  shape_t input_shape{1, 3, 224, 224};  // shape_t ==> std::vector<int64_t>
+  std::vector<std::string> str_input_shapes;
+  std::vector<shape_t> input_shapes{
+      {1, 3, 224, 224}};  // shape_t ==> std::vector<int64_t>
   int repeats = 10;
   int warmup = 10;
   int print_output_elem = 0;

-  if (argc > 2 && argc < 9) {
+  if (argc > 2 && argc < 6) {
     std::cerr << "usage: ./" << argv[0] << "\n"
               << "  <naive_buffer_model_dir>\n"
-              << "  <input_n>\n"
-              << "  <input_c>\n"
-              << "  <input_h>\n"
-              << "  <input_w>\n"
+              << "  <raw_input_shapes>, eg: 1,3,224,224 for 1 input; "
+                 "1,3,224,224:1,5 for 2 inputs\n"
               << "  <repeats>\n"
               << "  <warmup>\n"
               << "  <print_output>"
               << std::endl;

@@ -203,14 +257,19 @@ int main(int argc, char** argv) {
   }

   std::string model_dir = argv[1];
-  if (argc >= 9) {
-    input_shape[0] = atoi(argv[2]);
-    input_shape[1] = atoi(argv[3]);
-    input_shape[2] = atoi(argv[4]);
-    input_shape[3] = atoi(argv[5]);
-    repeats = atoi(argv[6]);
-    warmup = atoi(argv[7]);
-    print_output_elem = atoi(argv[8]);
+  if (argc >= 6) {
+    input_shapes.clear();
+    std::string raw_input_shapes = argv[2];
+    std::cout << "raw_input_shapes: " << raw_input_shapes << std::endl;
+    str_input_shapes = split_string(raw_input_shapes);
+    for (size_t i = 0; i < str_input_shapes.size(); ++i) {
+      std::cout << "input shape: " << str_input_shapes[i] << std::endl;
+      input_shapes.push_back(get_shape(str_input_shapes[i]));
+    }
+    repeats = atoi(argv[3]);
+    warmup = atoi(argv[4]);
+    print_output_elem = atoi(argv[5]);
   }

   // set arm power mode:
   // 0 for big cluster, high performance

@@ -220,7 +279,7 @@ int main(int argc, char** argv) {
   size_t power_mode = 0;

   RunModel(
-      model_dir, input_shape, repeats, warmup, print_output_elem, power_mode);
+      model_dir, input_shapes, repeats, warmup, print_output_elem, power_mode);

   return 0;
 }
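With these changes the demo reads one raw shapes string instead of four separate N/C/H/W arguments; per the new usage message above, the arguments are <naive_buffer_model_dir> <raw_input_shapes> <repeats> <warmup> <print_output>, e.g. "1,3,224,224:1,5" for a model with two inputs. The standalone sketch below (my own illustration, not part of the commit; ParseShapes is a hypothetical name) condenses the split_string/get_shape parsing into one function to show how such a string becomes per-input shapes.

// Standalone sketch: parse "1,3,224,224:1,5" into one shape per input,
// following the same ":"-then-","-splitting scheme as the helpers above.
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

using shape_t = std::vector<int64_t>;

std::vector<shape_t> ParseShapes(const std::string& raw) {
  std::vector<shape_t> shapes;
  std::string rest = raw;
  while (!rest.empty()) {
    size_t colon = rest.find(":");
    std::string dims = rest.substr(0, colon);   // one input, e.g. "1,3,224,224"
    shape_t shape;
    while (!dims.empty()) {
      shape.push_back(std::atoi(dims.data()));  // parse the leading dimension
      size_t comma = dims.find(",");
      if (comma == std::string::npos) break;
      dims = dims.substr(comma + 1);
    }
    shapes.push_back(shape);
    if (colon == std::string::npos) break;
    rest = rest.substr(colon + 1);
  }
  return shapes;
}

int main() {
  for (const auto& shape : ParseShapes("1,3,224,224:1,5")) {
    for (auto d : shape) std::cout << d << " ";
    std::cout << "\n";  // prints "1 3 224 224" then "1 5"
  }
  return 0;
}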