improved unit test output (#17266)

Added printing of the data type to differentiate int8 and fp32 latency results test=develop

improved unit test output (#17266)
Added printing of the data type to differentiate int8 and fp32 latency results test=develop
984aa905 · Wojciech Uss · Tao Luo · 65541d83 · 984aa905 · 984aa905
Showing with 29 additions and 14 deletions

paddle/fluid/inference/api/helper.h paddle/fluid/inference/api/helper.h +12 -3

paddle/fluid/inference/tests/api/tester_helper.h paddle/fluid/inference/tests/api/tester_helper.h +17 -11

File not found.
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -26,14 +26,20 @@
 #include <sstream>
 #include <string>
 #include <vector>
+#include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/string/printf.h"

+extern std::string paddle::framework::DataTypeToString(
+    const framework::proto::VarType::Type type);
+
 namespace paddle {
 namespace inference {

+using paddle::framework::DataTypeToString;
+
 // Timer for timer
 class Timer {
 public:
@@ -267,17 +273,20 @@ static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) {
 }

 static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
-                      double batch_latency, int epoch = 1) {
+                      double batch_latency, int epoch = 1,
+                      const framework::proto::VarType::Type data_type =
+                          framework::proto::VarType::FP32) {
  PADDLE_ENFORCE(batch_size > 0, "Non-positive batch size.");
  double sample_latency = batch_latency / batch_size;
  LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
            << " ======";
-  LOG(INFO) << "====== batch_size: " << batch_size << ", iterations: " << epoch
+  LOG(INFO) << "====== batch size: " << batch_size << ", iterations: " << epoch
            << ", repetitions: " << repeat << " ======";
  LOG(INFO) << "====== batch latency: " << batch_latency
            << "ms, number of samples: " << batch_size * epoch
            << ", sample latency: " << sample_latency
-            << "ms, fps: " << 1000.f / sample_latency << " ======";
+            << "ms, fps: " << 1000.f / sample_latency
+            << ", data type: " << DataTypeToString(data_type) << " ======";
 }

 static bool IsFileExists(const std::string &path) {

--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -65,6 +65,8 @@ DECLARE_int32(paddle_num_threads);
 namespace paddle {
 namespace inference {

+using paddle::framework::proto::VarType;
+
 template <typename T>
 constexpr paddle::PaddleDType GetPaddleDType();

@@ -293,7 +295,8 @@ void ConvertPaddleTensorToZeroCopyTensor(
 void PredictionWarmUp(PaddlePredictor *predictor,
                      const std::vector<std::vector<PaddleTensor>> &inputs,
                      std::vector<std::vector<PaddleTensor>> *outputs,
-                      int num_threads, int tid) {
+                      int num_threads, int tid,
+                      const VarType::Type data_type = VarType::FP32) {
  int batch_size = FLAGS_batch_size;
  LOG(INFO) << "Running thread " << tid << ", warm up run...";
  if (FLAGS_zero_copy) {
@@ -307,7 +310,7 @@ void PredictionWarmUp(PaddlePredictor *predictor,
  } else {
    predictor->ZeroCopyRun();
  }
-  PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1);
+  PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1, data_type);
  if (FLAGS_profile) {
    paddle::platform::ResetProfiler();
  }
@@ -316,7 +319,8 @@ void PredictionWarmUp(PaddlePredictor *predictor,
 void PredictionRun(PaddlePredictor *predictor,
                   const std::vector<std::vector<PaddleTensor>> &inputs,
                   std::vector<std::vector<PaddleTensor>> *outputs,
-                   int num_threads, int tid) {
+                   int num_threads, int tid,
+                   const VarType::Type data_type = VarType::FP32) {
  int num_times = FLAGS_repeat;
  int iterations = inputs.size();  // process the whole dataset ...
  if (FLAGS_iterations > 0 &&
@@ -355,7 +359,7 @@ void PredictionRun(PaddlePredictor *predictor,

  auto batch_latency = elapsed_time / (iterations * num_times);
  PrintTime(FLAGS_batch_size, num_times, num_threads, tid, batch_latency,
-            iterations);
+            iterations, data_type);
  if (FLAGS_record_benchmark) {
    Benchmark benchmark;
    benchmark.SetName(FLAGS_model_name);
@@ -368,12 +372,13 @@ void PredictionRun(PaddlePredictor *predictor,
 void TestOneThreadPrediction(
    const PaddlePredictor::Config *config,
    const std::vector<std::vector<PaddleTensor>> &inputs,
-    std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
+    std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true,
+    const VarType::Type data_type = VarType::FP32) {
  auto predictor = CreateTestPredictor(config, use_analysis);
  if (FLAGS_warmup) {
-    PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
+    PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0, data_type);
  }
-  PredictionRun(predictor.get(), inputs, outputs, 1, 0);
+  PredictionRun(predictor.get(), inputs, outputs, 1, 0, data_type);
 }

 void TestMultiThreadPrediction(
@@ -505,13 +510,14 @@ void CompareQuantizedAndAnalysis(
  auto *cfg = reinterpret_cast<const PaddlePredictor::Config *>(config);
  PrintConfig(cfg, true);
  std::vector<std::vector<PaddleTensor>> analysis_outputs;
-  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true);
+  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32);

  LOG(INFO) << "--- INT8 prediction start ---";
  auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig);
  PrintConfig(qcfg, true);
  std::vector<std::vector<PaddleTensor>> quantized_outputs;
-  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true);
+  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true,
+                          VarType::INT8);

  LOG(INFO) << "--- comparing outputs --- ";
  CompareTopAccuracy(quantized_outputs, analysis_outputs);
@@ -640,7 +646,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
  }

  for (size_t i = 0; i < a_size; i++) {
-    if (a.type() == framework::proto::VarType::FP32) {
+    if (a.type() == VarType::FP32) {
      const auto *a_data = a.data<float>();
      const auto *b_data = b.data<float>();
      if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
@@ -649,7 +655,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
            b_data[i]);
        return false;
      }
-    } else if (a.type() == framework::proto::VarType::INT64) {
+    } else if (a.type() == VarType::INT64) {
      const auto *a_data = a.data<int64_t>();
      const auto *b_data = b.data<int64_t>();
      if (std::abs(a_data[i] - b_data[i]) > 1e-3) {