Commit 2c5c6365 — authored by: pawelpiotrowicz; committed by: Tao Luo

Add two extra flags for test_analyzer_int8_image_classification to disable fp32/int8 (#19840)

test=develop
Parent commit: cb65439d
...@@ -42,6 +42,8 @@ DEFINE_string(infer_model, "", "model path"); ...@@ -42,6 +42,8 @@ DEFINE_string(infer_model, "", "model path");
DEFINE_string(infer_data, "", "data file"); DEFINE_string(infer_data, "", "data file");
DEFINE_string(refer_result, "", "reference result for comparison"); DEFINE_string(refer_result, "", "reference result for comparison");
DEFINE_int32(batch_size, 1, "batch size"); DEFINE_int32(batch_size, 1, "batch size");
// Flags allowing the FP32 / INT8 runs to be disabled independently, so a
// single precision can be benchmarked or validated in isolation.
DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction");
DEFINE_bool(enable_int8, true, "Enable INT8 type prediction");
DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup"); DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup");
// setting iterations to 0 means processing the whole dataset // setting iterations to 0 means processing the whole dataset
DEFINE_int32(iterations, 0, "number of batches to process"); DEFINE_int32(iterations, 0, "number of batches to process");
...@@ -482,68 +484,88 @@ void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8, ...@@ -482,68 +484,88 @@ void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
<< std::setprecision(4) << avg_acc_int8; << std::setprecision(4) << avg_acc_int8;
} }
// Logs the average latency (ms) and the derived throughput (fps) for one
// prediction mode, labelled by `title` (e.g. "FP32" or "INT8").
void SummarizePerformance(const char *title, float sample) {
  // A non-positive latency means this mode never actually ran — caller bug.
  CHECK_GT(sample, 0.0);
  const auto fps = 1000.0 / sample;  // sample is a per-batch latency in ms
  LOG(INFO) << title << ": avg fps: " << std::fixed << std::setw(6)
            << std::setprecision(4) << fps << ", avg latency: " << sample
            << " ms";
}
void SummarizePerformance(float sample_latency_fp32, void SummarizePerformance(float sample_latency_fp32,
float sample_latency_int8) { float sample_latency_int8) {
// sample latency in ms if (FLAGS_enable_fp32) SummarizePerformance("FP32", sample_latency_fp32);
auto throughput_fp32 = 1000.0 / sample_latency_fp32; if (FLAGS_enable_int8) SummarizePerformance("INT8", sample_latency_int8);
auto throughput_int8 = 1000.0 / sample_latency_int8;
LOG(INFO) << "--- Performance summary --- ";
LOG(INFO) << "FP32: avg fps: " << std::fixed << std::setw(6)
<< std::setprecision(4) << throughput_fp32
<< ", avg latency: " << sample_latency_fp32 << " ms";
LOG(INFO) << "INT8: avg fps: " << std::fixed << std::setw(6)
<< std::setprecision(4) << throughput_int8
<< ", avg latency: " << sample_latency_int8 << " ms";
} }
void CompareAccuracy( float CompareAccuracyOne(
const std::vector<std::vector<PaddleTensor>> &output_slots_quant, const std::vector<std::vector<PaddleTensor>> &output_slots,
const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
int compared_idx) { int compared_idx) {
if (output_slots_quant.size() == 0 || output_slots_ref.size() == 0) if (output_slots.size() == 0)
throw std::invalid_argument( throw std::invalid_argument(
"CompareAccuracy: output_slots vector is empty."); "CompareAccuracy: output_slots vector is empty.");
float total_accs_quant{0}; float total_accs{0};
float total_accs_ref{0};
for (size_t i = 0; i < output_slots_quant.size(); ++i) { for (size_t i = 0; i < output_slots.size(); ++i) {
if (compared_idx == 1) { switch (compared_idx) {
PADDLE_ENFORCE_GE( case 1:
output_slots_quant[i].size(), 2UL, PADDLE_ENFORCE_GE(
"To achieve top 1 accuracy, output_slots_quant[i].size()>=2"); output_slots[i].size(), 2UL,
PADDLE_ENFORCE_GE( "To achieve top 1 accuracy, output_slots_quant[i].size()>=2");
output_slots_ref[i].size(), 2UL, break;
"To achieve top 1 accuracy, output_slots_ref[i].size()>=2"); case 2:
} else if (compared_idx == 2) { PADDLE_ENFORCE_GE(
PADDLE_ENFORCE_GE(output_slots_quant[i].size(), 3UL, output_slots[i].size(), 2UL,
"To achieve mAP, output_slots_quant[i].size()>=3"); "To achieve top 1 accuracy, output_slots_ref[i].size()>=2");
PADDLE_ENFORCE_GE(output_slots_ref[i].size(), 3UL, break;
"To achieve mAP, output_slots_ref[i].size()>=3"); default:
} else { throw std::invalid_argument(
throw std::invalid_argument( "CompareAccuracy: compared_idx is out of range.");
"CompareAccuracy: compared_idx is out of range.");
} }
if (output_slots_quant[i][compared_idx].lod.size() > 0 || if (output_slots[i][compared_idx].lod.size() > 0)
output_slots_ref[i][compared_idx].lod.size() > 0)
throw std::invalid_argument("CompareAccuracy: output has nonempty LoD."); throw std::invalid_argument("CompareAccuracy: output has nonempty LoD.");
if (output_slots_quant[i][compared_idx].dtype !=
paddle::PaddleDType::FLOAT32 || if (output_slots[i][compared_idx].dtype != paddle::PaddleDType::FLOAT32)
output_slots_ref[i][compared_idx].dtype != paddle::PaddleDType::FLOAT32)
throw std::invalid_argument( throw std::invalid_argument(
"CompareAccuracy: output is of a wrong type."); "CompareAccuracy: output is of a wrong type.");
total_accs_quant +=
*static_cast<float *>(output_slots_quant[i][compared_idx].data.data()); total_accs +=
total_accs_ref += *static_cast<float *>(output_slots[i][compared_idx].data.data());
*static_cast<float *>(output_slots_ref[i][compared_idx].data.data());
} }
float avg_acc_quant = total_accs_quant / output_slots_quant.size();
float avg_acc_ref = total_accs_ref / output_slots_ref.size(); CHECK_GT(output_slots.size(), 0);
return total_accs / output_slots.size();
}
// Compares the FP32 (reference) and INT8 (quantized) average accuracies and
// enforces that quantization degrades accuracy by no more than
// FLAGS_quantized_accuracy. Precisions disabled via --enable_fp32 /
// --enable_int8 are skipped, including their sanity checks.
void CompareAccuracy(
    const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
    const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
    int compared_idx) {
  // BUG FIX: the closing parenthesis was misplaced —
  // `(quant.size() == 0 || ref.size()) == 0` compared the OR result with 0,
  // so an empty reference vector alone never triggered the guard.
  if ((FLAGS_enable_fp32 && FLAGS_enable_int8) &&
      (output_slots_quant.size() == 0 || output_slots_ref.size() == 0))
    throw std::invalid_argument(
        "CompareAccuracy: output_slots vector is empty.");

  float avg_acc_quant = 0.0;
  float avg_acc_ref = 0.0;

  if (FLAGS_enable_int8)
    avg_acc_quant = CompareAccuracyOne(output_slots_quant, compared_idx);
  if (FLAGS_enable_fp32)
    avg_acc_ref = CompareAccuracyOne(output_slots_ref, compared_idx);

  SummarizeAccuracy(avg_acc_ref, avg_acc_quant, compared_idx);

  // Sanity-check only the modes that actually ran.
  if (FLAGS_enable_fp32) CHECK_GT(avg_acc_ref, 0.0);
  if (FLAGS_enable_int8) CHECK_GT(avg_acc_quant, 0.0);
  // The accuracy-drop bound is meaningful only when both modes ran.
  if (FLAGS_enable_fp32 && FLAGS_enable_int8)
    CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy);
}
void CompareDeterministic( void CompareDeterministic(
...@@ -591,18 +613,24 @@ void CompareQuantizedAndAnalysis( ...@@ -591,18 +613,24 @@ void CompareQuantizedAndAnalysis(
PrintConfig(cfg, true); PrintConfig(cfg, true);
std::vector<std::vector<PaddleTensor>> analysis_outputs; std::vector<std::vector<PaddleTensor>> analysis_outputs;
float sample_latency_fp32{-1}; float sample_latency_fp32{-1};
TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32,
&sample_latency_fp32); if (FLAGS_enable_fp32) {
TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32,
&sample_latency_fp32);
}
LOG(INFO) << "--- INT8 prediction start ---"; LOG(INFO) << "--- INT8 prediction start ---";
auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig); auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig);
PrintConfig(qcfg, true); PrintConfig(qcfg, true);
std::vector<std::vector<PaddleTensor>> quantized_outputs; std::vector<std::vector<PaddleTensor>> quantized_outputs;
float sample_latency_int8{-1}; float sample_latency_int8{-1};
TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true, VarType::INT8,
&sample_latency_int8);
if (FLAGS_enable_int8) {
TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true,
VarType::INT8, &sample_latency_int8);
}
SummarizePerformance(sample_latency_fp32, sample_latency_int8); SummarizePerformance(sample_latency_fp32, sample_latency_int8);
CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx); CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册