Fix problem with flags fp32 and int8 (#27954)

840c521b · joanna.wozna.intel · GitHub · 5ccaaab8 · 840c521b · 840c521b
Showing 2 changed files with 21 additions and 15 deletions

paddle/fluid/inference/tests/api/CMakeLists.txt +4 -0

paddle/fluid/inference/tests/api/tester_helper.h +17 -15

File not found.
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -66,6 +66,7 @@ function(inference_analysis_api_int8_test_run TARGET_NAME test_binary model_dir
             --infer_data=${data_path}
             --warmup_batch_size=${WARMUP_BATCH_SIZE}
             --batch_size=50
+             --enable_int8=true
             --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
 	     --iterations=2)
 endfunction()
@@ -81,6 +82,7 @@ function(inference_analysis_api_bfloat16_test_run TARGET_NAME test_binary model_
        ARGS --infer_model=${model_dir}/model
             --infer_data=${data_path}
             --batch_size=50
+             --enable_bf16=true
             --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
 	     --iterations=2)
 endfunction()
@@ -92,6 +94,7 @@ function(inference_analysis_api_object_dection_int8_test_run TARGET_NAME test_bi
             --infer_data=${data_path}
             --warmup_batch_size=10
             --batch_size=300
+             --enable_int8=true
             --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
 	     --iterations=1)
 endfunction()
@@ -115,6 +118,7 @@ function(inference_analysis_api_quant_test_run TARGET_NAME test_binary fp32_mode
             --int8_model=${int8_model_dir}
             --infer_data=${data_path}
             --batch_size=50
+             --enable_int8=true
             --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
             --with_accuracy_layer=false
             --iterations=2)

--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -50,8 +50,8 @@ DEFINE_bool(ernie_large, false, "Test ernie large");
 DEFINE_bool(with_accuracy_layer, true,
            "Calculate the accuracy while label is in the input");
 DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction");
-DEFINE_bool(enable_bf16, true, "Enable BF16 type prediction");
-DEFINE_bool(enable_int8, true, "Enable INT8 type prediction");
+DEFINE_bool(enable_bf16, false, "Enable BF16 type prediction");
+DEFINE_bool(enable_int8, false, "Enable INT8 type prediction");
 DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup");
 // setting iterations to 0 means processing the whole dataset
 DEFINE_int32(iterations, 0, "number of batches to process");
@@ -639,8 +639,9 @@ void TestPrediction(const PaddlePredictor::Config *config,
  }
 }

-void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
-                       int compared_idx) {
+void SummarizeAccuracy(float avg_acc_ref, float avg_acc, int compared_idx) {
+  std::string data_type_name = "INT8";
+  if (FLAGS_enable_bf16) data_type_name = "BF16";
  PADDLE_ENFORCE_LE(
      compared_idx, 2,
      platform::errors::InvalidArgument(
@@ -659,12 +660,12 @@ void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
  LOG(INFO) << "--- Accuracy summary --- ";
  LOG(INFO) << "Accepted " << prefix
            << "drop threshold: " << FLAGS_quantized_accuracy
-            << ". (condition: (FP32_" << prefix << " - INT8_" << prefix
-            << ") <= threshold)";
+            << ". (condition: (FP32_" << prefix << " - " << data_type_name
+            << "_" << prefix << ") <= threshold)";
  LOG(INFO) << "FP32: avg " << prefix << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc_fp32;
-  LOG(INFO) << "INT8: avg " << prefix << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc_int8;
+            << std::setprecision(4) << avg_acc_ref;
+  LOG(INFO) << data_type_name << ": avg " << prefix << std::fixed
+            << std::setw(6) << std::setprecision(4) << avg_acc;
 }

 void SummarizePerformance(const char *title, float sample) {
@@ -677,8 +678,9 @@ void SummarizePerformance(const char *title, float sample) {

 void SummarizePerformance(const char *title_fp32, float sample_latency_fp32,
                          const char *title, float sample_latency) {
-  SummarizePerformance(title_fp32, sample_latency_fp32);
-  SummarizePerformance(title, sample_latency);
+  if (FLAGS_enable_fp32) SummarizePerformance(title_fp32, sample_latency_fp32);
+  if (FLAGS_enable_int8 || FLAGS_enable_bf16)
+    SummarizePerformance(title, sample_latency);
 }

 float CompareAccuracyOne(
@@ -733,7 +735,7 @@ void CompareAccuracy(
    const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
    const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
    int compared_idx) {
-  if ((FLAGS_enable_fp32 && FLAGS_enable_int8) &&
+  if ((FLAGS_enable_fp32 && (FLAGS_enable_int8 || FLAGS_enable_bf16)) &&
      (output_slots_quant.size() == 0 || output_slots_ref.size()) == 0)
    throw std::invalid_argument(
        "CompareAccuracy: output_slots vector is empty.");
@@ -741,7 +743,7 @@ void CompareAccuracy(
  float avg_acc_quant = 0.0;
  float avg_acc_ref = 0.0;

-  if (FLAGS_enable_int8)
+  if (FLAGS_enable_int8 || FLAGS_enable_bf16)
    avg_acc_quant = CompareAccuracyOne(output_slots_quant, compared_idx);

  if (FLAGS_enable_fp32)
@@ -751,9 +753,9 @@ void CompareAccuracy(

  if (FLAGS_enable_fp32) CHECK_GT(avg_acc_ref, 0.0);

-  if (FLAGS_enable_int8) CHECK_GT(avg_acc_quant, 0.0);
+  if (FLAGS_enable_int8 || FLAGS_enable_bf16) CHECK_GT(avg_acc_quant, 0.0);

-  if (FLAGS_enable_fp32 && FLAGS_enable_int8)
+  if (FLAGS_enable_fp32 && (FLAGS_enable_int8 || FLAGS_enable_bf16))
    CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy);
 }