diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 1907bb93ccbfbcdb127e8b28de26fb499ab170b4..17d2c0c0eef8b6b35770157e1773c7f8efa3ab1b 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -66,6 +66,7 @@ function(inference_analysis_api_int8_test_run TARGET_NAME test_binary model_dir
                --infer_data=${data_path}
                --warmup_batch_size=${WARMUP_BATCH_SIZE}
                --batch_size=50
+               --enable_int8=true
                --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
                --iterations=2)
 endfunction()
@@ -81,6 +82,7 @@ function(inference_analysis_api_bfloat16_test_run TARGET_NAME test_binary model_
           ARGS --infer_model=${model_dir}/model
                --infer_data=${data_path}
                --batch_size=50
+               --enable_bf16=true
                --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
                --iterations=2)
 endfunction()
@@ -92,6 +94,7 @@ function(inference_analysis_api_object_dection_int8_test_run TARGET_NAME test_bi
                --infer_data=${data_path}
                --warmup_batch_size=10
                --batch_size=300
+               --enable_int8=true
                --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
                --iterations=1)
 endfunction()
@@ -115,6 +118,7 @@ function(inference_analysis_api_quant_test_run TARGET_NAME test_binary fp32_mode
                --int8_model=${int8_model_dir}
                --infer_data=${data_path}
                --batch_size=50
+               --enable_int8=true
                --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
                --with_accuracy_layer=false
                --iterations=2)
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index db22ba59073bc5025355331f700c0acf0f3918fd..c9292ddc710e7e55d7767b03a552be7a8ee9c95d 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -50,8 +50,8 @@ DEFINE_bool(ernie_large, false, "Test ernie large");
 DEFINE_bool(with_accuracy_layer, true,
             "Calculate the accuracy while label is in the input");
 DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction");
-DEFINE_bool(enable_bf16, true, "Enable BF16 type prediction");
-DEFINE_bool(enable_int8, true, "Enable INT8 type prediction");
+DEFINE_bool(enable_bf16, false, "Enable BF16 type prediction");
+DEFINE_bool(enable_int8, false, "Enable INT8 type prediction");
 DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup");
 // setting iterations to 0 means processing the whole dataset
 DEFINE_int32(iterations, 0, "number of batches to process");
@@ -639,8 +639,9 @@ void TestPrediction(const PaddlePredictor::Config *config,
   }
 }
 
-void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
-                       int compared_idx) {
+void SummarizeAccuracy(float avg_acc_ref, float avg_acc, int compared_idx) {
+  std::string data_type_name = "INT8";
+  if (FLAGS_enable_bf16) data_type_name = "BF16";
   PADDLE_ENFORCE_LE(
       compared_idx, 2,
       platform::errors::InvalidArgument(
@@ -659,12 +660,12 @@ void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
   LOG(INFO) << "--- Accuracy summary --- ";
   LOG(INFO) << "Accepted " << prefix
             << "drop threshold: " << FLAGS_quantized_accuracy
-            << ". (condition: (FP32_" << prefix << " - INT8_" << prefix
-            << ") <= threshold)";
+            << ". (condition: (FP32_" << prefix << " - " << data_type_name
+            << "_" << prefix << ") <= threshold)";
   LOG(INFO) << "FP32: avg " << prefix << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc_fp32;
-  LOG(INFO) << "INT8: avg " << prefix << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc_int8;
+            << std::setprecision(4) << avg_acc_ref;
+  LOG(INFO) << data_type_name << ": avg " << prefix << std::fixed
+            << std::setw(6) << std::setprecision(4) << avg_acc;
 }
 
 void SummarizePerformance(const char *title, float sample) {
@@ -677,8 +678,9 @@ void SummarizePerformance(const char *title, float sample) {
 
 void SummarizePerformance(const char *title_fp32, float sample_latency_fp32,
                           const char *title, float sample_latency) {
-  SummarizePerformance(title_fp32, sample_latency_fp32);
-  SummarizePerformance(title, sample_latency);
+  if (FLAGS_enable_fp32) SummarizePerformance(title_fp32, sample_latency_fp32);
+  if (FLAGS_enable_int8 || FLAGS_enable_bf16)
+    SummarizePerformance(title, sample_latency);
 }
 
 float CompareAccuracyOne(
@@ -733,7 +735,7 @@ void CompareAccuracy(
     const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
     const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
     int compared_idx) {
-  if ((FLAGS_enable_fp32 && FLAGS_enable_int8) &&
+  if ((FLAGS_enable_fp32 && (FLAGS_enable_int8 || FLAGS_enable_bf16)) &&
       (output_slots_quant.size() == 0 || output_slots_ref.size()) == 0)
     throw std::invalid_argument(
         "CompareAccuracy: output_slots vector is empty.");
@@ -741,7 +743,7 @@ void CompareAccuracy(
   float avg_acc_quant = 0.0;
   float avg_acc_ref = 0.0;
 
-  if (FLAGS_enable_int8)
+  if (FLAGS_enable_int8 || FLAGS_enable_bf16)
     avg_acc_quant = CompareAccuracyOne(output_slots_quant, compared_idx);
 
   if (FLAGS_enable_fp32)
@@ -751,9 +753,9 @@ void CompareAccuracy(
 
   if (FLAGS_enable_fp32) CHECK_GT(avg_acc_ref, 0.0);
 
-  if (FLAGS_enable_int8) CHECK_GT(avg_acc_quant, 0.0);
+  if (FLAGS_enable_int8 || FLAGS_enable_bf16) CHECK_GT(avg_acc_quant, 0.0);
 
-  if (FLAGS_enable_fp32 && FLAGS_enable_int8)
+  if (FLAGS_enable_fp32 && (FLAGS_enable_int8 || FLAGS_enable_bf16))
     CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy);
 }