From 2c5c6365144a1bc9f36be2af53758c391b321f84 Mon Sep 17 00:00:00 2001
From: pawelpiotrowicz <48519735+pawelpiotrowicz@users.noreply.github.com>
Date: Sat, 21 Sep 2019 13:16:21 +0200
Subject: [PATCH] Add two extra flags for
 test_analyzer_int8_image_classification to disable fp32/int8 (#19840)

test=develop
---
 .../fluid/inference/tests/api/tester_helper.h | 128 +++++++++++-------
 1 file changed, 78 insertions(+), 50 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 4b751a1cc94..463fc4b12fc 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -42,6 +42,8 @@ DEFINE_string(infer_model, "", "model path");
 DEFINE_string(infer_data, "", "data file");
 DEFINE_string(refer_result, "", "reference result for comparison");
 DEFINE_int32(batch_size, 1, "batch size");
+DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction");
+DEFINE_bool(enable_int8, true, "Enable INT8 type prediction");
 DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup");
 // setting iterations to 0 means processing the whole dataset
 DEFINE_int32(iterations, 0, "number of batches to process");
@@ -482,68 +484,88 @@ void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
             << std::setprecision(4) << avg_acc_int8;
 }
 
+void SummarizePerformance(const char *title, float sample) {
+  CHECK_GT(sample, 0.0);
+  auto throughput = 1000.0 / sample;
+  LOG(INFO) << title << ": avg fps: " << std::fixed << std::setw(6)
+            << std::setprecision(4) << throughput << ", avg latency: " << sample
+            << " ms";
+}
+
 void SummarizePerformance(float sample_latency_fp32,
                           float sample_latency_int8) {
-  // sample latency in ms
-  auto throughput_fp32 = 1000.0 / sample_latency_fp32;
-  auto throughput_int8 = 1000.0 / sample_latency_int8;
-  LOG(INFO) << "--- Performance summary --- ";
-  LOG(INFO) << "FP32: avg fps: " << std::fixed << std::setw(6)
-            << std::setprecision(4) << throughput_fp32
-            << ", avg latency: " << sample_latency_fp32 << " ms";
-  LOG(INFO) << "INT8: avg fps: " << std::fixed << std::setw(6)
-            << std::setprecision(4) << throughput_int8
-            << ", avg latency: " << sample_latency_int8 << " ms";
+  if (FLAGS_enable_fp32) SummarizePerformance("FP32", sample_latency_fp32);
+  if (FLAGS_enable_int8) SummarizePerformance("INT8", sample_latency_int8);
 }
 
-void CompareAccuracy(
-    const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
-    const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
+float CompareAccuracyOne(
+    const std::vector<std::vector<PaddleTensor>> &output_slots,
     int compared_idx) {
-  if (output_slots_quant.size() == 0 || output_slots_ref.size() == 0)
+  if (output_slots.size() == 0)
     throw std::invalid_argument(
         "CompareAccuracy: output_slots vector is empty.");
-  float total_accs_quant{0};
-  float total_accs_ref{0};
-  for (size_t i = 0; i < output_slots_quant.size(); ++i) {
-    if (compared_idx == 1) {
-      PADDLE_ENFORCE_GE(
-          output_slots_quant[i].size(), 2UL,
-          "To achieve top 1 accuracy, output_slots_quant[i].size()>=2");
-      PADDLE_ENFORCE_GE(
-          output_slots_ref[i].size(), 2UL,
-          "To achieve top 1 accuracy, output_slots_ref[i].size()>=2");
-    } else if (compared_idx == 2) {
-      PADDLE_ENFORCE_GE(output_slots_quant[i].size(), 3UL,
-                        "To achieve mAP, output_slots_quant[i].size()>=3");
-      PADDLE_ENFORCE_GE(output_slots_ref[i].size(), 3UL,
-                        "To achieve mAP, output_slots_ref[i].size()>=3");
-    } else {
-      throw std::invalid_argument(
-          "CompareAccuracy: compared_idx is out of range.");
+  float total_accs{0};
+
+  for (size_t i = 0; i < output_slots.size(); ++i) {
+    switch (compared_idx) {
+      case 1:
+        PADDLE_ENFORCE_GE(
+            output_slots[i].size(), 2UL,
+            "To achieve top 1 accuracy, output_slots[i].size()>=2");
+        break;
+      case 2:
+        PADDLE_ENFORCE_GE(
+            output_slots[i].size(), 3UL,
+            "To achieve mAP, output_slots[i].size()>=3");
+        break;
+      default:
+        throw std::invalid_argument(
+            "CompareAccuracy: compared_idx is out of range.");
     }
-    if (output_slots_quant[i][compared_idx].lod.size() > 0 ||
-        output_slots_ref[i][compared_idx].lod.size() > 0)
+
+    if (output_slots[i][compared_idx].lod.size() > 0)
       throw std::invalid_argument("CompareAccuracy: output has nonempty LoD.");
-    if (output_slots_quant[i][compared_idx].dtype !=
-            paddle::PaddleDType::FLOAT32 ||
-        output_slots_ref[i][compared_idx].dtype != paddle::PaddleDType::FLOAT32)
+
+    if (output_slots[i][compared_idx].dtype != paddle::PaddleDType::FLOAT32)
       throw std::invalid_argument(
           "CompareAccuracy: output is of a wrong type.");
-    total_accs_quant +=
-        *static_cast<float *>(output_slots_quant[i][compared_idx].data.data());
-    total_accs_ref +=
-        *static_cast<float *>(output_slots_ref[i][compared_idx].data.data());
+
+    total_accs +=
+        *static_cast<float *>(output_slots[i][compared_idx].data.data());
   }
-  float avg_acc_quant = total_accs_quant / output_slots_quant.size();
-  float avg_acc_ref = total_accs_ref / output_slots_ref.size();
+
+  CHECK_GT(output_slots.size(), 0);
+
+  return total_accs / output_slots.size();
+}
+
+void CompareAccuracy(
+    const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
+    const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
+    int compared_idx) {
+  if ((FLAGS_enable_fp32 && FLAGS_enable_int8) &&
+      (output_slots_quant.size() == 0 || output_slots_ref.size() == 0))
+    throw std::invalid_argument(
+        "CompareAccuracy: output_slots vector is empty.");
+
+  float avg_acc_quant = 0.0;
+  float avg_acc_ref = 0.0;
+
+  if (FLAGS_enable_int8)
+    avg_acc_quant = CompareAccuracyOne(output_slots_quant, compared_idx);
+
+  if (FLAGS_enable_fp32)
+    avg_acc_ref = CompareAccuracyOne(output_slots_ref, compared_idx);
 
   SummarizeAccuracy(avg_acc_ref, avg_acc_quant, compared_idx);
-  CHECK_GT(avg_acc_ref, 0.0);
-  CHECK_GT(avg_acc_quant, 0.0);
-  CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy);
+
+  if (FLAGS_enable_fp32) CHECK_GT(avg_acc_ref, 0.0);
+
+  if (FLAGS_enable_int8) CHECK_GT(avg_acc_quant, 0.0);
+
+  if (FLAGS_enable_fp32 && FLAGS_enable_int8)
+    CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy);
 }
 
 void CompareDeterministic(
@@ -591,18 +613,24 @@ void CompareQuantizedAndAnalysis(
   PrintConfig(cfg, true);
   std::vector<std::vector<PaddleTensor>> analysis_outputs;
   float sample_latency_fp32{-1};
-  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32,
-                          &sample_latency_fp32);
+
+  if (FLAGS_enable_fp32) {
+    TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32,
+                            &sample_latency_fp32);
+  }
 
   LOG(INFO) << "--- INT8 prediction start ---";
   auto *qcfg = reinterpret_cast<const AnalysisConfig *>(qconfig);
   PrintConfig(qcfg, true);
   std::vector<std::vector<PaddleTensor>> quantized_outputs;
   float sample_latency_int8{-1};
-  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true, VarType::INT8,
-                          &sample_latency_int8);
+  if (FLAGS_enable_int8) {
+    TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true,
+                            VarType::INT8, &sample_latency_int8);
+  }
   SummarizePerformance(sample_latency_fp32, sample_latency_int8);
+
   CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
 }
--
GitLab
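Usage sketch: the two new flags are ordinary gflags switches, so either
precision pass can be skipped from the command line. The invocation below is
illustrative only; the binary name is taken from the subject line and the
model/data paths are placeholders:

    ./test_analyzer_int8_image_classification \
        --infer_model=<fp32_model_dir> \
        --infer_data=<data_file> \
        --enable_fp32=false \
        --enable_int8=true

With --enable_fp32=false, CompareQuantizedAndAnalysis skips the FP32
TestOneThreadPrediction run, SummarizePerformance logs only the INT8 line, and
CompareAccuracy omits the FP32 accuracy check as well as the FP32-vs-INT8
comparison against FLAGS_quantized_accuracy.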