From eb7c211a762c0961915c0f9a5d7b0010cd2746e2 Mon Sep 17 00:00:00 2001
From: "joanna.wozna.intel"
Date: Tue, 1 Mar 2022 11:33:10 +0100
Subject: [PATCH] Add mobilenetv3_large performance test for bf16 and int8
 (#39738)

* Add mobilenetv3_large performance test

* Disable the BF16 test if the device does not support BF16 computations

* Change test timeout
---
 .../fluid/inference/tests/api/CMakeLists.txt  | 29 ++++++++++++++++++
 ...er_bfloat16_image_classification_tester.cc | 15 ++++++++--
 ...alyzer_int8_image_classification_tester.cc |  7 ++++-
 .../fluid/inference/tests/api/tester_helper.h | 30 +++++++++++--------
 .../fluid/contrib/slim/tests/CMakeLists.txt   | 11 +++++--
 5 files changed, 75 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 37214534f3c..0281fd91765 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -453,6 +453,23 @@ if(WITH_MKLDNN)
     download_int8_data_without_verify(${INT8_GOOGLENET_MODEL_DIR} "GoogleNet_int8_model.tar.gz" )
     inference_analysis_api_int8_test_run_custom_warmup_batch_size(test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} 10)
 
+    # mobilenetv3_large_x1_0 int8
+    set(INT8_MOBILENETV3_LARGE_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv3_large")
+    set(INT8_MOBILENETV3_FILE_NAME "MobileNetV3_large_x1_0_infer.tar")
+    if (NOT EXISTS ${INT8_MOBILENETV3_LARGE_MODEL_DIR}/${INT8_MOBILENETV3_FILE_NAME})
+      inference_download_and_uncompress_without_verify(${INT8_MOBILENETV3_LARGE_MODEL_DIR} "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/" ${INT8_MOBILENETV3_FILE_NAME})
+    endif()
+    inference_analysis_test_run(test_analyzer_int8_mobilenetv3_large
+      COMMAND ${INT8_IMG_CLASS_TEST_APP}
+      ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
+           --infer_data=${IMAGENET_DATA_PATH}
+           --warmup_batch_size=50
+           --batch_size=1
+           --enable_int8=true
+           --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
+           --iterations=100
+           --with_accuracy_layer=false)
+
     ### BFLOAT16 tests
 
     # build test binary to be used in subsequent tests
@@ -472,6 +489,17 @@ if(WITH_MKLDNN)
     # mobilenetv2 bfloat16
     inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_mobilenetv2 ${BF16_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
+    # mobilenetv3_large
+    inference_analysis_test_run(test_analyzer_bfloat16_mobilenetv3_large
+      COMMAND ${BF16_IMG_CLASS_TEST_APP}
+      ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
+           --infer_data=${IMAGENET_DATA_PATH}
+           --batch_size=1
+           --enable_bf16=true
+           --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
+           --iterations=100
+           --with_accuracy_layer=false)
+
     ### Object detection models
     set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_300.bin")
     set(INT8_OBJ_DETECT_TEST_APP "test_analyzer_int8_object_detection")
@@ -739,6 +767,7 @@ if(WITH_MKLDNN)
       set_tests_properties(test_analyzer_quant_performance_benchmark PROPERTIES TIMEOUT 120)
       set_tests_properties(test_analyzer_int8_mobilenetv2 PROPERTIES TIMEOUT 120)
      set_tests_properties(test_analyzer_int8_mobilenetv1 PROPERTIES TIMEOUT 120)
+      set_tests_properties(test_analyzer_int8_mobilenetv3_large PROPERTIES TIMEOUT 120)
     endif()
 
     set_tests_properties(lite_resnet50_test PROPERTIES TIMEOUT 120)
diff --git a/paddle/fluid/inference/tests/api/analyzer_bfloat16_image_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_bfloat16_image_classification_tester.cc
index 3b16b0d34fd..f267f0f28d6 100644
--- a/paddle/fluid/inference/tests/api/analyzer_bfloat16_image_classification_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_bfloat16_image_classification_tester.cc
@@ -14,13 +14,19 @@ limitations under the License. */
 
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
+#include "paddle/fluid/platform/cpu_info.h"
 
 namespace paddle {
 namespace inference {
 namespace analysis {
 
 void SetConfig(AnalysisConfig *cfg) {
-  cfg->SetModel(FLAGS_infer_model);
+  std::ifstream model_file(FLAGS_infer_model + "/__model__");
+  if (model_file.good())
+    cfg->SetModel(FLAGS_infer_model);
+  else
+    cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
+                  FLAGS_infer_model + "/inference.pdiparams");
   cfg->DisableGpu();
   cfg->SwitchIrOptim();
   cfg->SwitchSpecifyInputNames();
@@ -38,7 +44,12 @@ TEST(Analyzer_bfloat16_image_classification, bfloat16) {
   // read data from file and prepare batches with test data
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInputs(&input_slots_all);
-  b_cfg.EnableMkldnnBfloat16();
+  if (FLAGS_enable_bf16 &&
+      platform::MayIUse(platform::cpu_isa_t::avx512_bf16)) {
+    b_cfg.EnableMkldnnBfloat16();
+  } else {
+    FLAGS_enable_bf16 = false;
+  }
   CompareBFloat16AndAnalysis(&cfg, &b_cfg, input_slots_all);
 }
 
diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
index 8f8b7304423..b07163b518b 100644
--- a/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
@@ -22,7 +22,12 @@ namespace inference {
 namespace analysis {
 
 void SetConfig(AnalysisConfig *cfg) {
-  cfg->SetModel(FLAGS_infer_model);
+  std::ifstream model_file(FLAGS_infer_model + "/__model__");
+  if (model_file.good())
+    cfg->SetModel(FLAGS_infer_model);
+  else
+    cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
+                  FLAGS_infer_model + "/inference.pdiparams");
   cfg->DisableGpu();
   cfg->SwitchIrOptim();
   cfg->SwitchSpecifyInputNames();
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 637fa16e31b..e63dfd14175 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -213,15 +213,15 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
               element_in_batch * 3 * 224 * 224,
           3 * 224 * 224,
           static_cast<float *>(images.data.data()) + i * 3 * 224 * 224);
-
-    std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
-                    element_in_batch,
-                1, static_cast<int64_t *>(labels.data.data()) + i);
+    if (FLAGS_with_accuracy_layer)
+      std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
+                      element_in_batch,
+                  1, static_cast<int64_t *>(labels.data.data()) + i);
   }
-
-  auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(2);
+  auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(
+      FLAGS_with_accuracy_layer ? 2 : 1);
   (*warmup_data)[0] = std::move(images);
-  (*warmup_data)[1] = std::move(labels);
+  if (FLAGS_with_accuracy_layer) (*warmup_data)[1] = std::move(labels);
 
   return warmup_data;
 }
@@ -254,9 +254,13 @@ void SetInputs(std::vector<std::vector<PaddleTensor>> *inputs,
   }
   for (auto i = 0; i < iterations; i++) {
     auto images = image_reader.NextBatch();
-    auto labels = label_reader.NextBatch();
-    inputs->emplace_back(
-        std::vector<PaddleTensor>{std::move(images), std::move(labels)});
+    std::vector<PaddleTensor> tmp_vec;
+    tmp_vec.push_back(std::move(images));
+    if (FLAGS_with_accuracy_layer) {
+      auto labels = label_reader.NextBatch();
+      tmp_vec.push_back(std::move(labels));
+    }
+    inputs->push_back(std::move(tmp_vec));
   }
 }
 
@@ -825,7 +829,8 @@ void CompareQuantizedAndAnalysis(
   SummarizePerformance("FP32", sample_latency_fp32, "INT8",
                        sample_latency_int8);
 
-  CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
+  if (FLAGS_with_accuracy_layer)
+    CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
 }
 
 void CompareBFloat16AndAnalysis(
@@ -864,7 +869,8 @@
   SummarizePerformance("FP32", sample_latency_fp32, "BF16",
                        sample_latency_bf16);
 
-  CompareAccuracy(bf16_outputs, analysis_outputs, compared_idx);
+  if (FLAGS_with_accuracy_layer)
+    CompareAccuracy(bf16_outputs, analysis_outputs, compared_idx);
 }
 
 void CompareAnalysisAndAnalysis(
diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
index 494ea969797..f75a0fa50a5 100644
--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -25,6 +25,12 @@ function(inference_analysis_python_api_int8_test_mkldnn target model_dir data_pa
   _inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_path} ${filename} True)
 endfunction()
 
+function(download_data install_dir url data_file check_sum)
+  if (NOT EXISTS ${install_dir}/${data_file})
+    inference_download_and_uncompress(${install_dir} ${url} ${data_file} ${check_sum})
+  endif()
+endfunction()
+
 function(download_quant_data install_dir data_file check_sum)
   if (NOT EXISTS ${install_dir}/${data_file})
     inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 ${data_file} ${check_sum})
@@ -290,8 +296,9 @@ if(LINUX AND WITH_MKLDNN)
     ### PTQ INT8
 
     # PTQ int8 lstm model
-    set(LSTM_DATA_ARCHIVE "unittest_model_data/quant_lstm_input_data.tar.gz")
-    download_quant_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_DATA_ARCHIVE} add84c754e9b792fea1fbd728d134ab7)
+    set(LSTM_DATA_FILE "quant_lstm_input_data.tar.gz")
+    set(LSTM_URL "${INFERENCE_URL}/int8/unittest_model_data")
+    download_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_URL} ${LSTM_DATA_FILE} add84c754e9b792fea1fbd728d134ab7)
     set(QUANT2_FP32_LSTM_MODEL_ARCHIVE "lstm_fp32_model.tar.gz")
     download_lstm_model(${QUANT2_INT8_LSTM_SAVE_PATH} ${QUANT2_FP32_LSTM_MODEL_ARCHIVE} eecd9f44d69a84acc1cf2235c4b8b743)
     inference_quant2_int8_lstm_model_test(test_quant2_int8_lstm_mkldnn ${QUANT2_INT8_LSTM_SAVE_PATH}/lstm_fp32_model ${QUANT2_LSTM_MODEL_DIR}/lstm_quant ${QUANT2_INT8_LSTM_SAVE_PATH}/quant_lstm_input_data)
--
GitLab