Unverified commit eb7c211a, authored by joanna.wozna.intel, committed by GitHub

Add mobilenetv3_large performance test for bf16 and int8 (#39738)

* Add mobilenetv3_large performance test

* Disable the BF16 test if the device does not support BF16 computations

* Change test timeout
Parent ce8ed978
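
The second commit-message item, disabling the BF16 test when the device cannot run BF16, is done by probing the CPU ISA at runtime, as the hunk in TEST(Analyzer_bfloat16_image_classification, bfloat16) further down shows. A minimal standalone sketch of that pattern follows; the helper name MaybeEnableBf16 and the free-standing config/flag parameters are introduced here only for illustration, while the ISA check and the EnableMkldnnBfloat16 call mirror the diff:

#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/platform/cpu_info.h"

// Hypothetical helper: turn on MKL-DNN BF16 only when the CPU reports
// AVX512_BF16 support; otherwise clear the flag so the run falls back to FP32.
void MaybeEnableBf16(paddle::AnalysisConfig *cfg, bool *enable_bf16) {
  if (*enable_bf16 &&
      paddle::platform::MayIUse(paddle::platform::cpu_isa_t::avx512_bf16)) {
    cfg->EnableMkldnnBfloat16();
  } else {
    *enable_bf16 = false;  // device lacks BF16 instructions; skip the BF16 path
  }
}
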
......@@ -453,6 +453,23 @@ if(WITH_MKLDNN)
download_int8_data_without_verify(${INT8_GOOGLENET_MODEL_DIR} "GoogleNet_int8_model.tar.gz" )
inference_analysis_api_int8_test_run_custom_warmup_batch_size(test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} 10)
# mobilenetv3_large_x1_0 int8
set(INT8_MOBILENETV3_LARGE_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv3_large")
set(INT8_MOBILENETV3_FILE_NAME "MobileNetV3_large_x1_0_infer.tar")
if (NOT EXISTS ${INT8_MOBILENETV3_LARGE_MODEL_DIR}/${INT8_MOBILENETV3_FILE_NAME})
inference_download_and_uncompress_without_verify(${INT8_MOBILENETV3_LARGE_MODEL_DIR} "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/" ${INT8_MOBILENETV3_FILE_NAME})
endif()
inference_analysis_test_run(test_analyzer_int8_mobilenetv3_large
COMMAND ${INT8_IMG_CLASS_TEST_APP}
ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
--infer_data=${IMAGENET_DATA_PATH}
--warmup_batch_size=50
--batch_size=1
--enable_int8=true
--cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
--iterations=100
--with_accuracy_layer=false)
### BFLOAT16 tests
# build test binary to be used in subsequent tests
......@@ -472,6 +489,17 @@ if(WITH_MKLDNN)
# mobilenetv2 bfloat16
inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_mobilenetv2 ${BF16_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH})
# mobilenetv3_large
inference_analysis_test_run(test_analyzer_bfloat16_mobilenetv3_large
COMMAND ${BF16_IMG_CLASS_TEST_APP}
ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
--infer_data=${IMAGENET_DATA_PATH}
--batch_size=1
--enable_bf16=true
--paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
--iterations=100
--with_accuracy_layer=false)
### Object detection models
set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_300.bin")
set(INT8_OBJ_DETECT_TEST_APP "test_analyzer_int8_object_detection")
......@@ -739,6 +767,7 @@ if(WITH_MKLDNN)
set_tests_properties(test_analyzer_quant_performance_benchmark PROPERTIES TIMEOUT 120)
set_tests_properties(test_analyzer_int8_mobilenetv2 PROPERTIES TIMEOUT 120)
set_tests_properties(test_analyzer_int8_mobilenetv1 PROPERTIES TIMEOUT 120)
set_tests_properties(test_analyzer_int8_mobilenetv3_large PROPERTIES TIMEOUT 120)
endif()
set_tests_properties(lite_resnet50_test PROPERTIES TIMEOUT 120)
......
......@@ -14,13 +14,19 @@ limitations under the License. */
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace paddle {
namespace inference {
namespace analysis {
void SetConfig(AnalysisConfig *cfg) {
std::ifstream model_file(FLAGS_infer_model + "/__model__");
if (model_file.good())
cfg->SetModel(FLAGS_infer_model);
else
cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
FLAGS_infer_model + "/inference.pdiparams");
cfg->DisableGpu();
cfg->SwitchIrOptim();
cfg->SwitchSpecifyInputNames();
......@@ -38,7 +44,12 @@ TEST(Analyzer_bfloat16_image_classification, bfloat16) {
// read data from file and prepare batches with test data
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInputs(&input_slots_all);
if (FLAGS_enable_bf16 &&
platform::MayIUse(platform::cpu_isa_t::avx512_bf16)) {
b_cfg.EnableMkldnnBfloat16();
} else {
FLAGS_enable_bf16 = false;
}
CompareBFloat16AndAnalysis(&cfg, &b_cfg, input_slots_all);
}
......
......@@ -22,7 +22,12 @@ namespace inference {
namespace analysis {
void SetConfig(AnalysisConfig *cfg) {
std::ifstream model_file(FLAGS_infer_model + "/__model__");
if (model_file.good())
cfg->SetModel(FLAGS_infer_model);
else
cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
FLAGS_infer_model + "/inference.pdiparams");
cfg->DisableGpu();
cfg->SwitchIrOptim();
cfg->SwitchSpecifyInputNames();
......
......@@ -213,15 +213,15 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
element_in_batch * 3 * 224 * 224,
3 * 224 * 224,
static_cast<float *>(images.data.data()) + i * 3 * 224 * 224);
if (FLAGS_with_accuracy_layer)
std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
element_in_batch,
1, static_cast<int64_t *>(labels.data.data()) + i);
}
auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(2);
auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(
FLAGS_with_accuracy_layer ? 2 : 1);
(*warmup_data)[0] = std::move(images);
(*warmup_data)[1] = std::move(labels);
if (FLAGS_with_accuracy_layer) (*warmup_data)[1] = std::move(labels);
return warmup_data;
}
......@@ -254,9 +254,13 @@ void SetInputs(std::vector<std::vector<PaddleTensor>> *inputs,
}
for (auto i = 0; i < iterations; i++) {
auto images = image_reader.NextBatch();
std::vector<PaddleTensor> tmp_vec;
tmp_vec.push_back(std::move(images));
if (FLAGS_with_accuracy_layer) {
auto labels = label_reader.NextBatch();
inputs->emplace_back(
std::vector<PaddleTensor>{std::move(images), std::move(labels)});
tmp_vec.push_back(std::move(labels));
}
inputs->push_back(std::move(tmp_vec));
}
}
......@@ -825,6 +829,7 @@ void CompareQuantizedAndAnalysis(
SummarizePerformance("FP32", sample_latency_fp32, "INT8",
sample_latency_int8);
if (FLAGS_with_accuracy_layer)
CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
}
......@@ -864,6 +869,7 @@ void CompareBFloat16AndAnalysis(
SummarizePerformance("FP32", sample_latency_fp32, "BF16",
sample_latency_bf16);
if (FLAGS_with_accuracy_layer)
CompareAccuracy(bf16_outputs, analysis_outputs, compared_idx);
}
......
......@@ -25,6 +25,12 @@ function(inference_analysis_python_api_int8_test_mkldnn target model_dir data_pa
_inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_path} ${filename} True)
endfunction()
function(download_data install_dir url data_file check_sum)
if (NOT EXISTS ${install_dir}/${data_file})
inference_download_and_uncompress(${install_dir} ${url} ${data_file} ${check_sum})
endif()
endfunction()
function(download_quant_data install_dir data_file check_sum)
if (NOT EXISTS ${install_dir}/${data_file})
inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 ${data_file} ${check_sum})
......@@ -290,8 +296,9 @@ if(LINUX AND WITH_MKLDNN)
### PTQ INT8
# PTQ int8 lstm model
set(LSTM_DATA_ARCHIVE "unittest_model_data/quant_lstm_input_data.tar.gz")
download_quant_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_DATA_ARCHIVE} add84c754e9b792fea1fbd728d134ab7)
set(LSTM_DATA_FILE "quant_lstm_input_data.tar.gz")
set(LSTM_URL "${INFERENCE_URL}/int8/unittest_model_data")
download_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_URL} ${LSTM_DATA_FILE} add84c754e9b792fea1fbd728d134ab7)
set(QUANT2_FP32_LSTM_MODEL_ARCHIVE "lstm_fp32_model.tar.gz")
download_lstm_model(${QUANT2_INT8_LSTM_SAVE_PATH} ${QUANT2_FP32_LSTM_MODEL_ARCHIVE} eecd9f44d69a84acc1cf2235c4b8b743)
inference_quant2_int8_lstm_model_test(test_quant2_int8_lstm_mkldnn ${QUANT2_INT8_LSTM_SAVE_PATH}/lstm_fp32_model ${QUANT2_LSTM_MODEL_DIR}/lstm_quant ${QUANT2_INT8_LSTM_SAVE_PATH}/quant_lstm_input_data)
......