Unverified commit eb7c211a, authored by joanna.wozna.intel, committed by GitHub

Add mobilenetv3_large performance test for bf16 and int8 (#39738)

* Add mobilenetv3_large performance test

* Disable the BF16 test if the device does not support BF16 computations

* Change test timeout
Parent ce8ed978
@@ -453,6 +453,23 @@ if(WITH_MKLDNN)
download_int8_data_without_verify(${INT8_GOOGLENET_MODEL_DIR} "GoogleNet_int8_model.tar.gz" )
inference_analysis_api_int8_test_run_custom_warmup_batch_size(test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} 10)
# mobilenetv3_large_x1_0 int8
set(INT8_MOBILENETV3_LARGE_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv3_large")
set(INT8_MOBILENETV3_FILE_NAME "MobileNetV3_large_x1_0_infer.tar")
if (NOT EXISTS ${INT8_MOBILENETV3_LARGE_MODEL_DIR}/${INT8_MOBILENETV3_FILE_NAME})
inference_download_and_uncompress_without_verify(${INT8_MOBILENETV3_LARGE_MODEL_DIR} "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/" ${INT8_MOBILENETV3_FILE_NAME})
endif()
inference_analysis_test_run(test_analyzer_int8_mobilenetv3_large
COMMAND ${INT8_IMG_CLASS_TEST_APP}
ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
--infer_data=${IMAGENET_DATA_PATH}
--warmup_batch_size=50
--batch_size=1
--enable_int8=true
--cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
--iterations=100
--with_accuracy_layer=false)
### BFLOAT16 tests
# build test binary to be used in subsequent tests
@@ -472,6 +489,17 @@ if(WITH_MKLDNN)
# mobilenetv2 bfloat16
inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_mobilenetv2 ${BF16_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH})
# mobilenetv3_large
inference_analysis_test_run(test_analyzer_bfloat16_mobilenetv3_large
COMMAND ${BF16_IMG_CLASS_TEST_APP}
ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
--infer_data=${IMAGENET_DATA_PATH}
--batch_size=1
--enable_bf16=true
--paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
--iterations=100
--with_accuracy_layer=false)
### Object detection models
set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_300.bin")
set(INT8_OBJ_DETECT_TEST_APP "test_analyzer_int8_object_detection")
@@ -739,6 +767,7 @@ if(WITH_MKLDNN)
set_tests_properties(test_analyzer_quant_performance_benchmark PROPERTIES TIMEOUT 120)
set_tests_properties(test_analyzer_int8_mobilenetv2 PROPERTIES TIMEOUT 120)
set_tests_properties(test_analyzer_int8_mobilenetv1 PROPERTIES TIMEOUT 120)
set_tests_properties(test_analyzer_int8_mobilenetv3_large PROPERTIES TIMEOUT 120)
endif()
set_tests_properties(lite_resnet50_test PROPERTIES TIMEOUT 120)
......
@@ -14,13 +14,19 @@ limitations under the License. */
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace paddle {
namespace inference {
namespace analysis {
void SetConfig(AnalysisConfig *cfg) {
cfg->SetModel(FLAGS_infer_model);
std::ifstream model_file(FLAGS_infer_model + "/__model__");
if (model_file.good())
cfg->SetModel(FLAGS_infer_model);
else
cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
FLAGS_infer_model + "/inference.pdiparams");
cfg->DisableGpu();
cfg->SwitchIrOptim();
cfg->SwitchSpecifyInputNames();
@@ -38,7 +44,12 @@ TEST(Analyzer_bfloat16_image_classification, bfloat16) {
// read data from file and prepare batches with test data
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInputs(&input_slots_all);
b_cfg.EnableMkldnnBfloat16();
if (FLAGS_enable_bf16 &&
platform::MayIUse(platform::cpu_isa_t::avx512_bf16)) {
b_cfg.EnableMkldnnBfloat16();
} else {
FLAGS_enable_bf16 = false;
}
CompareBFloat16AndAnalysis(&cfg, &b_cfg, input_slots_all);
}
......
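For context, the changes above (mirrored in the int8 test below) do two things: SetConfig falls back from the legacy single "__model__" file to the "inference.pdmodel"/"inference.pdiparams" pair used by newer exported models such as MobileNetV3, and BF16 is enabled only when the CPU reports AVX512_BF16 support. A minimal standalone sketch of that pattern, assuming the same Paddle inference headers as above and a placeholder model_dir path:

// Standalone sketch (not part of this patch): configure an AnalysisConfig the
// way the tests above do. "model_dir" is a placeholder path.
#include <fstream>
#include <string>

#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/platform/cpu_info.h"

void ConfigureBf16(paddle::AnalysisConfig *cfg, const std::string &model_dir) {
  // Legacy exports ship a single "__model__" file; newer exports ship an
  // "inference.pdmodel" / "inference.pdiparams" pair, so probe for the former.
  std::ifstream model_file(model_dir + "/__model__");
  if (model_file.good()) {
    cfg->SetModel(model_dir);
  } else {
    cfg->SetModel(model_dir + "/inference.pdmodel",
                  model_dir + "/inference.pdiparams");
  }
  cfg->DisableGpu();
  cfg->SwitchIrOptim();
  // Enable oneDNN BF16 kernels only when the CPU supports AVX512_BF16;
  // otherwise the run stays in FP32, just as the test above falls back.
  if (paddle::platform::MayIUse(paddle::platform::cpu_isa_t::avx512_bf16)) {
    cfg->EnableMkldnnBfloat16();
  }
}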
@@ -22,7 +22,12 @@ namespace inference {
namespace analysis {
void SetConfig(AnalysisConfig *cfg) {
cfg->SetModel(FLAGS_infer_model);
std::ifstream model_file(FLAGS_infer_model + "/__model__");
if (model_file.good())
cfg->SetModel(FLAGS_infer_model);
else
cfg->SetModel(FLAGS_infer_model + "/inference.pdmodel",
FLAGS_infer_model + "/inference.pdiparams");
cfg->DisableGpu();
cfg->SwitchIrOptim();
cfg->SwitchSpecifyInputNames();
......
@@ -213,15 +213,15 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
element_in_batch * 3 * 224 * 224,
3 * 224 * 224,
static_cast<float *>(images.data.data()) + i * 3 * 224 * 224);
std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
element_in_batch,
1, static_cast<int64_t *>(labels.data.data()) + i);
if (FLAGS_with_accuracy_layer)
std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
element_in_batch,
1, static_cast<int64_t *>(labels.data.data()) + i);
}
auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(2);
auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(
FLAGS_with_accuracy_layer ? 2 : 1);
(*warmup_data)[0] = std::move(images);
(*warmup_data)[1] = std::move(labels);
if (FLAGS_with_accuracy_layer) (*warmup_data)[1] = std::move(labels);
return warmup_data;
}
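The warmup vector returned above now holds either one tensor (images only, when --with_accuracy_layer=false as in the new MobileNetV3 runs) or two tensors (images plus labels). A hedged sketch of how such warmup data is typically handed to the oneDNN INT8 quantizer, assuming the MkldnnQuantizerConfig accessors from paddle_mkldnn_quantizer_config.h:

// Sketch under the assumptions above; not part of this patch.
#include <memory>
#include <vector>

#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h"

void SetUpInt8Warmup(
    paddle::AnalysisConfig *cfg,
    std::shared_ptr<std::vector<paddle::PaddleTensor>> warmup_data,
    int warmup_batch_size) {
  cfg->EnableMkldnnQuantizer();
  // The quantizer collects activation statistics from the warmup batch on the
  // first run; labels are not needed for that, only the image tensor.
  cfg->mkldnn_quantizer_config()->SetWarmupData(warmup_data);
  cfg->mkldnn_quantizer_config()->SetWarmupBatchSize(warmup_batch_size);
}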
@@ -254,9 +254,13 @@ void SetInputs(std::vector<std::vector<PaddleTensor>> *inputs,
}
for (auto i = 0; i < iterations; i++) {
auto images = image_reader.NextBatch();
auto labels = label_reader.NextBatch();
inputs->emplace_back(
std::vector<PaddleTensor>{std::move(images), std::move(labels)});
std::vector<PaddleTensor> tmp_vec;
tmp_vec.push_back(std::move(images));
if (FLAGS_with_accuracy_layer) {
auto labels = label_reader.NextBatch();
tmp_vec.push_back(std::move(labels));
}
inputs->push_back(std::move(tmp_vec));
}
}
@@ -825,7 +829,8 @@ void CompareQuantizedAndAnalysis(
SummarizePerformance("FP32", sample_latency_fp32, "INT8",
sample_latency_int8);
CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
if (FLAGS_with_accuracy_layer)
CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
}
void CompareBFloat16AndAnalysis(
@@ -864,7 +869,8 @@ void CompareBFloat16AndAnalysis(
SummarizePerformance("FP32", sample_latency_fp32, "BF16",
sample_latency_bf16);
CompareAccuracy(bf16_outputs, analysis_outputs, compared_idx);
if (FLAGS_with_accuracy_layer)
CompareAccuracy(bf16_outputs, analysis_outputs, compared_idx);
}
void CompareAnalysisAndAnalysis(
......
@@ -25,6 +25,12 @@ function(inference_analysis_python_api_int8_test_mkldnn target model_dir data_pa
_inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_path} ${filename} True)
endfunction()
function(download_data install_dir url data_file check_sum)
if (NOT EXISTS ${install_dir}/${data_file})
inference_download_and_uncompress(${install_dir} ${url} ${data_file} ${check_sum})
endif()
endfunction()
function(download_quant_data install_dir data_file check_sum)
if (NOT EXISTS ${install_dir}/${data_file})
inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 ${data_file} ${check_sum})
@@ -290,8 +296,9 @@ if(LINUX AND WITH_MKLDNN)
### PTQ INT8
# PTQ int8 lstm model
set(LSTM_DATA_ARCHIVE "unittest_model_data/quant_lstm_input_data.tar.gz")
download_quant_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_DATA_ARCHIVE} add84c754e9b792fea1fbd728d134ab7)
set(LSTM_DATA_FILE "quant_lstm_input_data.tar.gz")
set(LSTM_URL "${INFERENCE_URL}/int8/unittest_model_data")
download_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_URL} ${LSTM_DATA_FILE} add84c754e9b792fea1fbd728d134ab7)
set(QUANT2_FP32_LSTM_MODEL_ARCHIVE "lstm_fp32_model.tar.gz")
download_lstm_model(${QUANT2_INT8_LSTM_SAVE_PATH} ${QUANT2_FP32_LSTM_MODEL_ARCHIVE} eecd9f44d69a84acc1cf2235c4b8b743)
inference_quant2_int8_lstm_model_test(test_quant2_int8_lstm_mkldnn ${QUANT2_INT8_LSTM_SAVE_PATH}/lstm_fp32_model ${QUANT2_LSTM_MODEL_DIR}/lstm_quant ${QUANT2_INT8_LSTM_SAVE_PATH}/quant_lstm_input_data)
......