Commit bc93a209 authored by Wojciech Uss, committed by Tao Luo

Cherry pick #18077 and #18111: unify FP32 vs. INT8 comparison tests output, reuse C-API INT8 unit test application (#18145)

* unify FP32 vs. INT8 comparison tests output (#18111)

test=release/1.5

* reuse C-API INT8 unit test application (#18077)

test=release/1.5
Parent ca8a5680
@@ -363,10 +363,10 @@ function(cc_binary TARGET_NAME)
   target_link_libraries(${TARGET_NAME} ${os_dependency_modules})
 endfunction(cc_binary)
 
-function(cc_test TARGET_NAME)
+function(cc_test_build TARGET_NAME)
   if(WITH_TESTING)
     set(oneValueArgs "")
-    set(multiValueArgs SRCS DEPS ARGS)
+    set(multiValueArgs SRCS DEPS)
     cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     add_executable(${TARGET_NAME} ${cc_test_SRCS})
     if(WIN32)
@@ -379,9 +379,18 @@ function(cc_test TARGET_NAME)
     target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${os_dependency_modules} paddle_gtest_main lod_tensor memory gtest gflags glog)
     add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
     common_link(${TARGET_NAME})
+  endif()
+endfunction()
+
+function(cc_test_run TARGET_NAME)
+  if(WITH_TESTING)
+    set(oneValueArgs "")
+    set(multiValueArgs COMMAND ARGS)
+    cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     add_test(NAME ${TARGET_NAME}
-             COMMAND ${TARGET_NAME} ${cc_test_ARGS}
+             COMMAND ${cc_test_COMMAND}
+             ARGS ${cc_test_ARGS}
              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
     set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
     set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
     set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_limit_of_tmp_allocation=4294967296) # 4G
@@ -389,6 +398,20 @@ function(cc_test TARGET_NAME)
     # No unit test should exceed 10 minutes.
     set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
   endif()
+endfunction()
+
+function(cc_test TARGET_NAME)
+  if(WITH_TESTING)
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS ARGS)
+    cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    cc_test_build(${TARGET_NAME}
+      SRCS ${cc_test_SRCS}
+      DEPS ${cc_test_DEPS})
+    cc_test_run(${TARGET_NAME}
+      COMMAND ${TARGET_NAME}
+      ARGS ${cc_test_ARGS})
+  endif()
 endfunction(cc_test)
 
 function(nv_library TARGET_NAME)
......
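The cc_test split above is what lets a single compiled gtest binary back several CTest entries. A minimal usage sketch, with hypothetical target and source names (foo_test and foo_test.cc are illustrations, not part of this commit):

    # Compile the test executable once.
    cc_test_build(foo_test
      SRCS foo_test.cc
      DEPS gflags glog)

    # Register two CTest runs that reuse the same foo_test binary.
    cc_test_run(foo_test_run_a
      COMMAND foo_test
      ARGS --use_case=a)
    cc_test_run(foo_test_run_b
      COMMAND foo_test
      ARGS --use_case=b)

The old single-call cc_test(...) interface is kept as a thin wrapper that invokes cc_test_build followed by cc_test_run, so existing callers are unaffected.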
@@ -23,18 +23,46 @@ cc_library(analysis SRCS
 cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
 
+function(inference_analysis_test_build TARGET)
+  if(WITH_TESTING)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS EXTRA_DEPS)
+    cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    inference_base_test_build(${TARGET}
+      SRCS ${analysis_test_SRCS}
+      DEPS analysis pass ${GLOB_PASS_LIB} ${analysis_test_EXTRA_DEPS})
+  endif()
+endfunction()
+
+function(inference_analysis_test_run TARGET)
+  if(WITH_TESTING)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs COMMAND ARGS)
+    cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    inference_base_test_run(${TARGET}
+      COMMAND ${analysis_test_COMMAND}
+      ARGS ${analysis_test_ARGS})
+  endif()
+endfunction()
+
 function(inference_analysis_test TARGET)
   if(WITH_TESTING)
     set(options "")
     set(oneValueArgs "")
     set(multiValueArgs SRCS ARGS EXTRA_DEPS)
     cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-    inference_base_test(${TARGET}
+    inference_base_test_build(${TARGET}
       SRCS ${analysis_test_SRCS}
-      DEPS analysis pass ${GLOB_PASS_LIB} ${analysis_test_EXTRA_DEPS}
-      ARGS --inference_model_dir=${WORD2VEC_MODEL_DIR} ${analysis_test_ARGS})
+      DEPS analysis pass ${GLOB_PASS_LIB} ${analysis_test_EXTRA_DEPS})
+    inference_base_test_run(${TARGET}
+      COMMAND ${TARGET}
+      ARGS ${analysis_test_ARGS})
   endif()
 endfunction(inference_analysis_test)
 
-inference_analysis_test(test_analyzer SRCS analyzer_tester.cc
-                        EXTRA_DEPS reset_tensor_array paddle_inference_api)
+inference_analysis_test(test_analyzer
+                        SRCS analyzer_tester.cc
+                        EXTRA_DEPS reset_tensor_array paddle_inference_api
+                        ARGS --inference_model_dir=${WORD2VEC_MODEL_DIR})
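With the analysis-test helpers split the same way, one tester binary can be pointed at several models. A hedged sketch (bar_tester and the MODEL_*_DIR variables are made up for illustration):

    inference_analysis_test_build(bar_tester
      SRCS bar_tester.cc
      EXTRA_DEPS paddle_inference_api)
    inference_analysis_test_run(bar_tester_model_a
      COMMAND bar_tester
      ARGS --inference_model_dir=${MODEL_A_DIR})
    inference_analysis_test_run(bar_tester_model_b
      COMMAND bar_tester
      ARGS --inference_model_dir=${MODEL_B_DIR})

Note also that the hard-coded --inference_model_dir=${WORD2VEC_MODEL_DIR} default moved out of the helper and into the test_analyzer call site, so each run now states its model explicitly.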
@@ -4,9 +4,15 @@ if(WITH_GPU AND TENSORRT_FOUND)
   set(INFERENCE_EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor)
 endif()
 
-function(download_model install_dir model_name)
+function(download_data install_dir data_file)
   if (NOT EXISTS ${install_dir})
-    inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${model_name})
+    inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${data_file})
+  endif()
+endfunction()
+
+function(download_int8_data install_dir data_file)
+  if (NOT EXISTS ${install_dir})
+    inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 ${data_file})
   endif()
 endfunction()
 
@@ -23,21 +29,31 @@ function(inference_analysis_api_test target install_dir filename)
       ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt)
 endfunction()
 
-function(inference_analysis_api_int8_test target model_dir data_dir filename)
-  inference_analysis_test(${target} SRCS ${filename}
-      EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark
+function(inference_analysis_api_int8_test_build TARGET_NAME filename)
+  inference_analysis_test_build(${TARGET_NAME} SRCS ${filename}
+      EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark)
+endfunction()
+
+function(inference_analysis_api_int8_test_run TARGET_NAME test_binary model_dir data_path)
+  inference_analysis_test_run(${TARGET_NAME}
+      COMMAND ${test_binary}
       ARGS --infer_model=${model_dir}/model
-           --infer_data=${data_dir}/data.bin
+           --infer_data=${data_path}
            --warmup_batch_size=100
            --batch_size=50
            --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
            --iterations=2)
 endfunction()
 
-function(inference_analysis_api_test_with_fake_data target install_dir filename model_name disable_fc)
-  download_model(${install_dir} ${model_name})
-  inference_analysis_test(${target} SRCS ${filename}
-      EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
-      ARGS --infer_model=${install_dir}/model
+function(inference_analysis_api_test_with_fake_data_build TARGET_NAME filename)
+  inference_analysis_test_build(${TARGET_NAME} SRCS ${filename}
+      EXTRA_DEPS ${INFERENCE_EXTRA_DEPS})
+endfunction()
+
+function(inference_analysis_api_test_with_fake_data_run TARGET_NAME test_binary model_dir disable_fc)
+  inference_analysis_test_run(${TARGET_NAME}
+      COMMAND ${test_binary}
+      ARGS --infer_model=${model_dir}/model
            --disable_mkldnn_fc=${disable_fc})
 endfunction()
 
@@ -141,73 +157,82 @@ if (NOT EXISTS ${MOBILENET_INSTALL_DIR})
 endif()
 inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc)
 
+### Image classification tests with fake data
+set(IMG_CLASS_TEST_APP "test_analyzer_image_classification")
+set(IMG_CLASS_TEST_APP_SRC "analyzer_image_classification_tester.cc")
+
+# build test binary to be used in subsequent tests
+inference_analysis_api_test_with_fake_data_build(${IMG_CLASS_TEST_APP} ${IMG_CLASS_TEST_APP_SRC})
+
 # googlenet
-inference_analysis_api_test_with_fake_data(test_analyzer_googlenet
-    "${INFERENCE_DEMO_INSTALL_DIR}/googlenet" analyzer_resnet50_tester.cc "googlenet.tar.gz" false)
+set(GOOGLENET_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/googlenet")
+download_data(${GOOGLENET_MODEL_DIR} "googlenet.tar.gz")
+inference_analysis_api_test_with_fake_data_run(test_analyzer_googlenet ${IMG_CLASS_TEST_APP}
+    ${GOOGLENET_MODEL_DIR} false)
 
 # resnet50
-inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
-    "${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz" true)
+set(RESNET50_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50")
+download_data(${RESNET50_MODEL_DIR} "resnet50_model.tar.gz")
+inference_analysis_api_test_with_fake_data_run(test_analyzer_resnet50 ${IMG_CLASS_TEST_APP}
+    ${RESNET50_MODEL_DIR} true)
 
 # mobilenet with depthwise_conv op
-inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv
-    "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz" false)
+set(MOBILENET_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv")
+download_data(${MOBILENET_MODEL_DIR} "mobilenet_model.tar.gz")
+inference_analysis_api_test_with_fake_data_run(test_analyzer_mobilenet_depthwise_conv ${IMG_CLASS_TEST_APP}
+    ${MOBILENET_MODEL_DIR} false)
 
-# int8 image classification tests
+### INT8 tests
 if(WITH_MKLDNN)
   set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
-  if (NOT EXISTS ${INT8_DATA_DIR})
-    inference_download_and_uncompress(${INT8_DATA_DIR} "${INFERENCE_URL}/int8" "imagenet_val_100_tail.tar.gz")
-  endif()
 
-  #resnet50 int8
+  ### Image classification tests
+  set(IMAGENET_DATA_PATH "${INT8_DATA_DIR}/data.bin")
+  set(INT8_IMG_CLASS_TEST_APP "test_analyzer_int8_image_classification")
+  set(INT8_IMG_CLASS_TEST_APP_SRC "analyzer_int8_image_classification_tester.cc")
+
+  # download dataset if necessary
+  download_int8_data(${INT8_DATA_DIR} "imagenet_val_100_tail.tar.gz")
+
+  # build test binary to be used in subsequent tests
+  inference_analysis_api_int8_test_build(${INT8_IMG_CLASS_TEST_APP} ${INT8_IMG_CLASS_TEST_APP_SRC})
+
+  # resnet50 int8
   set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50")
-  if (NOT EXISTS ${INT8_RESNET50_MODEL_DIR})
-    inference_download_and_uncompress(${INT8_RESNET50_MODEL_DIR} "${INFERENCE_URL}/int8" "resnet50_int8_model.tar.gz" )
-  endif()
-  inference_analysis_api_int8_test(test_analyzer_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc)
+  download_int8_data(${INT8_RESNET50_MODEL_DIR} "resnet50_int8_model.tar.gz" )
+  inference_analysis_api_int8_test_run(test_analyzer_int8_resnet50 ${INT8_IMG_CLASS_TEST_APP} ${INT8_RESNET50_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
-  #mobilenet int8
-  set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenet")
-  if (NOT EXISTS ${INT8_MOBILENET_MODEL_DIR})
-    inference_download_and_uncompress(${INT8_MOBILENET_MODEL_DIR} "${INFERENCE_URL}/int8" "mobilenetv1_int8_model.tar.gz" )
-  endif()
-  inference_analysis_api_int8_test(test_analyzer_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc)
+  # mobilenetv1 int8
+  set(INT8_MOBILENETV1_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv1")
+  download_int8_data(${INT8_MOBILENETV1_MODEL_DIR} "mobilenetv1_int8_model.tar.gz" )
+  inference_analysis_api_int8_test_run(test_analyzer_int8_mobilenetv1 ${INT8_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV1_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
-  #mobilenetv2 int8
+  # mobilenetv2 int8
   set(INT8_MOBILENETV2_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv2")
-  if (NOT EXISTS ${INT8_MOBILENETV2_MODEL_DIR})
-    inference_download_and_uncompress(${INT8_MOBILENETV2_MODEL_DIR} "${INFERENCE_URL}/int8" "mobilenet_v2_int8_model.tar.gz" )
-  endif()
-  inference_analysis_api_int8_test(test_analyzer_int8_mobilenetv2 ${INT8_MOBILENETV2_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc)
+  download_int8_data(${INT8_MOBILENETV2_MODEL_DIR} "mobilenet_v2_int8_model.tar.gz" )
+  inference_analysis_api_int8_test_run(test_analyzer_int8_mobilenetv2 ${INT8_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
-  #resnet101 int8
+  # resnet101 int8
   set(INT8_RESNET101_MODEL_DIR "${INT8_DATA_DIR}/resnet101")
-  if (NOT EXISTS ${INT8_RESNET101_MODEL_DIR})
-    inference_download_and_uncompress(${INT8_RESNET101_MODEL_DIR} "${INFERENCE_URL}/int8" "Res101_int8_model.tar.gz" )
-  endif()
-  inference_analysis_api_int8_test(test_analyzer_int8_resnet101 ${INT8_RESNET101_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc)
+  download_int8_data(${INT8_RESNET101_MODEL_DIR} "Res101_int8_model.tar.gz" )
+  inference_analysis_api_int8_test_run(test_analyzer_int8_resnet101 ${INT8_IMG_CLASS_TEST_APP} ${INT8_RESNET101_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
-  #vgg16 int8
+  # vgg16 int8
   set(INT8_VGG16_MODEL_DIR "${INT8_DATA_DIR}/vgg16")
-  if (NOT EXISTS ${INT8_VGG16_MODEL_DIR})
-    inference_download_and_uncompress(${INT8_VGG16_MODEL_DIR} "${INFERENCE_URL}/int8" "VGG16_int8_model.tar.gz" )
-  endif()
-  inference_analysis_api_int8_test(test_analyzer_int8_vgg16 ${INT8_VGG16_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc)
+  download_int8_data(${INT8_VGG16_MODEL_DIR} "VGG16_int8_model.tar.gz" )
+  inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
-  #vgg19 int8
+  # vgg19 int8
   set(INT8_VGG19_MODEL_DIR "${INT8_DATA_DIR}/vgg19")
-  if (NOT EXISTS ${INT8_VGG19_MODEL_DIR})
-    inference_download_and_uncompress(${INT8_VGG19_MODEL_DIR} "${INFERENCE_URL}/int8" "VGG19_int8_model.tar.gz" )
-  endif()
-  inference_analysis_api_int8_test(test_analyzer_int8_vgg19 ${INT8_VGG19_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc)
+  download_int8_data(${INT8_VGG19_MODEL_DIR} "VGG19_int8_model.tar.gz" )
+  inference_analysis_api_int8_test_run(test_analyzer_int8_vgg19 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG19_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
-  #googlenet int8
+  # googlenet int8
   set(INT8_GOOGLENET_MODEL_DIR "${INT8_DATA_DIR}/googlenet")
-  if (NOT EXISTS ${INT8_GOOGLENET_MODEL_DIR})
-    inference_download_and_uncompress(${INT8_GOOGLENET_MODEL_DIR} "${INFERENCE_URL}/int8" "GoogleNet_int8_model.tar.gz" )
-  endif()
-  inference_analysis_api_int8_test(test_analyzer_int8_googlenet ${INT8_GOOGLENET_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL)
+  download_int8_data(${INT8_GOOGLENET_MODEL_DIR} "GoogleNet_int8_model.tar.gz" )
+  inference_analysis_api_int8_test_run(test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH})
 endif()
 
 # bert, max_len=20, embedding_dim=128
......
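Under this scheme, adding one more INT8 image-classification model is two calls against the shared binary. A sketch with a hypothetical model archive (the "newmodel" names are placeholders):

    # Hypothetical new model: fetch the archive once, then register a run
    # against the already-built ${INT8_IMG_CLASS_TEST_APP} binary.
    set(INT8_NEWMODEL_MODEL_DIR "${INT8_DATA_DIR}/newmodel")
    download_int8_data(${INT8_NEWMODEL_MODEL_DIR} "newmodel_int8_model.tar.gz")
    inference_analysis_api_int8_test_run(test_analyzer_int8_newmodel
        ${INT8_IMG_CLASS_TEST_APP} ${INT8_NEWMODEL_MODEL_DIR} ${IMAGENET_DATA_PATH})

This is also why the commit can drop the per-model if (NOT EXISTS ...) download guards: download_int8_data carries that guard itself.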
@@ -320,7 +320,8 @@ void PredictionRun(PaddlePredictor *predictor,
                    const std::vector<std::vector<PaddleTensor>> &inputs,
                    std::vector<std::vector<PaddleTensor>> *outputs,
                    int num_threads, int tid,
-                   const VarType::Type data_type = VarType::FP32) {
+                   const VarType::Type data_type = VarType::FP32,
+                   float *sample_latency = nullptr) {
   int num_times = FLAGS_repeat;
   int iterations = inputs.size();  // process the whole dataset ...
   if (FLAGS_iterations > 0 &&
@@ -360,6 +361,10 @@ void PredictionRun(PaddlePredictor *predictor,
   auto batch_latency = elapsed_time / (iterations * num_times);
   PrintTime(FLAGS_batch_size, num_times, num_threads, tid, batch_latency,
             iterations, data_type);
+
+  if (sample_latency != nullptr)
+    *sample_latency = batch_latency / FLAGS_batch_size;
+
   if (FLAGS_record_benchmark) {
     Benchmark benchmark;
     benchmark.SetName(FLAGS_model_name);
@@ -373,12 +378,14 @@ void TestOneThreadPrediction(
     const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true,
-    const VarType::Type data_type = VarType::FP32) {
+    const VarType::Type data_type = VarType::FP32,
+    float *sample_latency = nullptr) {
   auto predictor = CreateTestPredictor(config, use_analysis);
   if (FLAGS_warmup) {
     PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0, data_type);
   }
-  PredictionRun(predictor.get(), inputs, outputs, 1, 0, data_type);
+  PredictionRun(predictor.get(), inputs, outputs, 1, 0, data_type,
+                sample_latency);
 }
 
 void TestMultiThreadPrediction(
@@ -430,6 +437,31 @@ void TestPrediction(const PaddlePredictor::Config *config,
   }
 }
 
+void SummarizeAccuracy(float avg_acc1_fp32, float avg_acc1_int8) {
+  LOG(INFO) << "--- Accuracy summary --- ";
+  LOG(INFO) << "Accepted top1 accuracy drop threshold: "
+            << FLAGS_quantized_accuracy
+            << ". (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)";
+  LOG(INFO) << "FP32: avg top1 accuracy: " << std::fixed << std::setw(6)
+            << std::setprecision(4) << avg_acc1_fp32;
+  LOG(INFO) << "INT8: avg top1 accuracy: " << std::fixed << std::setw(6)
+            << std::setprecision(4) << avg_acc1_int8;
+}
+
+void SummarizePerformance(float sample_latency_fp32,
+                          float sample_latency_int8) {
+  // sample latency in ms
+  auto throughput_fp32 = 1000.0 / sample_latency_fp32;
+  auto throughput_int8 = 1000.0 / sample_latency_int8;
+  LOG(INFO) << "--- Performance summary --- ";
+  LOG(INFO) << "FP32: avg fps: " << std::fixed << std::setw(6)
+            << std::setprecision(4) << throughput_fp32
+            << ", avg latency: " << sample_latency_fp32 << " ms";
+  LOG(INFO) << "INT8: avg fps: " << std::fixed << std::setw(6)
+            << std::setprecision(4) << throughput_int8
+            << ", avg latency: " << sample_latency_int8 << " ms";
+}
+
 void CompareTopAccuracy(
     const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
     const std::vector<std::vector<PaddleTensor>> &output_slots_ref) {
@@ -459,12 +491,10 @@ void CompareTopAccuracy(
   float avg_acc1_quant = total_accs1_quant / output_slots_quant.size();
   float avg_acc1_ref = total_accs1_ref / output_slots_ref.size();
 
-  LOG(INFO) << "Avg top1 INT8 accuracy: " << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc1_quant;
-  LOG(INFO) << "Avg top1 FP32 accuracy: " << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc1_ref;
-  LOG(INFO) << "Accepted accuracy drop threshold: " << FLAGS_quantized_accuracy;
-  CHECK_LE(std::abs(avg_acc1_quant - avg_acc1_ref), FLAGS_quantized_accuracy);
+  SummarizeAccuracy(avg_acc1_ref, avg_acc1_quant);
+  CHECK_GT(avg_acc1_ref, 0.0);
+  CHECK_GT(avg_acc1_quant, 0.0);
+  CHECK_LE(avg_acc1_ref - avg_acc1_quant, FLAGS_quantized_accuracy);
 }
 
 void CompareDeterministic(
@@ -510,16 +540,19 @@ void CompareQuantizedAndAnalysis(
   auto *cfg = reinterpret_cast<const PaddlePredictor::Config *>(config);
   PrintConfig(cfg, true);
   std::vector<std::vector<PaddleTensor>> analysis_outputs;
-  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32);
+  float sample_latency_fp32{-1};
+  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32,
+                          &sample_latency_fp32);
 
   LOG(INFO) << "--- INT8 prediction start ---";
   auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig);
   PrintConfig(qcfg, true);
   std::vector<std::vector<PaddleTensor>> quantized_outputs;
-  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true,
-                          VarType::INT8);
+  float sample_latency_int8{-1};
+  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true, VarType::INT8,
+                          &sample_latency_int8);
 
-  LOG(INFO) << "--- comparing outputs --- ";
+  SummarizePerformance(sample_latency_fp32, sample_latency_int8);
   CompareTopAccuracy(quantized_outputs, analysis_outputs);
 }
......
@@ -48,13 +48,35 @@ if(NOT EXISTS ${WORD2VEC_INSTALL_DIR} AND NOT WIN32)
 endif()
 set(WORD2VEC_MODEL_DIR "${WORD2VEC_INSTALL_DIR}/word2vec.inference.model")
 
-function (inference_base_test TARGET)
+function (inference_base_test_build TARGET)
   set(options "")
   set(oneValueArgs "")
-  set(multiValueArgs SRCS ARGS DEPS)
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(base_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  cc_test_build(${TARGET} SRCS ${base_test_SRCS} DEPS ${base_test_DEPS})
+endfunction()
+
+function (inference_base_test_run TARGET)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs COMMAND ARGS)
   cmake_parse_arguments(base_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
   if(WITH_GPU)
     set(mem_opt "--fraction_of_gpu_memory_to_use=0.5")
   endif()
-  cc_test(${TARGET} SRCS ${base_test_SRCS} DEPS ${base_test_DEPS} ARGS ${mem_opt} ${base_test_ARGS})
+  cc_test_run(${TARGET} COMMAND ${base_test_COMMAND} ARGS ${mem_opt} ${base_test_ARGS})
+endfunction()
+
+function (inference_base_test TARGET)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS ARGS DEPS)
+  cmake_parse_arguments(base_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  inference_base_test_build(${TARGET}
+                            SRCS ${base_test_SRCS}
+                            DEPS ${base_test_DEPS})
+  inference_base_test_run(${TARGET}
+                          COMMAND ${TARGET}
+                          ARGS ${base_test_ARGS})
 endfunction()
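For callers that do not need build/run reuse, the combined inference_base_test keeps the old one-shot interface. Under this commit the two forms below should be equivalent (baz_test is a hypothetical target):

    # One-shot form (public interface unchanged):
    inference_base_test(baz_test SRCS baz_test.cc DEPS analysis)

    # What it now delegates to internally:
    inference_base_test_build(baz_test SRCS baz_test.cc DEPS analysis)
    inference_base_test_run(baz_test COMMAND baz_test)

Since the GPU memory cap (--fraction_of_gpu_memory_to_use=0.5) lives in the run step, every test registered through inference_base_test_run inherits it.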
@@ -83,8 +83,8 @@ class TestQatInt8Comparison(unittest.TestCase):
             while step < num:
                 fp.seek(imgs_offset + img_size * step)
                 img = fp.read(img_size)
-                img = struct.unpack_from('{}f'.format(img_ch * img_w *
-                                                      img_h), img)
+                img = struct.unpack_from(
+                    '{}f'.format(img_ch * img_w * img_h), img)
                 img = np.array(img)
                 img.shape = (img_ch, img_w, img_h)
                 fp.seek(labels_offset + label_size * step)
@@ -147,6 +147,7 @@ class TestQatInt8Comparison(unittest.TestCase):
     def _predict(self,
                  test_reader=None,
                  model_path=None,
+                 batch_size=1,
                  batch_num=1,
                  skip_batch_num=0,
                  transform_to_int8=False):
@@ -199,7 +200,7 @@ class TestQatInt8Comparison(unittest.TestCase):
                 out = exe.run(inference_program,
                               feed={feed_target_names[0]: images},
                               fetch_list=fetch_targets)
-                batch_time = time.time() - start
+                batch_time = (time.time() - start) * 1000  # in milliseconds
                 outputs.append(out[0])
                 batch_acc1, batch_acc5 = self._get_batch_accuracy(out[0],
                                                                   labels)
@@ -212,14 +213,15 @@ class TestQatInt8Comparison(unittest.TestCase):
                 fpses.append(fps)
                 iters += 1
                 appx = ' (warm-up)' if iters <= skip_batch_num else ''
-                _logger.info(
-                    'batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, '
-                    'batch latency: {3:.4f} s, batch fps: {4:.2f}'.format(
-                        iters, batch_acc1, batch_acc5, batch_time, fps, appx))
+                _logger.info('batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, '
+                             'latency: {3:.4f} ms, fps: {4:.2f}'.format(
+                                 iters, batch_acc1, batch_acc5, batch_time /
+                                 batch_size, fps, appx))
 
             # Postprocess benchmark data
-            latencies = batch_times[skip_batch_num:]
-            latency_avg = np.average(latencies)
+            batch_latencies = batch_times[skip_batch_num:]
+            batch_latency_avg = np.average(batch_latencies)
+            latency_avg = batch_latency_avg / batch_size
             fpses = fpses[skip_batch_num:]
             fps_avg = np.average(fpses)
             infer_total_time = time.time() - infer_start_time
@@ -230,13 +232,25 @@ class TestQatInt8Comparison(unittest.TestCase):
 
         return outputs, acc1_avg, acc5_avg, fps_avg, latency_avg
 
+    def _summarize_performance(self, fp32_fps, fp32_lat, int8_fps, int8_lat):
+        _logger.info('--- Performance summary ---')
+        _logger.info('FP32: avg fps: {0:.2f}, avg latency: {1:.4f} ms'.format(
+            fp32_fps, fp32_lat))
+        _logger.info('INT8: avg fps: {0:.2f}, avg latency: {1:.4f} ms'.format(
+            int8_fps, int8_lat))
+
     def _compare_accuracy(self, fp32_acc1, fp32_acc5, int8_acc1, int8_acc5,
                           threshold):
-        _logger.info('Accepted acc1 diff threshold: {0}'.format(threshold))
-        _logger.info('FP32: avg acc1: {0:.4f}, avg acc5: {1:.4f}'.format(
-            fp32_acc1, fp32_acc5))
-        _logger.info('INT8: avg acc1: {0:.4f}, avg acc5: {1:.4f}'.format(
-            int8_acc1, int8_acc5))
+        _logger.info('--- Accuracy summary ---')
+        _logger.info(
+            'Accepted top1 accuracy drop threshold: {0}. (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)'
+            .format(threshold))
+        _logger.info(
+            'FP32: avg top1 accuracy: {0:.4f}, avg top5 accuracy: {1:.4f}'.
+            format(fp32_acc1, fp32_acc5))
+        _logger.info(
+            'INT8: avg top1 accuracy: {0:.4f}, avg top5 accuracy: {1:.4f}'.
+            format(int8_acc1, int8_acc5))
         assert fp32_acc1 > 0.0
         assert int8_acc1 > 0.0
         assert fp32_acc1 - int8_acc1 <= threshold
@@ -257,9 +271,7 @@ class TestQatInt8Comparison(unittest.TestCase):
         _logger.info('Dataset: {0}'.format(data_path))
         _logger.info('Batch size: {0}'.format(batch_size))
         _logger.info('Batch number: {0}'.format(batch_num))
-        _logger.info('Accuracy diff threshold: {0}. '
-                     '(condition: (fp32_acc - int8_acc) <= threshold)'
-                     .format(acc_diff_threshold))
+        _logger.info('Accuracy drop threshold: {0}.'.format(acc_diff_threshold))
 
         _logger.info('--- QAT FP32 prediction start ---')
         val_reader = paddle.batch(
@@ -267,6 +279,7 @@ class TestQatInt8Comparison(unittest.TestCase):
         fp32_output, fp32_acc1, fp32_acc5, fp32_fps, fp32_lat = self._predict(
             val_reader,
             qat_model_path,
+            batch_size,
             batch_num,
             skip_batch_num,
             transform_to_int8=False)
@@ -277,17 +290,12 @@ class TestQatInt8Comparison(unittest.TestCase):
         int8_output, int8_acc1, int8_acc5, int8_fps, int8_lat = self._predict(
             val_reader,
             qat_model_path,
+            batch_size,
             batch_num,
             skip_batch_num,
             transform_to_int8=True)
 
-        _logger.info('--- Performance summary ---')
-        _logger.info('FP32: avg fps: {0:.2f}, avg latency: {1:.4f} s'.format(
-            fp32_fps, fp32_lat))
-        _logger.info('INT8: avg fps: {0:.2f}, avg latency: {1:.4f} s'.format(
-            int8_fps, int8_lat))
-
-        _logger.info('--- Comparing accuracy ---')
+        self._summarize_performance(fp32_fps, fp32_lat, int8_fps, int8_lat)
         self._compare_accuracy(fp32_acc1, fp32_acc5, int8_acc1, int8_acc5,
                                acc_diff_threshold)
......
@@ -172,6 +172,17 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
         com_pass.config(config_path)
         com_pass.run()
 
+    def _compare_accuracy(self, fp32_acc1, int8_acc1, threshold):
+        _logger.info('--- Accuracy summary ---')
+        _logger.info(
+            'Accepted top1 accuracy drop threshold: {0}. (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)'
+            .format(threshold))
+        _logger.info('FP32: avg top1 accuracy: {0:.4f}'.format(fp32_acc1))
+        _logger.info('INT8: avg top1 accuracy: {0:.4f}'.format(int8_acc1))
+        assert fp32_acc1 > 0.0
+        assert int8_acc1 > 0.0
+        assert fp32_acc1 - int8_acc1 <= threshold
+
     def test_compression(self):
         if not fluid.core.is_compiled_with_mkldnn():
             return
@@ -204,15 +215,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
             self._reader_creator(data_path, False), batch_size=batch_size)
         fp32_model_result = self._predict(val_reader, fp32_model_path)
 
-        _logger.info('--- comparing outputs ---')
-        _logger.info('Avg top1 INT8 accuracy: {0:.4f}'.format(int8_model_result[
-            0]))
-        _logger.info('Avg top1 FP32 accuracy: {0:.4f}'.format(fp32_model_result[
-            0]))
-        _logger.info('Accepted accuracy drop threshold: {0}'.format(
-            accuracy_diff_threshold))
-        assert fp32_model_result[0] - int8_model_result[
-            0] <= accuracy_diff_threshold
+        self._compare_accuracy(fp32_model_result[0], int8_model_result[0],
+                               accuracy_diff_threshold)
 
 
 if __name__ == '__main__':
......