Unverified commit 65b97d62, authored by lidanqing, committed by GitHub

GRU model xnli dataset C++ tester (#25534)

* Add lexical GRU unit test

performance works

* Get model accuracy

* model and data name to be confirmed
test=develop

* update model name and output format
test=develop

* update according to reviews
test=develop

* add accuracy check

* accuracy check between native and analysis
test=develop

* fix a reading bug, fix gru passes sequence
test=develop

* fix passes sequence
test=develop
Parent a86e8c0e
@@ -20,6 +20,12 @@ function(download_int8_data install_dir data_file)
  endif()
endfunction()
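# Download a GRU data/model archive from ${INFERENCE_URL}/gru unless it is already present.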
function(download_GRU_data install_dir data_file)
if (NOT EXISTS ${install_dir}/${data_file})
inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/gru ${data_file})
endif()
endfunction()
function(download_quant_data install_dir data_file)
  if (NOT EXISTS ${install_dir}/${data_file})
    inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file})
@@ -97,6 +103,18 @@ function(inference_analysis_api_quant_test_run TARGET_NAME test_binary fp32_mode
           --iterations=2)
endfunction()
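# Register a lexical-analysis test that runs ${test_binary} on the given model and data.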
function(inference_analysis_api_lexical_test_run TARGET_NAME test_binary infer_model data_path)
inference_analysis_test_run(${TARGET_NAME}
COMMAND ${test_binary}
ARGS --infer_model=${infer_model}
--infer_data=${data_path}
--batch_size=50
--cpu_num_threads=${CPU_NUM_THREADS_ON_CI}
--with_accuracy_layer=true
--use_analysis=true
--iterations=2)
endfunction()
function(preprocess_data2bin_test_run target py_script_source data_dir output_file)
  py_test(${target} SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${py_script_source}
          ARGS --data_dir=${data_dir}
@@ -315,6 +333,20 @@ if(WITH_MKLDNN)
  download_int8_data(${INT8_MOBILENET_SSD_MODEL_DIR} "mobilenet_ssd_int8_model.tar.gz" )
  inference_analysis_api_object_dection_int8_test_run(test_analyzer_int8_mobilenet_ssd ${INT8_OBJ_DETECT_TEST_APP} ${INT8_MOBILENET_SSD_MODEL_DIR} ${PASCALVOC_DATA_PATH})
### Lexical analysis GRU model
set(GRU_PATH "${INFERENCE_DEMO_INSTALL_DIR}/gru")
download_GRU_data("${GRU_PATH}" "GRU_eval_data.tar.gz")
download_GRU_data("${GRU_PATH}" "GRU_eval_model.tar.gz")
set(GRU_DATA_PATH "${GRU_PATH}/GRU_eval_data.bin")
set(GRU_MODEL_PATH "${GRU_PATH}/GRU_eval_model")
set(LEXICAL_TEST_APP "test_analyzer_lexical_analysis")
set(LEXICAL_TEST_APP_SRC "analyzer_lexical_analysis_gru_tester.cc")
# build test binary to be used in subsequent tests
inference_analysis_api_test_build(${LEXICAL_TEST_APP} ${LEXICAL_TEST_APP_SRC})
# run lexical analysis test
inference_analysis_api_lexical_test_run(test_analyzer_lexical_gru ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} ${GRU_DATA_PATH})
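# For reference, the registered test is roughly equivalent to invoking the
# binary manually (paths are illustrative):
#   ./test_analyzer_lexical_analysis --infer_model=${GRU_PATH}/GRU_eval_model \
#       --infer_data=${GRU_PATH}/GRU_eval_data.bin --batch_size=50 \
#       --with_accuracy_layer=true --use_analysis=true --iterations=2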
### optimized FP32 vs. Quant INT8 tests
set(QUANT_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/quant")
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
// setting iterations to 0 means processing the whole dataset
namespace paddle {
namespace inference {
namespace analysis {
void SetNativeConfig(AnalysisConfig *cfg,
const int &num_threads = FLAGS_cpu_num_threads) {
cfg->SwitchIrOptim(false);
cfg->DisableGpu();
cfg->SetModel(FLAGS_infer_model);
cfg->SetCpuMathLibraryNumThreads(num_threads);
}
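// Unlike the native config above, the analysis config enables IR graph
// optimization and MKL-DNN kernels.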
void SetAnalysisConfig(AnalysisConfig *cfg,
const int &num_threads = FLAGS_cpu_num_threads) {
cfg->SetModel(FLAGS_infer_model);
cfg->DisableGpu();
cfg->SwitchIrOptim(true);
cfg->SwitchSpecifyInputNames(false);
cfg->SetCpuMathLibraryNumThreads(num_threads);
cfg->EnableMKLDNN();
}
std::vector<size_t> ReadSentenceLod(std::ifstream &file, size_t offset,
int64_t total_sentences_num) {
std::vector<size_t> sentence_lod(total_sentences_num);
file.clear();
file.seekg(offset);
file.read(reinterpret_cast<char *>(sentence_lod.data()),
total_sentences_num * sizeof(size_t));
if (file.eof()) LOG(ERROR) << "Reached end of stream";
if (file.fail()) throw std::runtime_error("Failed reading file.");
return sentence_lod;
}
template <typename T>
class TensorReader {
public:
TensorReader(std::ifstream &file, size_t beginning_offset, std::string name)
: file_(file), position_(beginning_offset), name_(name) {}
PaddleTensor NextBatch(std::vector<int> shape, std::vector<size_t> lod) {
int numel =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
PaddleTensor tensor;
tensor.name = name_;
tensor.shape = shape;
tensor.dtype = GetPaddleDType<T>();
tensor.data.Resize(numel * sizeof(T));
if (!lod.empty()) {
tensor.lod.clear();
tensor.lod.push_back(lod);
}
file_.seekg(position_);
if (file_.eof()) LOG(ERROR) << name_ << ": reached end of stream";
if (file_.fail())
throw std::runtime_error(name_ + ": failed reading file.");
file_.read(reinterpret_cast<char *>(tensor.data.data()), numel * sizeof(T));
position_ = file_.tellg();
return tensor;
}
protected:
std::ifstream &file_;
size_t position_;
std::string name_;
};
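// Expected binary layout of --infer_data, as consumed by SetInput below:
//   int64_t total_sentences_num
//   int64_t total_words_num
//   size_t  sentence_lod[total_sentences_num]  // word count of each sentence
//   int64_t words[total_words_num]
//   int64_t targets[total_words_num]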
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
int32_t batch_size = FLAGS_batch_size) {
std::ifstream file(FLAGS_infer_data, std::ios::binary);
if (!file) {
FAIL() << "Couldn't open file: " << FLAGS_infer_data;
}
int64_t total_sentences_num = 0L;
int64_t total_words_num = 0L;
file.seekg(0);
file.read(reinterpret_cast<char *>(&total_sentences_num), sizeof(int64_t));
LOG(INFO) << "Total sentences in file: " << total_sentences_num;
file.read(reinterpret_cast<char *>(&total_words_num), sizeof(int64_t));
LOG(INFO) << "Total words in file: " << total_words_num;
size_t lods_beginning_offset = static_cast<size_t>(file.tellg());
auto words_beginning_offset =
    lods_beginning_offset + sizeof(size_t) * total_sentences_num;
auto targets_beginning_offset =
    words_beginning_offset + sizeof(int64_t) * total_words_num;
std::vector<size_t> lod_full =
    ReadSentenceLod(file, lods_beginning_offset, total_sentences_num);
size_t lods_sum = std::accumulate(lod_full.begin(), lod_full.end(), 0UL);
EXPECT_EQ(lods_sum, static_cast<size_t>(total_words_num));
TensorReader<int64_t> words_reader(file, words_beginning_offset, "words");
TensorReader<int64_t> targets_reader(file, targets_beginning_offset,
                                     "targets");
// If FLAGS_iterations is set to 0, run all batches
auto iterations_max = total_sentences_num / batch_size;
auto iterations = iterations_max;
if (FLAGS_iterations > 0 && FLAGS_iterations < iterations_max) {
iterations = FLAGS_iterations;
}
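// Build each batch's LoD vector as a running sum of per-sentence word
// counts, e.g. counts {3, 2, 4} become offsets {0, 3, 5, 9};
// batch_lod[batch_size] is then the total word count of the batch.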
for (int64_t i = 0; i < iterations; i++) {
  // Calculate the words num. Shape=[words_num, 1]
  std::vector<size_t> batch_lod = {0};
  size_t num_words = 0L;
  std::transform(lod_full.begin() + i * batch_size,
                 lod_full.begin() + (i + 1) * batch_size,
                 std::back_inserter(batch_lod),
                 [&num_words](const size_t lodtemp) -> size_t {
                   num_words += lodtemp;
                   return num_words;
                 });
  auto words_tensor = words_reader.NextBatch(
      {static_cast<int>(batch_lod[batch_size]), 1}, batch_lod);
  if (FLAGS_with_accuracy_layer) {
    auto targets_tensor = targets_reader.NextBatch(
        {static_cast<int>(batch_lod[batch_size]), 1}, batch_lod);
inputs->emplace_back(std::vector<PaddleTensor>{
std::move(words_tensor), std::move(targets_tensor)});
} else {
inputs->emplace_back(std::vector<PaddleTensor>{std::move(words_tensor)});
}
}
}
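// Runs single-threaded prediction with the given config. When the model has
// an accuracy layer, returns {precision, recall, F1} micro-averaged from
// counts summed over all batches; otherwise returns zeros.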
std::vector<double> Lexical_Test(
const std::vector<std::vector<PaddleTensor>> &input_slots_all,
std::vector<std::vector<PaddleTensor>> *outputs, AnalysisConfig *config,
const bool use_analysis) {
TestOneThreadPrediction(
    reinterpret_cast<const PaddlePredictor::Config *>(config),
    input_slots_all, outputs, use_analysis);
std::vector<double> acc_res(3);
if (FLAGS_with_accuracy_layer) {
EXPECT_GT(outputs->size(), 0UL);
EXPECT_EQ(3UL, (*outputs)[0].size());
std::vector<int64_t> acc_sum(3);
for (size_t i = 0; i < outputs->size(); i++) {
for (size_t j = 0; j < 3UL; j++) {
acc_sum[j] =
acc_sum[j] + *static_cast<int64_t *>((*outputs)[i][j].data.data());
}
}
// nums_infer, nums_label, nums_correct
auto precision =
acc_sum[0]
? static_cast<double>(acc_sum[2]) / static_cast<double>(acc_sum[0])
: 0;
auto recall =
acc_sum[1]
? static_cast<double>(acc_sum[2]) / static_cast<double>(acc_sum[1])
: 0;
auto f1_score =
    acc_sum[2] ? 2 * precision * recall / (precision + recall) : 0.0;
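// Example: summed counts nums_infer=10, nums_label=8, nums_correct=6 give
// precision = 6/10 = 0.6, recall = 6/8 = 0.75, F1 = 0.9/1.35 ~ 0.667.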
LOG(INFO) << "Precision: " << std::fixed << std::setw(6)
<< std::setprecision(5) << precision;
LOG(INFO) << "Recall: " << std::fixed << std::setw(6)
<< std::setprecision(5) << recall;
LOG(INFO) << "F1 score: " << std::fixed << std::setw(6)
<< std::setprecision(5) << f1_score;
acc_res = {precision, recall, f1_score};
} else {
EXPECT_GT(outputs->size(), 0UL);
EXPECT_EQ((*outputs)[0].size(), 1UL);
LOG(INFO) << "No accuracy result. To get accuracy result provide a model "
"with accuracy layers in it and use --with_accuracy_layer "
"option.";
}
return acc_res;
}
TEST(Analyzer_lexical_test, Analyzer_lexical_analysis) {
AnalysisConfig native_cfg;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
SetNativeConfig(&native_cfg, FLAGS_cpu_num_threads);
std::vector<double> acc_ref =
    Lexical_Test(input_slots_all, &outputs, &native_cfg, false);
if (FLAGS_use_analysis) {
AnalysisConfig analysis_cfg;
SetAnalysisConfig(&analysis_cfg, FLAGS_cpu_num_threads);
analysis_cfg.pass_builder()->AppendPass("mkldnn_placement_pass");
std::vector<double> acc_analysis =
    Lexical_Test(input_slots_all, &outputs, &analysis_cfg, true);
for (size_t i = 0; i < acc_analysis.size(); i++) {
CHECK_LE(std::abs(acc_ref[i] - acc_analysis[i]),
FLAGS_quantized_accuracy);
}
}
}
} // namespace analysis
} // namespace inference
} // namespace paddle