From 9d4f00bc5420c85af5c20b977e76c9fb5be72db0 Mon Sep 17 00:00:00 2001
From: Peihan
Date: Tue, 17 Aug 2021 13:18:49 +0800
Subject: [PATCH] add mkl multi-thread test cases in PR-CI-INFERENCE (#34946)

* add mkl multi-thread test cases

* fix codestyle

* fix codestyle & enable ernie mkl test
---
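Reviewer note (below the "---" cut, so git-am drops it): both MKL test cases
added in this patch follow the same structure. A minimal sketch of that
pattern, assuming the paddle::test helpers from test_suite.h further down
(Record, SingleThreadPrediction, CompareRecord) and fixture variables
(thread_num, my_input_data_map, infer_output_data) set up as in the tests
themselves:

    // Configure a CPU/MKLDNN predictor; the knobs mirror the tests below.
    paddle_infer::Config config;
    config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
                    FLAGS_modeldir + "/inference.pdiparams");
    config.DisableGpu();                     // force CPU execution
    config.EnableMKLDNN();                   // use MKLDNN (oneDNN) kernels
    config.SetMkldnnCacheCapacity(10);       // bound the cached-shape count
    config.SetCpuMathLibraryNumThreads(10);  // intra-op math threads

    // One predictor per thread; every thread runs the same inputs twice,
    // and the outputs are checked against a single-threaded run with IR
    // optimization disabled (the "no_ir" ground truth).
    paddle_infer::services::PredictorPool pred_pool(config, thread_num);
    std::vector<std::thread> threads;
    for (int i = 0; i < thread_num; ++i) {
      threads.emplace_back(paddle::test::SingleThreadPrediction,
                           pred_pool.Retrive(i), &my_input_data_map,
                           &infer_output_data, 2);
    }
    for (auto &t : threads) t.join();  // then CompareRecord(...) per test
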
 paddle/fluid/inference/tests/infer_ut/run.sh  |  26 ++
 .../tests/infer_ut/test_det_mv3_db.cc         |  46 +++++-
 .../tests/infer_ut/test_ernie_text_cls.cc     | 137 ++++++++++++++++++
 .../inference/tests/infer_ut/test_suite.h     |  41 +++++-
 4 files changed, 243 insertions(+), 7 deletions(-)
 create mode 100644 paddle/fluid/inference/tests/infer_ut/test_ernie_text_cls.cc

diff --git a/paddle/fluid/inference/tests/infer_ut/run.sh b/paddle/fluid/inference/tests/infer_ut/run.sh
index 64ada23767f..7d17bb647a1 100755
--- a/paddle/fluid/inference/tests/infer_ut/run.sh
+++ b/paddle/fluid/inference/tests/infer_ut/run.sh
@@ -77,6 +77,12 @@
     download $url_prefix $model_name
 done
 
+nlp_download_list='ernie_text_cls'
+for model_name in $nlp_download_list; do
+    url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2.1.1/nlp"
+    download $url_prefix $model_name
+done
+
 # compile and run test
 cd $current_dir
 mkdir -p build
@@ -144,6 +150,26 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
     fi
 fi
 
+# ---------gpu ernie_text_cls on linux---------
+if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
+    cmake .. -DPADDLE_LIB=${inference_install_dir} \
+        -DWITH_MKL=$TURN_ON_MKL \
+        -DDEMO_NAME=test_ernie_text_cls \
+        -DWITH_GPU=$TEST_GPU_CPU \
+        -DWITH_STATIC_LIB=OFF \
+        -DUSE_TENSORRT=$USE_TENSORRT \
+        -DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
+        -DWITH_GTEST=ON
+    make -j$(nproc)
+    ./test_ernie_text_cls \
+        --modeldir=$DATA_DIR/ernie_text_cls/ernie_text_cls \
+        --gtest_output=xml:test_ernie_text_cls.xml
+    if [ $? -ne 0 ]; then
+        echo "test_ernie_text_cls runs failed" >> ${current_dir}/build/test_summary.txt
+        EXIT_CODE=1
+    fi
+fi
+
 if [[ -f ${current_dir}/build/test_summary.txt ]];then
     echo "=====================test summary======================"
     cat ${current_dir}/build/test_summary.txt
diff --git a/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc b/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc
index c5920d3b2d8..ce7b8ce4637 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_det_mv3_db.cc
@@ -123,7 +123,7 @@ TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
                         FLAGS_modeldir + "/inference.pdiparams");
   config.EnableUseGpu(100, 0);
   config.EnableTensorRtEngine(
-      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false);
+      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, true, false);
   PrepareDynamicShape(&config, 4);
   // get groudtruth by disbale ir
   paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
@@ -149,6 +149,50 @@ TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
   std::cout << "finish multi-thread test" << std::endl;
 }
 
+TEST(test_det_mv3_db, multi_thread2_mkl_fp32_bz2) {
+  int thread_num = 2;  // thread > 2 may OOM
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["x"] = PrepareInput(2, 640);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground-truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.DisableGpu();
+  config.EnableMKLDNN();
+  config.SetMkldnnCacheCapacity(10);
+  config.SetCpuMathLibraryNumThreads(10);
+  // get ground truth by disabling ir
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
+                         &truth_output_data, 1);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &my_input_data_map,
+                         &infer_output_data, 2);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    CompareRecord(&truth_output_data, &infer_output_data, 1e-4);
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
+
 }  // namespace paddle_infer
 
 int main(int argc, char** argv) {
diff --git a/paddle/fluid/inference/tests/infer_ut/test_ernie_text_cls.cc b/paddle/fluid/inference/tests/infer_ut/test_ernie_text_cls.cc
new file mode 100644
index 00000000000..f73803fe593
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_ernie_text_cls.cc
@@ -0,0 +1,137 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_suite.h"  // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+
+namespace paddle_infer {
+
+template <typename T>
+T cRandom(int min, int max) {
+  static unsigned int seed = 100;  // static, so successive calls differ
+  return (min + static_cast<T>(static_cast<double>(max) * rand_r(&seed) /
+                               (static_cast<double>(RAND_MAX) + 1)));
+}
+
+std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
+  // init input data
+  int digit_length = 115;
+  paddle::test::Record input_ids, segment_ids;
+  int input_num = batch_size * digit_length;
+  std::vector<int64_t> input_data(input_num, 1);
+  std::vector<int64_t> segment_data(input_num, 0);
+  srand((unsigned)time(NULL));
+  for (size_t x = 0; x < input_data.size(); x++) {
+    input_data[x] = cRandom<int64_t>(1, 100);
+  }
+  input_ids.data = std::vector<float>(input_data.begin(), input_data.end());
+  input_ids.shape = std::vector<int>{batch_size, digit_length};
+  input_ids.type = paddle::PaddleDType::INT64;
+
+  segment_ids.data =
+      std::vector<float>(segment_data.begin(), segment_data.end());
+  segment_ids.shape = std::vector<int>{batch_size, digit_length};
+  segment_ids.type = paddle::PaddleDType::INT64;
+
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map.insert({"input_ids", input_ids});
+  my_input_data_map.insert({"token_type_ids", segment_ids});
+
+  return my_input_data_map;
+}
+
+TEST(test_ernie_text_cls, analysis_gpu_bz2_buffer) {
+  // init input data
+  auto my_input_data_map = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground-truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+
+  // prepare inference config from buffer
+  std::string prog_file = FLAGS_modeldir + "/inference.pdmodel";
+  std::string params_file = FLAGS_modeldir + "/inference.pdiparams";
+  std::string prog_str = paddle::test::read_file(prog_file);
+  std::string params_str = paddle::test::read_file(params_file);
+  config.SetModelBuffer(prog_str.c_str(), prog_str.size(), params_str.c_str(),
+                        params_str.size());
+  // get ground truth by disabling ir
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
+                         &truth_output_data, 1);
+  // get infer results
+  paddle_infer::services::PredictorPool pred_pool(config, 1);
+  SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
+                         &infer_output_data);
+  // check outputs
+  CompareRecord(&truth_output_data, &infer_output_data);
+  std::cout << "finish test" << std::endl;
+}
+
+TEST(test_ernie_text_cls, multi_thread4_mkl_fp32_bz2) {
+  int thread_num = 4;
+  // init input data
+  auto my_input_data_map = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare ground-truth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config.DisableGpu();
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
"/inference.pdiparams"); + config.DisableGpu(); + config.EnableMKLDNN(); + config.SetMkldnnCacheCapacity(10); + config.SetCpuMathLibraryNumThreads(10); + // get groudtruth by disbale ir + paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1); + SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map, + &truth_output_data, 1); + + // get infer results from multi threads + std::vector threads; + services::PredictorPool pred_pool(config, thread_num); + for (int i = 0; i < thread_num; ++i) { + threads.emplace_back(paddle::test::SingleThreadPrediction, + pred_pool.Retrive(i), &my_input_data_map, + &infer_output_data, 2); + } + + // thread join & check outputs + for (int i = 0; i < thread_num; ++i) { + LOG(INFO) << "join tid : " << i; + threads[i].join(); + CompareRecord(&truth_output_data, &infer_output_data); + } + + std::cout << "finish multi-thread test" << std::endl; +} + +} // namespace paddle_infer + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + ::google::ParseCommandLineFlags(&argc, &argv, true); + return RUN_ALL_TESTS(); +} diff --git a/paddle/fluid/inference/tests/infer_ut/test_suite.h b/paddle/fluid/inference/tests/infer_ut/test_suite.h index b0da828998c..2f1034d4df9 100644 --- a/paddle/fluid/inference/tests/infer_ut/test_suite.h +++ b/paddle/fluid/inference/tests/infer_ut/test_suite.h @@ -37,6 +37,12 @@ class Record { paddle::PaddleDType type; }; +std::string read_file(std::string filename) { + std::ifstream file(filename); + return std::string((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); +} + void SingleThreadPrediction(paddle_infer::Predictor *predictor, std::map *input_data_map, std::map *output_data_map, @@ -44,14 +50,37 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor, // prepare input tensor auto input_names = predictor->GetInputNames(); for (const auto & [ key, value ] : *input_data_map) { - auto input_tensor = predictor->GetInputHandle(key); - input_tensor->Reshape(value.shape); - input_tensor->CopyFromCpu(value.data.data()); + switch (value.type) { + case paddle::PaddleDType::INT64: { + std::vector input_value = + std::vector(value.data.begin(), value.data.end()); + auto input_tensor = predictor->GetInputHandle(key); + input_tensor->Reshape(value.shape); + input_tensor->CopyFromCpu(input_value.data()); + break; + } + case paddle::PaddleDType::INT32: { + std::vector input_value = + std::vector(value.data.begin(), value.data.end()); + auto input_tensor = predictor->GetInputHandle(key); + input_tensor->Reshape(value.shape); + input_tensor->CopyFromCpu(input_value.data()); + break; + } + case paddle::PaddleDType::FLOAT32: { + std::vector input_value = + std::vector(value.data.begin(), value.data.end()); + auto input_tensor = predictor->GetInputHandle(key); + input_tensor->Reshape(value.shape); + input_tensor->CopyFromCpu(input_value.data()); + break; + } + } } // inference for (size_t i = 0; i < repeat_times; ++i) { - CHECK(predictor->Run()); + ASSERT_TRUE(predictor->Run()); } // get output data to Record @@ -112,8 +141,8 @@ void CompareRecord(std::map *truth_output_data, size_t numel = value.data.size() / sizeof(float); EXPECT_EQ(value.data.size(), truth_record.data.size()); for (size_t i = 0; i < numel; ++i) { - CHECK_LT(fabs(value.data.data()[i] - truth_record.data.data()[i]), - epislon); + ASSERT_LT(fabs(value.data.data()[i] - truth_record.data.data()[i]), + epislon); } } } -- GitLab