diff --git a/paddle/fluid/inference/tests/infer_ut/run.sh b/paddle/fluid/inference/tests/infer_ut/run.sh
index c1694c76a7d2c8a7db99868ab0aae4ec12a3cfe2..627cd56f4830c9142c8d0b36ac9577e7d8514391 100755
--- a/paddle/fluid/inference/tests/infer_ut/run.sh
+++ b/paddle/fluid/inference/tests/infer_ut/run.sh
@@ -95,6 +95,12 @@ for model_name in $det_download_list; do
     download $url_prefix $model_name
 done
 
+unknown_download_list='resnet50_quant'
+for model_name in $unknown_download_list; do
+    url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/unknown"
+    download $url_prefix $model_name
+done
+
 function compile_test() {
     mkdir -p ${build_dir}
     cd ${build_dir}
@@ -194,6 +200,18 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
         EXIT_CODE=1
     fi
 
+    printf "${YELLOW} start test_resnet50_quant ${NC} \n";
+    compile_test "test_resnet50_quant"
+    ./test_resnet50_quant \
+        --int8dir=$DATA_DIR/resnet50_quant/resnet50_quant/resnet50_quant \
+        --modeldir=$DATA_DIR/resnet50/resnet50 \
+        --datadir=$DATA_DIR/resnet50_quant/resnet50_quant/imagenet-eval-binary/9.data \
+        --gtest_output=xml:test_resnet50_quant.xml
+    if [ $? -ne 0 ]; then
+        echo "test_resnet50_quant runs failed" >> ${current_dir}/build/test_summary.txt
+        EXIT_CODE=1
+    fi
+
     cp ./*.xml ${log_dir};
 fi
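Note for reviewers: the `9.data` file wired in above is parsed by `PrepareInput` in the new test below, which implies a simple binary layout: one 3x224x224 float32 image followed by a single int32 ground-truth label. A minimal standalone sketch of a reader for that assumed layout (the `ImageSample` struct and function name are illustrative, not part of the patch):

```cpp
#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

// Hypothetical holder for one imagenet-eval-binary sample; not part of the patch.
struct ImageSample {
  std::vector<float> pixels;  // NCHW, 1 x 3 x 224 x 224
  int32_t label = -1;         // ground-truth class id appended after the pixels
};

// Reads one sample laid out as [3*224*224 float32][int32 label],
// mirroring the parsing done by PrepareInput below.
ImageSample ReadSample(const std::string& path) {
  constexpr int kInputNum = 3 * 224 * 224;
  ImageSample sample;
  sample.pixels.resize(kInputNum);
  std::ifstream fs(path, std::ifstream::binary);
  fs.read(reinterpret_cast<char*>(sample.pixels.data()),
          kInputNum * sizeof(float));
  fs.read(reinterpret_cast<char*>(&sample.label), sizeof(sample.label));
  return sample;
}
```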
diff --git a/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc b/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc
new file mode 100644
index 0000000000000000000000000000000000000000..bf26f38c083fa22d96ffbe14a815469971211c43
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_resnet50_quant.cc
@@ -0,0 +1,170 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_suite.h"  // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+DEFINE_string(int8dir, "", "Directory of the quant inference model.");
+DEFINE_string(datadir, "", "Directory of the infer data.");
+
+namespace paddle_infer {
+
+paddle::test::Record PrepareInput(int batch_size) {
+  // init input data
+  int channel = 3;
+  int width = 224;
+  int height = 224;
+  paddle::test::Record image_Record;
+  int input_num = batch_size * channel * width * height;
+
+  // load from binary data: input_num float32 pixels followed by an int label
+  std::ifstream fs(FLAGS_datadir, std::ifstream::binary);
+  EXPECT_TRUE(fs.is_open());
+  CHECK(fs.is_open());
+
+  float* input = new float[input_num];
+  memset(input, 0, input_num * sizeof(float));
+  auto input_data_tmp = input;
+  for (int i = 0; i < input_num; ++i) {
+    fs.read(reinterpret_cast<char*>(input_data_tmp), sizeof(*input_data_tmp));
+    input_data_tmp++;
+  }
+  int label = 0;
+  fs.read(reinterpret_cast<char*>(&label), sizeof(label));
+  fs.close();
+
+  std::vector<float> input_data{input, input + input_num};
+  delete[] input;  // free the staging buffer once copied into the Record
+  image_Record.data = input_data;
+  image_Record.shape = std::vector<int>{batch_size, channel, width, height};
+  image_Record.type = paddle::PaddleDType::FLOAT32;
+  image_Record.label = label;
+  return image_Record;
+}
+
+TEST(test_resnet50_quant, multi_thread4_trt_int8_bz1) {
+  int thread_num = 4;
+  // init input data
+  std::map<std::string, paddle::test::Record> input_data_map;
+  input_data_map["image"] = PrepareInput(1);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data;
+  // prepare inference config
+  paddle_infer::Config config;
+  config.SetModel(FLAGS_int8dir);
+  config.EnableUseGpu(1000, 0);
+  config.EnableTensorRtEngine(1 << 20, 10, 3,
+                              paddle_infer::PrecisionType::kInt8, true, false);
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool(config, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         pred_pool.Retrive(i), &input_data_map,
+                         &infer_output_data, 5);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+
+    // check outputs: rank classes by score, top-1 must match the label
+    std::vector<int> index(1000);
+    std::iota(index.begin(), index.end(), 0);
+    auto out_data =
+        infer_output_data["save_infer_model/scale_0.tmp_0"].data.data();
+    std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) {
+      return out_data[i1] > out_data[i2];
+    });
+    // compare inference & groundtruth label
+    ASSERT_EQ(index[0], input_data_map["image"].label);
+  }
+
+  std::cout << "finish test" << std::endl;
+}
+
+TEST(test_resnet50_quant, multi_thread_multi_instance) {
+  int thread_num = 4;
+  // init input data
+  std::map<std::string, paddle::test::Record> input_data_fp32, input_data_quant;
+  input_data_quant["image"] = PrepareInput(1);
+  input_data_fp32["inputs"] = PrepareInput(1);
+
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data;
+  // prepare inference config
+  paddle_infer::Config config_fp32, config_quant;
+  config_fp32.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                       FLAGS_modeldir + "/inference.pdiparams");
+  config_fp32.EnableUseGpu(1000, 0);
+  config_fp32.EnableTensorRtEngine(
+      1 << 20, 10, 3, paddle_infer::PrecisionType::kFloat32, true, false);
+
+  config_quant.SetModel(FLAGS_int8dir);
+  config_quant.EnableUseGpu(1000, 0);
+  config_quant.EnableTensorRtEngine(
+      1 << 20, 10, 3, paddle_infer::PrecisionType::kInt8, true, false);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  services::PredictorPool pred_pool_fp32(config_fp32, thread_num);
+  services::PredictorPool pred_pool_quant(config_quant, thread_num);
+  for (int i = 0; i < thread_num; ++i) {
+    // even threads run the FP32 model, odd threads run the INT8 quant model
+    if (i % 2 == 0) {
+      threads.emplace_back(paddle::test::SingleThreadPrediction,
+                           pred_pool_fp32.Retrive(i), &input_data_fp32,
+                           &infer_output_data, 5);
+    } else {
+      threads.emplace_back(paddle::test::SingleThreadPrediction,
+                           pred_pool_quant.Retrive(i), &input_data_quant,
+                           &infer_output_data, 5);
+    }
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    std::vector<int> index(1000);
+    threads[i].join();
+
+    // check outputs: rank classes by score, top-1 must match the label
+    std::iota(index.begin(), index.end(), 0);
+    auto out_data =
+        infer_output_data["save_infer_model/scale_0.tmp_0"].data.data();
+    std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) {
+      return out_data[i1] > out_data[i2];
+    });
+    // compare inference & groundtruth label
+    if (i % 2 == 0) {
+      ASSERT_EQ(index[0], input_data_fp32["inputs"].label);
+    } else {
+      ASSERT_EQ(index[0], input_data_quant["image"].label);
+    }
+  }
+}
+
+}  // namespace paddle_infer
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::google::ParseCommandLineFlags(&argc, &argv, true);
+  return RUN_ALL_TESTS();
+}
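Both tests recover the predicted class by filling an index vector with `std::iota` and sorting the whole thing by score, then comparing `index[0]` against the stored label. Since only the top-1 element is needed, an `std::max_element` arg-max is an equivalent, cheaper check; a sketch (the helper name is hypothetical, not part of the patch):

```cpp
#include <algorithm>
#include <iterator>

// Equivalent top-1 check: arg-max over the flat FP32 output scores,
// avoiding the O(n log n) full index sort used in the tests.
int Top1Class(const float* out_data, int num_classes = 1000) {
  return static_cast<int>(std::distance(
      out_data, std::max_element(out_data, out_data + num_classes)));
}

// Usage inside the join loop would then reduce to:
//   ASSERT_EQ(Top1Class(out_data), input_data_map["image"].label);
```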
diff --git a/paddle/fluid/inference/tests/infer_ut/test_suite.h b/paddle/fluid/inference/tests/infer_ut/test_suite.h
index 2f1034d4df9a65bc506c492ffba594888ccd5c9b..b2546b180b976a16df6b81bd0f909d9c1e7463a7 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_suite.h
+++ b/paddle/fluid/inference/tests/infer_ut/test_suite.h
@@ -35,6 +35,7 @@ class Record {
   std::vector<float> data;
   std::vector<int> shape;
   paddle::PaddleDType type;
+  int label;  // ground-truth class id for accuracy checks
 };
 
 std::string read_file(std::string filename) {
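`SingleThreadPrediction` is defined in `test_suite.h` and is not touched by this diff; from the way the tests call it, each thread presumably copies its `Record` input into the predictor, runs inference, and copies the output tensor back. A rough sketch of that call pattern using only public `paddle_infer` APIs (the function and its parameters are assumptions, not the actual helper):

```cpp
#include <vector>

#include "paddle_inference_api.h"  // public Paddle Inference C++ API

// Sketch of one prediction round as the tests imply it; the real helper
// (paddle::test::SingleThreadPrediction) lives in test_suite.h.
void RunOnce(paddle_infer::Predictor* predictor,
             const std::vector<int>& shape,     // e.g. {1, 3, 224, 224}
             const std::vector<float>& data) {  // flat NCHW pixels
  auto in_names = predictor->GetInputNames();
  auto in_tensor = predictor->GetInputHandle(in_names[0]);
  in_tensor->Reshape(shape);
  in_tensor->CopyFromCpu(data.data());

  predictor->Run();

  auto out_names = predictor->GetOutputNames();
  auto out_tensor = predictor->GetOutputHandle(out_names[0]);
  int numel = 1;
  for (int d : out_tensor->shape()) numel *= d;
  std::vector<float> out(numel);  // 1000 class scores for resnet50
  out_tensor->CopyToCpu(out.data());
}
```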