// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "test_suite.h" // NOLINT DEFINE_string(modeldir, "", "Directory of the inference model."); DEFINE_string(int8dir, "", "Directory of the quant inference model."); DEFINE_string(datadir, "", "Directory of the infer data."); namespace paddle_infer { paddle::test::Record PrepareInput(int batch_size) { // init input data int channel = 3; int width = 224; int height = 224; paddle::test::Record image_Record; int input_num = batch_size * channel * width * height; // load from binary data std::ifstream fs(FLAGS_datadir, std::ifstream::binary); EXPECT_TRUE(fs.is_open()); CHECK(fs.is_open()); float* input = new float[input_num]; memset(input, 0, input_num * sizeof(float)); auto input_data_tmp = input; for (int i = 0; i < input_num; ++i) { fs.read(reinterpret_cast(input_data_tmp), sizeof(*input_data_tmp)); input_data_tmp++; } int label = 0; fs.read(reinterpret_cast(&label), sizeof(label)); fs.close(); std::vector input_data{input, input + input_num}; image_Record.data = input_data; image_Record.shape = std::vector{batch_size, channel, width, height}; image_Record.type = paddle::PaddleDType::FLOAT32; image_Record.label = label; return image_Record; } TEST(test_resnet50_quant, multi_thread4_trt_int8_bz1) { int thread_num = 4; // init input data std::map input_data_map; input_data_map["image"] = PrepareInput(1); // init output data std::map infer_output_data; // prepare inference config paddle_infer::Config config; config.SetModel(FLAGS_int8dir); config.EnableUseGpu(1000, 0); config.EnableTensorRtEngine(1 << 20, 10, 3, paddle_infer::PrecisionType::kInt8, true, false); // get infer results from multi threads std::vector threads; services::PredictorPool pred_pool(config, thread_num); for (int i = 0; i < thread_num; ++i) { threads.emplace_back(paddle::test::SingleThreadPrediction, pred_pool.Retrive(i), &input_data_map, &infer_output_data, 5); } // thread join & check outputs for (int i = 0; i < thread_num; ++i) { LOG(INFO) << "join tid : " << i; threads[i].join(); // check outputs std::vector index(1000); std::iota(index.begin(), index.end(), 0); auto out_data = infer_output_data["save_infer_model/scale_0.tmp_0"].data.data(); std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) { return out_data[i1] > out_data[i2]; }); // compare inference & groundtruth label ASSERT_EQ(index[0], input_data_map["image"].label); } std::cout << "finish test" << std::endl; } TEST(test_resnet50_quant, multi_thread_multi_instance) { int thread_num = 4; // init input data std::map input_data_fp32, input_data_quant; input_data_quant["image"] = PrepareInput(1); input_data_fp32["inputs"] = PrepareInput(1); // init output data std::map infer_output_data; // prepare inference config paddle_infer::Config config_fp32, config_quant; config_fp32.SetModel(FLAGS_modeldir + "/inference.pdmodel", FLAGS_modeldir + "/inference.pdiparams"); config_fp32.EnableUseGpu(1000, 0); config_fp32.EnableTensorRtEngine( 1 << 20, 10, 3, paddle_infer::PrecisionType::kFloat32, true, false); config_quant.SetModel(FLAGS_int8dir); config_quant.EnableUseGpu(1000, 0); config_quant.EnableTensorRtEngine( 1 << 20, 10, 3, paddle_infer::PrecisionType::kInt8, true, false); // get infer results from multi threads std::vector threads; services::PredictorPool pred_pool_fp32(config_fp32, thread_num); services::PredictorPool pred_pool_quant(config_quant, thread_num); for (int i = 0; i < thread_num; ++i) { if (i % 2 == 0) { threads.emplace_back(paddle::test::SingleThreadPrediction, pred_pool_fp32.Retrive(i), &input_data_fp32, &infer_output_data, 5); } else { threads.emplace_back(paddle::test::SingleThreadPrediction, pred_pool_quant.Retrive(i), &input_data_quant, &infer_output_data, 5); } } // thread join & check outputs for (int i = 0; i < thread_num; ++i) { LOG(INFO) << "join tid : " << i; std::vector index(1000); threads[i].join(); if (i % 2 == 0) { // check outputs std::iota(index.begin(), index.end(), 0); auto out_data = infer_output_data["save_infer_model/scale_0.tmp_0"].data.data(); std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) { return out_data[i1] > out_data[i2]; }); // compare inference & groundtruth label ASSERT_EQ(index[0], input_data_fp32["inputs"].label); } else { // check outputs std::iota(index.begin(), index.end(), 0); auto out_data = infer_output_data["save_infer_model/scale_0.tmp_0"].data.data(); std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) { return out_data[i1] > out_data[i2]; }); // compare inference & groundtruth label ASSERT_EQ(index[0], input_data_quant["image"].label); } } } } // namespace paddle_infer int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); ::google::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); }