Unverified commit 97cae5e8 authored by Peihan, committed by GitHub

add resnet50_quant model in PR-CI-INFERENCE (#35012)

* add slim resnet50 quant model in pr-ci-inference

* enable resnet50_quant multi_thread4_trt_int8_bz1

* remove LOG(FATAL)
Parent a2e08657
...@@ -95,6 +95,12 @@ for model_name in $det_download_list; do
download $url_prefix $model_name
done
unknown_download_list='resnet50_quant'
for model_name in $unknown_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/unknown"
download $url_prefix $model_name
done
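The download helper used by these loops is defined earlier in this script and is not part of the diff. As a rough sketch of what it is assumed to do (fetch ${url_prefix}/${model_name}.tgz and unpack it under $DATA_DIR), it might look like the following; the tarball naming and extraction path are assumptions, not taken from this change:

# Hypothetical sketch of the download helper; the real definition lives earlier in the script.
# Assumes each model is published as ${url_prefix}/${model_name}.tgz.
function download() {
    local url_prefix=$1
    local model_name=$2
    mkdir -p ${DATA_DIR}/${model_name}
    if [ ! -f ${DATA_DIR}/${model_name}/${model_name}.tgz ]; then
        wget -q -P ${DATA_DIR}/${model_name} ${url_prefix}/${model_name}.tgz
        tar xzf ${DATA_DIR}/${model_name}/${model_name}.tgz -C ${DATA_DIR}/${model_name}
    fi
}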
function compile_test() {
mkdir -p ${build_dir}
cd ${build_dir}
...@@ -194,6 +200,18 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
EXIT_CODE=1
fi
printf "${YELLOW} start test_resnet50_quant ${NC} \n";
compile_test "test_resnet50_quant"
./test_resnet50_quant \
--int8dir=$DATA_DIR/resnet50_quant/resnet50_quant/resnet50_quant \
--modeldir=$DATA_DIR/resnet50/resnet50 \
--datadir=$DATA_DIR/resnet50_quant/resnet50_quant/imagenet-eval-binary/9.data \
--gtest_output=xml:test_resnet50_quant.xml
if [ $? -ne 0 ]; then
echo "test_resnet50_quant runs failed" >> ${current_dir}/build/test_summary.txt
EXIT_CODE=1
fi
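The 9.data file passed via --datadir is assumed to store one preprocessed image as 3*224*224 float32 values followed by a single int32 groundtruth label, which is the layout PrepareInput in the new test reads. A quick size check under that assumption could look like this (sketch only, not part of this change):

# Sketch: verify the eval binary is at least as large as the assumed layout
# (3*224*224 float32 values plus one int32 label) that the test will read.
data_file=$DATA_DIR/resnet50_quant/resnet50_quant/imagenet-eval-binary/9.data
expected_bytes=$((3 * 224 * 224 * 4 + 4))
actual_bytes=$(stat -c%s ${data_file})
if [ ${actual_bytes} -lt ${expected_bytes} ]; then
    echo "unexpected size for ${data_file}: got ${actual_bytes}, need at least ${expected_bytes}"
fi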
cp ./*.xml ${log_dir};
fi
...
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "test_suite.h" // NOLINT
DEFINE_string(modeldir, "", "Directory of the inference model.");
DEFINE_string(int8dir, "", "Directory of the quant inference model.");
DEFINE_string(datadir, "", "Directory of the infer data.");
namespace paddle_infer {
paddle::test::Record PrepareInput(int batch_size) {
// init input data
int channel = 3;
int width = 224;
int height = 224;
paddle::test::Record image_Record;
int input_num = batch_size * channel * width * height;
// load from binary data
std::ifstream fs(FLAGS_datadir, std::ifstream::binary);
EXPECT_TRUE(fs.is_open());
CHECK(fs.is_open());
float* input = new float[input_num];
memset(input, 0, input_num * sizeof(float));
auto input_data_tmp = input;
for (int i = 0; i < input_num; ++i) {
fs.read(reinterpret_cast<char*>(input_data_tmp), sizeof(*input_data_tmp));
input_data_tmp++;
}
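// the groundtruth label is stored as a single int immediately after the image data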
int label = 0;
fs.read(reinterpret_cast<char*>(&label), sizeof(label));
fs.close();
std::vector<float> input_data{input, input + input_num};
delete[] input;  // free the temporary buffer now that the data has been copied
image_Record.data = input_data;
image_Record.shape = std::vector<int>{batch_size, channel, width, height};
image_Record.type = paddle::PaddleDType::FLOAT32;
image_Record.label = label;
return image_Record;
}
TEST(test_resnet50_quant, multi_thread4_trt_int8_bz1) {
int thread_num = 4;
// init input data
std::map<std::string, paddle::test::Record> input_data_map;
input_data_map["image"] = PrepareInput(1);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data;
// prepare inference config
paddle_infer::Config config;
config.SetModel(FLAGS_int8dir);
config.EnableUseGpu(1000, 0);
config.EnableTensorRtEngine(1 << 20, 10, 3,
paddle_infer::PrecisionType::kInt8, true, false);
// get infer results from multi threads
std::vector<std::thread> threads;
services::PredictorPool pred_pool(config, thread_num);
for (int i = 0; i < thread_num; ++i) {
threads.emplace_back(paddle::test::SingleThreadPrediction,
pred_pool.Retrive(i), &input_data_map,
&infer_output_data, 5);
}
// thread join & check outputs
for (int i = 0; i < thread_num; ++i) {
LOG(INFO) << "join tid : " << i;
threads[i].join();
// check outputs
std::vector<int> index(1000);
std::iota(index.begin(), index.end(), 0);
auto out_data =
infer_output_data["save_infer_model/scale_0.tmp_0"].data.data();
std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) {
return out_data[i1] > out_data[i2];
});
// compare inference & groundtruth label
ASSERT_EQ(index[0], input_data_map["image"].label);
}
std::cout << "finish test" << std::endl;
}
TEST(test_resnet50_quant, multi_thread_multi_instance) {
int thread_num = 4;
// init input data
std::map<std::string, paddle::test::Record> input_data_fp32, input_data_quant;
input_data_quant["image"] = PrepareInput(1);
input_data_fp32["inputs"] = PrepareInput(1);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data;
// prepare inference config
paddle_infer::Config config_fp32, config_quant;
config_fp32.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config_fp32.EnableUseGpu(1000, 0);
config_fp32.EnableTensorRtEngine(
1 << 20, 10, 3, paddle_infer::PrecisionType::kFloat32, true, false);
config_quant.SetModel(FLAGS_int8dir);
config_quant.EnableUseGpu(1000, 0);
config_quant.EnableTensorRtEngine(
1 << 20, 10, 3, paddle_infer::PrecisionType::kInt8, true, false);
// get infer results from multi threads
std::vector<std::thread> threads;
services::PredictorPool pred_pool_fp32(config_fp32, thread_num);
services::PredictorPool pred_pool_quant(config_quant, thread_num);
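// even-numbered threads use the FP32 predictor pool, odd-numbered threads the INT8 pool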
for (int i = 0; i < thread_num; ++i) {
if (i % 2 == 0) {
threads.emplace_back(paddle::test::SingleThreadPrediction,
pred_pool_fp32.Retrive(i), &input_data_fp32,
&infer_output_data, 5);
} else {
threads.emplace_back(paddle::test::SingleThreadPrediction,
pred_pool_quant.Retrive(i), &input_data_quant,
&infer_output_data, 5);
}
}
// thread join & check outputs
for (int i = 0; i < thread_num; ++i) {
LOG(INFO) << "join tid : " << i;
std::vector<int> index(1000);
threads[i].join();
if (i % 2 == 0) {
// check outputs
std::iota(index.begin(), index.end(), 0);
auto out_data =
infer_output_data["save_infer_model/scale_0.tmp_0"].data.data();
std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) {
return out_data[i1] > out_data[i2];
});
// compare inference & groundtruth label
ASSERT_EQ(index[0], input_data_fp32["inputs"].label);
} else {
// check outputs
std::iota(index.begin(), index.end(), 0);
auto out_data =
infer_output_data["save_infer_model/scale_0.tmp_0"].data.data();
std::sort(index.begin(), index.end(), [out_data](size_t i1, size_t i2) {
return out_data[i1] > out_data[i2];
});
// compare inference & groundtruth label
ASSERT_EQ(index[0], input_data_quant["image"].label);
}
}
}
} // namespace paddle_infer
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
::google::ParseCommandLineFlags(&argc, &argv, true);
return RUN_ALL_TESTS();
}
...@@ -35,6 +35,7 @@ class Record {
std::vector<float> data;
std::vector<int32_t> shape;
paddle::PaddleDType type;
int label;
};
std::string read_file(std::string filename) {
...