Unverified Commit 9d4f00bc authored by Peihan, committed by GitHub

add mkl multi-thread test cases in PR-CI-INFERENCE (#34946)

* add mkl multi-thread test cases

* fix codestyle

* fix codestyle & enable ernie mkl test
Parent 10f9644c
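For orientation, the core pattern the new cases exercise is one pooled predictor per thread on the CPU MKLDNN backend. A minimal sketch (not part of the diff; the model paths and thread count are placeholders):

#include <thread>
#include <vector>
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("inference.pdmodel", "inference.pdiparams");  // placeholder paths
  config.DisableGpu();                     // CPU-only inference
  config.EnableMKLDNN();                   // switch on the MKLDNN (oneDNN) backend
  config.SetMkldnnCacheCapacity(10);       // bound the shape-keyed primitive cache
  config.SetCpuMathLibraryNumThreads(10);  // intra-op math threads per predictor

  const int thread_num = 2;
  paddle_infer::services::PredictorPool pred_pool(config, thread_num);
  std::vector<std::thread> threads;
  for (int i = 0; i < thread_num; ++i) {
    threads.emplace_back([&pred_pool, i] {
      pred_pool.Retrive(i)->Run();  // one pooled predictor per thread; inputs omitted
    });
  }
  for (auto &t : threads) t.join();
  return 0;
}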
@@ -77,6 +77,12 @@ for model_name in $clas_download_list; do
download $url_prefix $model_name
done
nlp_download_list='ernie_text_cls'
for model_name in $nlp_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2.1.1/nlp"
download $url_prefix $model_name
done
# compile and run test
cd $current_dir
mkdir -p build
@@ -144,6 +150,26 @@ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
fi
fi
# ---------gpu ernie_text_cls on linux---------
if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
cmake .. -DPADDLE_LIB=${inference_install_dir} \
-DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=test_ernie_text_cls \
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=OFF \
-DUSE_TENSORRT=$USE_TENSORRT \
-DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-DWITH_GTEST=ON
make -j$(nproc)
./test_ernie_text_cls \
--modeldir=$DATA_DIR/ernie_text_cls/ernie_text_cls \
--gtest_output=xml:test_ernie_text_cls.xml
if [ $? -ne 0 ]; then
echo "test_ernie_text_cls runs failed" >> ${current_dir}/build/test_summary.txt
EXIT_CODE=1
fi
fi
if [[ -f ${current_dir}/build/test_summary.txt ]]; then
echo "=====================test summary======================"
cat ${current_dir}/build/test_summary.txt
......
@@ -123,7 +123,7 @@ TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
FLAGS_modeldir + "/inference.pdiparams");
config.EnableUseGpu(100, 0);
config.EnableTensorRtEngine(
-      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, false, false);
+      1 << 20, 2, 3, paddle_infer::PrecisionType::kFloat32, true, false);
PrepareDynamicShape(&config, 4);
  // get ground truth by disabling ir
paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
@@ -149,6 +149,50 @@ TEST(test_det_mv3_db, multi_thread2_trt_fp32_dynamic_shape_bz2) {
std::cout << "finish multi-thread test" << std::endl;
}
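PrepareDynamicShape is defined outside this hunk; below is a plausible sketch of what such a helper registers for the TensorRT dynamic-shape engine. The tensor name "x" and the concrete shape ranges are illustrative assumptions, not the real helper's values:

#include <map>
#include <string>
#include <vector>
#include "paddle_inference_api.h"

void PrepareDynamicShape(paddle_infer::Config *config, int max_batch_size) {
  // per-tensor min/max/optimal shapes for the dynamic-shape TensorRT engine
  std::map<std::string, std::vector<int>> min_shape{{"x", {1, 3, 224, 224}}};
  std::map<std::string, std::vector<int>> max_shape{
      {"x", {max_batch_size, 3, 1600, 1600}}};
  std::map<std::string, std::vector<int>> opt_shape{{"x", {1, 3, 640, 640}}};
  config->SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
}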
TEST(test_det_mv3_db, multi_thread2_mkl_fp32_bz2) {
  int thread_num = 2;  // more than 2 threads may cause OOM
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["x"] = PrepareInput(2, 640);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data,
truth_output_data;
  // prepare ground-truth config
paddle_infer::Config config, config_no_ir;
config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config_no_ir.SwitchIrOptim(false);
// prepare inference config
config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config.DisableGpu();
config.EnableMKLDNN();
config.SetMkldnnCacheCapacity(10);
config.SetCpuMathLibraryNumThreads(10);
  // get ground truth by disabling ir
paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
&truth_output_data, 1);
// get infer results from multi threads
std::vector<std::thread> threads;
services::PredictorPool pred_pool(config, thread_num);
for (int i = 0; i < thread_num; ++i) {
threads.emplace_back(paddle::test::SingleThreadPrediction,
pred_pool.Retrive(i), &my_input_data_map,
&infer_output_data, 2);
}
// thread join & check outputs
for (int i = 0; i < thread_num; ++i) {
LOG(INFO) << "join tid : " << i;
threads[i].join();
CompareRecord(&truth_output_data, &infer_output_data, 1e-4);
}
std::cout << "finish multi-thread test" << std::endl;
}
} // namespace paddle_infer
int main(int argc, char** argv) {
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "test_suite.h" // NOLINT
DEFINE_string(modeldir, "", "Directory of the inference model.");
namespace paddle_infer {
template <typename T>
T cRandom(int min, int max) {
  // keep the seed across calls: a function-local seed re-initialized on every
  // call would make rand_r() return the same value each time; computing in
  // double also avoids the int overflow in max * rand_r() and RAND_MAX + 1
  static unsigned int seed = 100;
  return min + static_cast<T>(static_cast<double>(rand_r(&seed)) /
                              (static_cast<double>(RAND_MAX) + 1) * (max - min));
}
std::map<std::string, paddle::test::Record> PrepareInput(int batch_size) {
// init input data
int digit_length = 115;
paddle::test::Record input_ids, segment_ids;
int input_num = batch_size * digit_length;
std::vector<int64_t> input_data(input_num, 1);
std::vector<int64_t> segment_data(input_num, 0);
  // no srand() needed: cRandom() above drives rand_r() with its own seed
  for (size_t x = 0; x < input_data.size(); x++) {
    input_data[x] = cRandom<int>(1, 100);
  }
input_ids.data = std::vector<float>(input_data.begin(), input_data.end());
input_ids.shape = std::vector<int>{batch_size, digit_length};
input_ids.type = paddle::PaddleDType::INT64;
segment_ids.data =
std::vector<float>(segment_data.begin(), segment_data.end());
segment_ids.shape = std::vector<int>{batch_size, digit_length};
segment_ids.type = paddle::PaddleDType::INT64;
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map.insert({"input_ids", input_ids});
my_input_data_map.insert({"token_type_ids", segment_ids});
return my_input_data_map;
}
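Record (see test_suite.h further down) stores every payload in a std::vector<float> and relies on the type tag to convert back before feeding the predictor. A self-contained illustration of that round-trip, which is exact only while the int64 values fit in a float mantissa (below 2^24):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<int64_t> ids{1, 42, 99};                          // original token ids
  std::vector<float> stored(ids.begin(), ids.end());            // what Record.data holds
  std::vector<int64_t> restored(stored.begin(), stored.end());  // rebuilt before CopyFromCpu
  assert(restored == ids);  // holds while every id is below 2^24
  return 0;
}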
TEST(test_ernie_text_cls, analysis_gpu_bz2_buffer) {
// init input data
auto my_input_data_map = PrepareInput(2);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data,
truth_output_data;
  // prepare ground-truth config
paddle_infer::Config config, config_no_ir;
config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config_no_ir.SwitchIrOptim(false);
// prepare inference config from buffer
std::string prog_file = FLAGS_modeldir + "/inference.pdmodel";
std::string params_file = FLAGS_modeldir + "/inference.pdiparams";
std::string prog_str = paddle::test::read_file(prog_file);
std::string params_str = paddle::test::read_file(params_file);
config.SetModelBuffer(prog_str.c_str(), prog_str.size(), params_str.c_str(),
params_str.size());
  // get ground truth by disabling ir
paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
&truth_output_data, 1);
// get infer results
paddle_infer::services::PredictorPool pred_pool(config, 1);
SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
&infer_output_data);
// check outputs
CompareRecord(&truth_output_data, &infer_output_data);
std::cout << "finish test" << std::endl;
}
TEST(test_ernie_text_cls, multi_thread4_mkl_fp32_bz2) {
int thread_num = 4;
// init input data
auto my_input_data_map = PrepareInput(2);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data,
truth_output_data;
  // prepare ground-truth config
paddle_infer::Config config, config_no_ir;
config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config_no_ir.SwitchIrOptim(false);
// prepare inference config
config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config.DisableGpu();
config.EnableMKLDNN();
config.SetMkldnnCacheCapacity(10);
config.SetCpuMathLibraryNumThreads(10);
  // get ground truth by disabling ir
paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
&truth_output_data, 1);
// get infer results from multi threads
std::vector<std::thread> threads;
services::PredictorPool pred_pool(config, thread_num);
for (int i = 0; i < thread_num; ++i) {
threads.emplace_back(paddle::test::SingleThreadPrediction,
pred_pool.Retrive(i), &my_input_data_map,
&infer_output_data, 2);
}
// thread join & check outputs
for (int i = 0; i < thread_num; ++i) {
LOG(INFO) << "join tid : " << i;
threads[i].join();
CompareRecord(&truth_output_data, &infer_output_data);
}
std::cout << "finish multi-thread test" << std::endl;
}
} // namespace paddle_infer
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
::google::ParseCommandLineFlags(&argc, &argv, true);
return RUN_ALL_TESTS();
}
@@ -37,6 +37,12 @@ class Record {
paddle::PaddleDType type;
};
// slurp an entire file into a string (used to exercise SetModelBuffer)
std::string read_file(const std::string &filename) {
  std::ifstream file(filename);
  return std::string((std::istreambuf_iterator<char>(file)),
                     std::istreambuf_iterator<char>());
}
void SingleThreadPrediction(paddle_infer::Predictor *predictor,
std::map<std::string, Record> *input_data_map,
std::map<std::string, Record> *output_data_map,
@@ -44,14 +50,37 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
// prepare input tensor
auto input_names = predictor->GetInputNames();
for (const auto & [ key, value ] : *input_data_map) {
-    auto input_tensor = predictor->GetInputHandle(key);
-    input_tensor->Reshape(value.shape);
-    input_tensor->CopyFromCpu(value.data.data());
+    switch (value.type) {
+      case paddle::PaddleDType::INT64: {
+        std::vector<int64_t> input_value =
+            std::vector<int64_t>(value.data.begin(), value.data.end());
+        auto input_tensor = predictor->GetInputHandle(key);
+        input_tensor->Reshape(value.shape);
+        input_tensor->CopyFromCpu(input_value.data());
+        break;
+      }
+      case paddle::PaddleDType::INT32: {
+        std::vector<int32_t> input_value =
+            std::vector<int32_t>(value.data.begin(), value.data.end());
+        auto input_tensor = predictor->GetInputHandle(key);
+        input_tensor->Reshape(value.shape);
+        input_tensor->CopyFromCpu(input_value.data());
+        break;
+      }
+      case paddle::PaddleDType::FLOAT32: {
+        std::vector<float> input_value =
+            std::vector<float>(value.data.begin(), value.data.end());
+        auto input_tensor = predictor->GetInputHandle(key);
+        input_tensor->Reshape(value.shape);
+        input_tensor->CopyFromCpu(input_value.data());
+        break;
+      }
+    }
}
// inference
for (size_t i = 0; i < repeat_times; ++i) {
-    CHECK(predictor->Run());
+    ASSERT_TRUE(predictor->Run());
}
// get output data to Record
@@ -112,8 +141,8 @@ void CompareRecord(std::map<std::string, Record> *truth_output_data,
size_t numel = value.data.size() / sizeof(float);
EXPECT_EQ(value.data.size(), truth_record.data.size());
for (size_t i = 0; i < numel; ++i) {
-      CHECK_LT(fabs(value.data.data()[i] - truth_record.data.data()[i]),
-               epislon);
+      ASSERT_LT(fabs(value.data.data()[i] - truth_record.data.data()[i]),
+                epislon);
}
}
}
......
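A note on the CHECK-to-ASSERT swap above: glog's CHECK aborts the whole binary on failure, while a gtest ASSERT_* failure only marks the current test failed, letting the remaining tests and the XML report (--gtest_output) still complete. A minimal contrast, assuming a gtest/glog build:

#include <gtest/gtest.h>

TEST(example_only, assert_vs_check) {
  // CHECK(1 + 1 == 3);   // glog: a failure here would abort the whole binary
  ASSERT_EQ(1 + 1, 2);    // gtest: a failure would fail only this test
}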