提交 781cd0cf 编写于 作者: T tensor-tang 提交者: Yan Chunwei

add multi threads test of seqpool test (#15293)

上级 6eada9e0
......@@ -214,6 +214,9 @@ void PrepareZeroCopyInputs(
}
}
// diff: similarity_norm.tmp_0, // speed: fc_4.tmp_1
static const char out_var_name[] = "reduce_sum_0.tmp_0";
// return the output values
std::vector<float> zerocopy_profile(int repeat_times) {
AnalysisConfig config;
......@@ -222,7 +225,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
PrepareZeroCopyInputs(predictor, &inputs);
auto output_tensor = predictor->GetOutputTensor("reduce_sum_0.tmp_0");
auto output_tensor = predictor->GetOutputTensor(out_var_name);
Timer timer;
LOG(INFO) << "Warm up run...";
timer.tic();
......@@ -239,7 +242,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
PrintTime(FLAGS_batch_size, repeat_times, 1, 0, timer.toc() / repeat_times,
1);
VLOG(3) << "ZeroCopy output: " << DescribeZeroCopyTensor(*output_tensor);
LOG(INFO) << "ZeroCopy output: " << DescribeZeroCopyTensor(*output_tensor);
PaddlePlace place;
int output_size{0};
auto *pdata = output_tensor->data<float>(&place, &output_size);
......@@ -252,6 +255,58 @@ std::vector<float> zerocopy_profile(int repeat_times) {
TEST(Analyzer_seq_pool1, zerocopy_profile) { zerocopy_profile(FLAGS_repeat); }
TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
AnalysisConfig config;
SetConfig(&config);
config.SwitchUseFeedFetchOps(false);
auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
double total_time_of_threads{0};
std::vector<std::thread> threads;
std::vector<std::unique_ptr<PaddlePredictor>> predictors;
for (int tid = 0; tid < FLAGS_num_threads; tid++) {
predictors.emplace_back(base_predictor->Clone());
// predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
}
for (int tid = 0; tid < FLAGS_num_threads; tid++) {
threads.emplace_back([config, &total_time_of_threads, &predictors, tid] {
auto &predictor = predictors[tid];
std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
PrepareZeroCopyInputs(predictor, &inputs);
auto output_tensor = predictor->GetOutputTensor(out_var_name);
Timer timer;
double total_time{0};
LOG(INFO) << "Warm up run...";
timer.tic();
predictor->ZeroCopyRun();
PrintTime(FLAGS_batch_size, 1, FLAGS_num_threads, tid, timer.toc(), 1);
if (FLAGS_profile) {
paddle::platform::ResetProfiler();
}
int repeat_times = FLAGS_repeat;
LOG(INFO) << "Run " << repeat_times << " times...";
timer.tic();
for (int i = 0; i < repeat_times; i++) {
predictor->ZeroCopyRun();
}
total_time += timer.toc();
total_time_of_threads += total_time;
LOG(INFO) << "thread time: " << total_time / repeat_times;
});
}
for (auto &t : threads) {
t.join();
}
LOG(INFO) << "average time: "
<< total_time_of_threads / FLAGS_num_threads / FLAGS_repeat;
}
TEST(Analyzer_seq_pool1, zerocopy_fuse_statis) { analysis_fuse_statis(true); }
TEST(Analyzer_seq_pool1, zerocopy_compare_native) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册