未验证 提交 0b6447a4 编写于 作者: T tensor-tang 提交者: GitHub

Merge pull request #15310 from luotao1/ZeroCopy_omp

Fix multi-threading in ZeroCopyProfile: clone the predictor inside each worker thread
...@@ -370,15 +370,12 @@ TEST(Analyzer_rnn1, ZeroCopyMultiThread) { ...@@ -370,15 +370,12 @@ TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config); auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
double total_time_of_threads{0}; double total_time_of_threads{0};
std::vector<std::thread> threads; std::vector<std::thread> threads;
std::vector<std::unique_ptr<PaddlePredictor>> predictors;
for (int tid = 0; tid < FLAGS_num_threads; tid++) {
predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
}
for (int tid = 0; tid < FLAGS_num_threads; tid++) { for (int tid = 0; tid < FLAGS_num_threads; tid++) {
threads.emplace_back([config, &total_time_of_threads, &predictors, tid] { threads.emplace_back([&, tid] {
// auto predictor = base_predictor->Clone(); // To ensure the thread binding correctly,
auto &predictor = predictors[tid]; // please clone inside the threadpool.
auto predictor = base_predictor->Clone();
NEW_TENSOR(data_lod_attention); NEW_TENSOR(data_lod_attention);
NEW_TENSOR(cell_init); NEW_TENSOR(cell_init);
NEW_TENSOR(data); NEW_TENSOR(data);
......
...@@ -263,15 +263,12 @@ TEST(Analyzer_seq_pool1, zerocopy_profile_threads) { ...@@ -263,15 +263,12 @@ TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config); auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
double total_time_of_threads{0}; double total_time_of_threads{0};
std::vector<std::thread> threads; std::vector<std::thread> threads;
std::vector<std::unique_ptr<PaddlePredictor>> predictors;
for (int tid = 0; tid < FLAGS_num_threads; tid++) {
predictors.emplace_back(base_predictor->Clone());
// predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
}
for (int tid = 0; tid < FLAGS_num_threads; tid++) { for (int tid = 0; tid < FLAGS_num_threads; tid++) {
threads.emplace_back([config, &total_time_of_threads, &predictors, tid] { threads.emplace_back([&, tid] {
auto &predictor = predictors[tid]; // To ensure the thread binding correctly,
// please clone inside the threadpool.
auto predictor = base_predictor->Clone();
std::vector<std::unique_ptr<ZeroCopyTensor>> inputs; std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
PrepareZeroCopyInputs(predictor, &inputs); PrepareZeroCopyInputs(predictor, &inputs);
auto output_tensor = predictor->GetOutputTensor(out_var_name); auto output_tensor = predictor->GetOutputTensor(out_var_name);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册