未验证 提交 14b43376 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #16062 from luotao1/num_threads

refine SetCpuMathLibraryNumThreads
......@@ -183,6 +183,9 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) {
bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
}
VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
......
......@@ -131,6 +131,9 @@ NativePaddlePredictor::~NativePaddlePredictor() {
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
}
VLOG(3) << "Predictor::predict";
Timer timer;
timer.tic();
......
......@@ -366,15 +366,17 @@ TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
#define NEW_TENSOR(name__) \
auto name__##_tensor = predictor->GetInputTensor(#name__);
auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
std::vector<std::unique_ptr<PaddlePredictor>> predictors;
predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
for (int tid = 1; tid < FLAGS_num_threads; tid++) {
predictors.emplace_back(predictors.front()->Clone());
}
double total_time_of_threads{0};
std::vector<std::thread> threads;
for (int tid = 0; tid < FLAGS_num_threads; tid++) {
threads.emplace_back([&, tid] {
// To ensure the thread binding correctly,
// please clone inside the threadpool.
auto predictor = base_predictor->Clone();
auto &predictor = predictors[tid];
NEW_TENSOR(data_lod_attention);
NEW_TENSOR(cell_init);
NEW_TENSOR(data);
......
......@@ -266,15 +266,17 @@ TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
SetConfig(&config);
config.SwitchUseFeedFetchOps(false);
auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
std::vector<std::unique_ptr<PaddlePredictor>> predictors;
predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
for (int tid = 1; tid < FLAGS_num_threads; tid++) {
predictors.emplace_back(predictors.front()->Clone());
}
double total_time_of_threads{0};
std::vector<std::thread> threads;
for (int tid = 0; tid < FLAGS_num_threads; tid++) {
threads.emplace_back([&, tid] {
// To ensure the thread binding correctly,
// please clone inside the threadpool.
auto predictor = base_predictor->Clone();
auto &predictor = predictors[tid];
std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
PrepareZeroCopyInputs(predictor, &inputs);
auto output_tensor = predictor->GetOutputTensor(out_var_name);
......
......@@ -17,8 +17,10 @@
#include <gtest/gtest.h>
#include <algorithm>
#include <memory>
#include <string>
#include <thread> // NOLINT
#include <unordered_map>
#include <vector>
#ifdef WITH_GPERFTOOLS
#include <gperftools/profiler.h>
......@@ -252,7 +254,11 @@ void TestMultiThreadPrediction(
int batch_size = FLAGS_batch_size;
int num_times = FLAGS_repeat;
std::vector<std::thread> threads;
auto main_predictor = CreateTestPredictor(config, use_analysis);
std::vector<std::unique_ptr<PaddlePredictor>> predictors;
predictors.emplace_back(CreateTestPredictor(config, use_analysis));
for (int tid = 1; tid < num_threads; tid++) {
predictors.emplace_back(predictors.front()->Clone());
}
size_t total_time{0};
for (int tid = 0; tid < num_threads; ++tid) {
......@@ -260,9 +266,7 @@ void TestMultiThreadPrediction(
// Each thread should have local inputs and outputs.
// The inputs of each thread are all the same.
std::vector<PaddleTensor> outputs_tid;
// To ensure the thread binding correctly,
// please clone inside the threadpool.
auto predictor = main_predictor->Clone();
auto &predictor = predictors[tid];
#ifdef PADDLE_WITH_MKLDNN
if (use_analysis) {
static_cast<AnalysisPredictor *>(predictor.get())
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册