Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
14b43376
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
14b43376
编写于
3月 06, 2019
作者:
T
Tao Luo
提交者:
GitHub
3月 06, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #16062 from luotao1/num_threads
refine SetCpuMathLibraryNumThreads
上级
7fbf52da
06aab1b4
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
26 addition
and
12 deletion
+26
-12
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+3
-0
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+3
-0
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+6
-4
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+8
-4
未找到文件。
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
14b43376
...
@@ -183,6 +183,9 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) {
...
@@ -183,6 +183,9 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) {
bool
AnalysisPredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
bool
AnalysisPredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
)
{
int
batch_size
)
{
if
(
UNLIKELY
(
config_
.
cpu_math_library_num_threads
()
>
1
))
{
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
}
VLOG
(
3
)
<<
"Predictor::predict"
;
VLOG
(
3
)
<<
"Predictor::predict"
;
inference
::
Timer
timer
;
inference
::
Timer
timer
;
timer
.
tic
();
timer
.
tic
();
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
14b43376
...
@@ -131,6 +131,9 @@ NativePaddlePredictor::~NativePaddlePredictor() {
...
@@ -131,6 +131,9 @@ NativePaddlePredictor::~NativePaddlePredictor() {
bool
NativePaddlePredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
bool
NativePaddlePredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
)
{
int
batch_size
)
{
if
(
UNLIKELY
(
config_
.
cpu_math_library_num_threads
()
>
1
))
{
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
());
}
VLOG
(
3
)
<<
"Predictor::predict"
;
VLOG
(
3
)
<<
"Predictor::predict"
;
Timer
timer
;
Timer
timer
;
timer
.
tic
();
timer
.
tic
();
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
14b43376
...
@@ -366,15 +366,17 @@ TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
...
@@ -366,15 +366,17 @@ TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
#define NEW_TENSOR(name__) \
#define NEW_TENSOR(name__) \
auto name__##_tensor = predictor->GetInputTensor(#name__);
auto name__##_tensor = predictor->GetInputTensor(#name__);
auto
base_predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
);
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
predictors
.
emplace_back
(
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
));
for
(
int
tid
=
1
;
tid
<
FLAGS_num_threads
;
tid
++
)
{
predictors
.
emplace_back
(
predictors
.
front
()
->
Clone
());
}
double
total_time_of_threads
{
0
};
double
total_time_of_threads
{
0
};
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
thread
>
threads
;
for
(
int
tid
=
0
;
tid
<
FLAGS_num_threads
;
tid
++
)
{
for
(
int
tid
=
0
;
tid
<
FLAGS_num_threads
;
tid
++
)
{
threads
.
emplace_back
([
&
,
tid
]
{
threads
.
emplace_back
([
&
,
tid
]
{
// To ensure the thread binding correctly,
auto
&
predictor
=
predictors
[
tid
];
// please clone inside the threadpool.
auto
predictor
=
base_predictor
->
Clone
();
NEW_TENSOR
(
data_lod_attention
);
NEW_TENSOR
(
data_lod_attention
);
NEW_TENSOR
(
cell_init
);
NEW_TENSOR
(
cell_init
);
NEW_TENSOR
(
data
);
NEW_TENSOR
(
data
);
...
...
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
浏览文件 @
14b43376
...
@@ -266,15 +266,17 @@ TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
...
@@ -266,15 +266,17 @@ TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
SetConfig
(
&
config
);
SetConfig
(
&
config
);
config
.
SwitchUseFeedFetchOps
(
false
);
config
.
SwitchUseFeedFetchOps
(
false
);
auto
base_predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
);
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
predictors
.
emplace_back
(
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
));
for
(
int
tid
=
1
;
tid
<
FLAGS_num_threads
;
tid
++
)
{
predictors
.
emplace_back
(
predictors
.
front
()
->
Clone
());
}
double
total_time_of_threads
{
0
};
double
total_time_of_threads
{
0
};
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
thread
>
threads
;
for
(
int
tid
=
0
;
tid
<
FLAGS_num_threads
;
tid
++
)
{
for
(
int
tid
=
0
;
tid
<
FLAGS_num_threads
;
tid
++
)
{
threads
.
emplace_back
([
&
,
tid
]
{
threads
.
emplace_back
([
&
,
tid
]
{
// To ensure the thread binding correctly,
auto
&
predictor
=
predictors
[
tid
];
// please clone inside the threadpool.
auto
predictor
=
base_predictor
->
Clone
();
std
::
vector
<
std
::
unique_ptr
<
ZeroCopyTensor
>>
inputs
;
std
::
vector
<
std
::
unique_ptr
<
ZeroCopyTensor
>>
inputs
;
PrepareZeroCopyInputs
(
predictor
,
&
inputs
);
PrepareZeroCopyInputs
(
predictor
,
&
inputs
);
auto
output_tensor
=
predictor
->
GetOutputTensor
(
out_var_name
);
auto
output_tensor
=
predictor
->
GetOutputTensor
(
out_var_name
);
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
14b43376
...
@@ -17,8 +17,10 @@
...
@@ -17,8 +17,10 @@
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <algorithm>
#include <memory>
#include <string>
#include <string>
#include <thread> // NOLINT
#include <thread> // NOLINT
#include <unordered_map>
#include <vector>
#include <vector>
#ifdef WITH_GPERFTOOLS
#ifdef WITH_GPERFTOOLS
#include <gperftools/profiler.h>
#include <gperftools/profiler.h>
...
@@ -252,7 +254,11 @@ void TestMultiThreadPrediction(
...
@@ -252,7 +254,11 @@ void TestMultiThreadPrediction(
int
batch_size
=
FLAGS_batch_size
;
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
int
num_times
=
FLAGS_repeat
;
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
thread
>
threads
;
auto
main_predictor
=
CreateTestPredictor
(
config
,
use_analysis
);
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
predictors
.
emplace_back
(
CreateTestPredictor
(
config
,
use_analysis
));
for
(
int
tid
=
1
;
tid
<
num_threads
;
tid
++
)
{
predictors
.
emplace_back
(
predictors
.
front
()
->
Clone
());
}
size_t
total_time
{
0
};
size_t
total_time
{
0
};
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
...
@@ -260,9 +266,7 @@ void TestMultiThreadPrediction(
...
@@ -260,9 +266,7 @@ void TestMultiThreadPrediction(
// Each thread should have local inputs and outputs.
// Each thread should have local inputs and outputs.
// The inputs of each thread are all the same.
// The inputs of each thread are all the same.
std
::
vector
<
PaddleTensor
>
outputs_tid
;
std
::
vector
<
PaddleTensor
>
outputs_tid
;
// To ensure the thread binding correctly,
auto
&
predictor
=
predictors
[
tid
];
// please clone inside the threadpool.
auto
predictor
=
main_predictor
->
Clone
();
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
if
(
use_analysis
)
{
if
(
use_analysis
)
{
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
())
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
())
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录