Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
781cd0cf
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
781cd0cf
编写于
1月 12, 2019
作者:
T
tensor-tang
提交者:
Yan Chunwei
1月 12, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add multi threads test of seqpool test (#15293)
上级
6eada9e0
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
57 addition
and
2 deletion
+57
-2
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+57
-2
未找到文件。
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
浏览文件 @
781cd0cf
...
@@ -214,6 +214,9 @@ void PrepareZeroCopyInputs(
...
@@ -214,6 +214,9 @@ void PrepareZeroCopyInputs(
}
}
}
}
// Name of the output variable that the ZeroCopy tests in this file pass to
// GetOutputTensor().
// NOTE(review): the names below look like alternative output variables
// (one for diff checking, one for speed measurement) - confirm with the author.
// diff: similarity_norm.tmp_0, // speed: fc_4.tmp_1
static
const
char
out_var_name
[]
=
"reduce_sum_0.tmp_0"
;
// return the output values
// return the output values
std
::
vector
<
float
>
zerocopy_profile
(
int
repeat_times
)
{
std
::
vector
<
float
>
zerocopy_profile
(
int
repeat_times
)
{
AnalysisConfig
config
;
AnalysisConfig
config
;
...
@@ -222,7 +225,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
...
@@ -222,7 +225,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
);
std
::
vector
<
std
::
unique_ptr
<
ZeroCopyTensor
>>
inputs
;
std
::
vector
<
std
::
unique_ptr
<
ZeroCopyTensor
>>
inputs
;
PrepareZeroCopyInputs
(
predictor
,
&
inputs
);
PrepareZeroCopyInputs
(
predictor
,
&
inputs
);
auto
output_tensor
=
predictor
->
GetOutputTensor
(
"reduce_sum_0.tmp_0"
);
auto
output_tensor
=
predictor
->
GetOutputTensor
(
out_var_name
);
Timer
timer
;
Timer
timer
;
LOG
(
INFO
)
<<
"Warm up run..."
;
LOG
(
INFO
)
<<
"Warm up run..."
;
timer
.
tic
();
timer
.
tic
();
...
@@ -239,7 +242,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
...
@@ -239,7 +242,7 @@ std::vector<float> zerocopy_profile(int repeat_times) {
PrintTime
(
FLAGS_batch_size
,
repeat_times
,
1
,
0
,
timer
.
toc
()
/
repeat_times
,
PrintTime
(
FLAGS_batch_size
,
repeat_times
,
1
,
0
,
timer
.
toc
()
/
repeat_times
,
1
);
1
);
VLOG
(
3
)
<<
"ZeroCopy output: "
<<
DescribeZeroCopyTensor
(
*
output_tensor
);
LOG
(
INFO
)
<<
"ZeroCopy output: "
<<
DescribeZeroCopyTensor
(
*
output_tensor
);
PaddlePlace
place
;
PaddlePlace
place
;
int
output_size
{
0
};
int
output_size
{
0
};
auto
*
pdata
=
output_tensor
->
data
<
float
>
(
&
place
,
&
output_size
);
auto
*
pdata
=
output_tensor
->
data
<
float
>
(
&
place
,
&
output_size
);
...
@@ -252,6 +255,58 @@ std::vector<float> zerocopy_profile(int repeat_times) {
...
@@ -252,6 +255,58 @@ std::vector<float> zerocopy_profile(int repeat_times) {
// Runs the single-predictor ZeroCopy profile with FLAGS_repeat as the repeat
// count. The vector returned by zerocopy_profile() is intentionally discarded.
TEST(Analyzer_seq_pool1, zerocopy_profile) {
  const auto repeat_times = FLAGS_repeat;
  zerocopy_profile(repeat_times);
}
// Runs the single-predictor ZeroCopy profile with FLAGS_repeat as the repeat
// count. The vector returned by zerocopy_profile() is intentionally discarded.
TEST(Analyzer_seq_pool1, zerocopy_profile) {
  const auto repeat_times = FLAGS_repeat;
  zerocopy_profile(repeat_times);
}
// Multi-threaded ZeroCopy benchmark.
//
// Builds one base predictor, clones it FLAGS_num_threads times, and lets each
// clone run in its own std::thread: one warm-up ZeroCopyRun(), then
// FLAGS_repeat timed ZeroCopyRun() calls. The per-thread average and the
// overall average of the timer readings are written to the INFO log.
TEST(Analyzer_seq_pool1, zerocopy_profile_threads) {
  AnalysisConfig config;
  SetConfig(&config);
  config.SwitchUseFeedFetchOps(false);

  auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);

  // All clones are created up front, before any worker thread starts, so the
  // vector is never resized while threads hold references into it.
  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
    predictors.emplace_back(base_predictor->Clone());
    // predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
  }

  // One result slot per thread. Each worker writes only its own element, so
  // no lock is required; the previous shared `double` accumulator was updated
  // with `+=` from every thread concurrently, which is a data race.
  std::vector<double> thread_times(predictors.size(), 0.0);

  std::vector<std::thread> threads;
  for (int tid = 0; tid < FLAGS_num_threads; tid++) {
    threads.emplace_back([&thread_times, &predictors, tid] {
      auto &predictor = predictors[tid];
      std::vector<std::unique_ptr<ZeroCopyTensor>> inputs;
      PrepareZeroCopyInputs(predictor, &inputs);
      auto output_tensor = predictor->GetOutputTensor(out_var_name);
      Timer timer;

      LOG(INFO) << "Warm up run...";
      timer.tic();
      predictor->ZeroCopyRun();
      PrintTime(FLAGS_batch_size, 1, FLAGS_num_threads, tid, timer.toc(), 1);
      if (FLAGS_profile) {
        paddle::platform::ResetProfiler();
      }

      int repeat_times = FLAGS_repeat;
      LOG(INFO) << "Run " << repeat_times << " times...";
      timer.tic();
      for (int i = 0; i < repeat_times; i++) {
        predictor->ZeroCopyRun();
      }
      const double total_time = timer.toc();

      // Publish this thread's result; it is read only after join() below.
      thread_times[tid] = total_time;

      LOG(INFO) << "thread time: " << total_time / repeat_times;
    });
  }

  for (auto &t : threads) {
    t.join();
  }

  // join() orders every worker's write before this read-back.
  double total_time_of_threads{0};
  for (const double thread_time : thread_times) {
    total_time_of_threads += thread_time;
  }

  LOG(INFO) << "average time: "
            << total_time_of_threads / FLAGS_num_threads / FLAGS_repeat;
}
// Delegates to analysis_fuse_statis() with its flag set to true.
// NOTE(review): the flag presumably selects the ZeroCopy path - confirm
// against the definition of analysis_fuse_statis().
TEST(Analyzer_seq_pool1, zerocopy_fuse_statis) {
  analysis_fuse_statis(true);
}
// Delegates to analysis_fuse_statis() with its flag set to true.
// NOTE(review): the flag presumably selects the ZeroCopy path - confirm
// against the definition of analysis_fuse_statis().
TEST(Analyzer_seq_pool1, zerocopy_fuse_statis) {
  analysis_fuse_statis(true);
}
TEST
(
Analyzer_seq_pool1
,
zerocopy_compare_native
)
{
TEST
(
Analyzer_seq_pool1
,
zerocopy_compare_native
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录