PaddlePaddle / Paddle
Commit b7588751
Authored Sep 20, 2018 by Tao Luo
Committed by Yan Chunwei on Sep 20, 2018
Refine infer api test (#13472)
* refine analyzer_nlp_tester
* refine analyzer_rnn/vis_tester
Parent: d4570f04
Showing 7 changed files with 383 additions and 420 deletions (+383 / -420). Each changed hunk below is shown as it reads after this commit, with a one-line note on what it replaces.
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc                  +57  -91
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc                  +56  -80
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc                 +57  -72
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc                 +46  -67
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc  +47  -38
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc                  +58  -59
paddle/fluid/inference/tests/api/tester_helper.h                         +62  -13
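Taken together, the seven diffs follow one pattern: each tester keeps only a model-specific SetConfig and SetInput, while predictor creation, timing, fuse-status inspection, and the native-vs-analysis comparison move into tester_helper.h. Below is a minimal sketch of that pattern distilled from the diffs; the suite name Analyzer_demo and the comment placeholders are illustrative, not part of this commit:

// Hypothetical tester following the refactored structure; only SetConfig and
// SetInput are model-specific, everything else comes from tester_helper.h.
void SetConfig(AnalysisConfig *cfg) {
  cfg->model_dir = FLAGS_infer_model;  // or prog_file + param_file
  cfg->use_gpu = false;
  cfg->device = 0;
  cfg->specify_input_name = true;
  cfg->enable_ir_optim = true;
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  // Read FLAGS_infer_data and append one std::vector<PaddleTensor> per batch;
  // with FLAGS_test_all_data the whole file is enqueued, otherwise one batch.
}

TEST(Analyzer_demo, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<PaddleTensor> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
}

TEST(Analyzer_demo, compare) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(cfg, input_slots_all);
}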
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
@@ -103,108 +103,74 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
The file-scope lac_ref_data table and the monolithic TestLACPrediction(model_path, data_file, batch_size, repeat, use_analysis), with its native and analysis TESTs, are replaced by:

  input_slots->assign({input_tensor});
}

void SetConfig(AnalysisConfig *cfg) {
  cfg->model_dir = FLAGS_infer_model;
  cfg->use_gpu = false;
  cfg->device = 0;
  cfg->specify_input_name = true;
  cfg->enable_ir_optim = true;
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
  std::vector<PaddleTensor> input_slots;
  int epoch = FLAGS_test_all_data ? data.batched_datas.size() : 1;
  LOG(INFO) << "number of samples: " << epoch;
  for (int bid = 0; bid < epoch; ++bid) {
    GetOneBatch(&input_slots, &data, FLAGS_batch_size);
    (*inputs).emplace_back(input_slots);
  }
}

// Easy for profiling independently.
TEST(Analyzer_LAC, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<PaddleTensor> outputs;

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);

  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
    // the first inference result
    const int64_t lac_ref_data[] = {
        24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25,
        44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14,
        15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
    size_t size = GetSize(outputs[0]);
    size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
    PADDLE_ENFORCE_GE(size, batch1_size);
    int64_t *pdata = static_cast<int64_t *>(outputs[0].data.data());
    for (size_t i = 0; i < batch1_size; ++i) {
      EXPECT_EQ(pdata[i], lac_ref_data[i]);
    }
  }
}

// Check the fuse status
TEST(Analyzer_LAC, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  int num_ops;
  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4);
  EXPECT_EQ(num_ops, 11);
}

// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_LAC, compare) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(cfg, input_slots_all);
}

}  // namespace analysis
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
@@ -95,97 +95,73 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
The file-scope chinese_ner_result_data table and the monolithic TestChineseNERPrediction(use_analysis), with its native and analysis TESTs, are replaced by:

  }
}

void SetConfig(AnalysisConfig *cfg) {
  cfg->prog_file = FLAGS_infer_model + "/__model__";
  cfg->param_file = FLAGS_infer_model + "/param";
  cfg->use_gpu = false;
  cfg->device = 0;
  cfg->specify_input_name = true;
  cfg->enable_ir_optim = true;
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
  std::vector<PaddleTensor> input_slots;
  int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
  for (int bid = 0; bid < epoch; ++bid) {
    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
    (*inputs).emplace_back(input_slots);
  }
}

// Easy for profiling independently.
TEST(Analyzer_Chinese_ner, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<PaddleTensor> outputs;

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);

  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
    // the first inference result
    const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
                                           48, 39, 38, 16, 25};
    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
    size_t size = GetSize(outputs[0]);
    PADDLE_ENFORCE_GT(size, 0);
    int64_t *result = static_cast<int64_t *>(outputs[0].data.data());
    for (size_t i = 0; i < std::min(11UL, size); i++) {
      EXPECT_EQ(result[i], chinese_ner_result_data[i]);
    }
  }
}

// Check the fuse status
TEST(Analyzer_Chinese_ner, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  int num_ops;
  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2);
  EXPECT_EQ(num_ops, 14);
}

// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_Chinese_ner, compare) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(cfg, input_slots_all);
}

}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -25,6 +25,7 @@ struct DataRecord {
A num_samples counter is added to DataRecord:

  std::vector<size_t> lod1, lod2, lod3;
  std::vector<std::vector<float>> rnn_link_data, rnn_week_datas,
      rnn_minute_datas;
  size_t num_samples;  // total number of samples
  size_t batch_iter{0};
  size_t batch_size{1};
  DataRecord() = default;

@@ -97,6 +98,7 @@ struct DataRecord {
      week_data_all.push_back(std::move(week_data));
      minute_data_all.push_back(std::move(minute_data));
    }
    num_samples = num_lines;
  }
};

void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,

@@ -147,89 +149,72 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
The monolithic TestRNN1Prediction(use_analysis, activate_ir, num_threads) and the TEST(Analyzer, rnn1) / TEST(Analyzer, RNN_tests) drivers are replaced by:

  }
}

void SetConfig(AnalysisConfig *cfg) {
  cfg->prog_file = FLAGS_infer_model + "/__model__";
  cfg->param_file = FLAGS_infer_model + "/param";
  cfg->use_gpu = false;
  cfg->device = 0;
  cfg->specify_input_name = true;
  cfg->enable_ir_optim = true;
  cfg->ir_passes.clear();  // Do not exclude any pass.
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
  std::vector<PaddleTensor> input_slots;
  int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
  for (int bid = 0; bid < epoch; ++bid) {
    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
    (*inputs).emplace_back(input_slots);
  }
}

// Easy for profiling independently.
TEST(Analyzer_rnn1, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<PaddleTensor> outputs;

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
}

// Check the fuse status
TEST(Analyzer_rnn1, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  int num_ops;
  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
  EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
  EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
  EXPECT_EQ(num_ops, 13);  // After graph optimization, only 13 operators exists.
}

// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_rnn1, compare) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(cfg, input_slots_all);
}

// Test Multi-Thread.
TEST(Analyzer_rnn1, multi_thread) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<PaddleTensor> outputs;

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(cfg, input_slots_all, &outputs, 4 /* num_threads */);
}

}  // namespace inference
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
@@ -12,24 +12,7 @@
The local includes and flag definitions (infer_model, infer_data, batch_size, repeat, num_threads) are replaced by the shared helper header:

// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {

@@ -41,6 +24,7 @@ struct DataRecord {
  std::vector<size_t> lod;
  std::vector<std::vector<float>> rnn_link_data;
  std::vector<float> result_data;
  size_t num_samples;  // total number of samples
  size_t batch_iter{0};
  size_t batch_size{1};
  DataRecord() = default;

@@ -100,6 +84,7 @@ struct DataRecord {
        result_data.insert(result_data.end(), tmp.begin(), tmp.end());
      }
    }
    num_samples = num_lines / 2;
  }
};

void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,

@@ -118,64 +103,58 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
The local CompareResult helper, the monolithic TestRNN2Prediction(), and TEST(Analyzer, rnn2) are replaced by:

  input_slots->assign({feed_tensor});
}

void SetConfig(AnalysisConfig *cfg) {
  cfg->prog_file = FLAGS_infer_model + "/__model__";
  cfg->param_file = FLAGS_infer_model + "/param";
  cfg->use_gpu = false;
  cfg->device = 0;
  cfg->specify_input_name = true;
  cfg->enable_ir_optim = true;
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
  std::vector<PaddleTensor> input_slots;
  int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
  for (int bid = 0; bid < epoch; ++bid) {
    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
    (*inputs).emplace_back(input_slots);
  }
}

// Easy for profiling independently.
TEST(Analyzer_rnn2, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<PaddleTensor> outputs;

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);

  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
    // the first inference result
    DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
    PADDLE_ENFORCE_GT(outputs.size(), 0);
    size_t size = GetSize(outputs[0]);
    PADDLE_ENFORCE_GT(size, 0);
    float *result = static_cast<float *>(outputs[0].data.data());
    for (size_t i = 0; i < size; i++) {
      EXPECT_NEAR(result[i], data.result_data[i], 1e-3);
    }
  }
}

// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_rnn2, compare) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(cfg, input_slots_all);
}

}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
@@ -46,54 +46,63 @@ struct DataReader {
The monolithic Main(batch_size) and TEST(text_classification, basic) are replaced by:

  std::unique_ptr<std::ifstream> file;
};

void SetConfig(AnalysisConfig *cfg) {
  cfg->model_dir = FLAGS_infer_model;
  cfg->use_gpu = false;
  cfg->device = 0;
  cfg->specify_input_name = true;
  cfg->enable_ir_optim = true;
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  std::vector<PaddleTensor> input_slots;
  DataReader reader(FLAGS_infer_data);
  int num_batches = 0;
  while (reader.NextBatch(&input_slots, FLAGS_batch_size)) {
    (*inputs).emplace_back(input_slots);
    ++num_batches;
    if (!FLAGS_test_all_data) return;
  }
  LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size;
}

// Easy for profiling independently.
TEST(Analyzer_Text_Classification, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<PaddleTensor> outputs;

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);

  if (FLAGS_num_threads == 1) {
    // Get output
    LOG(INFO) << "get outputs " << outputs.size();
    for (auto &output : outputs) {
      LOG(INFO) << "output.shape: " << to_string(output.shape);
      // no lod ?
      CHECK_EQ(output.lod.size(), 0UL);
      LOG(INFO) << "output.dtype: " << output.dtype;
      std::stringstream ss;
      for (int i = 0; i < 5; i++) {
        ss << static_cast<float *>(output.data.data())[i] << " ";
      }
      LOG(INFO) << "output.data summary: " << ss.str();
      // one batch ends
    }
  }
}

// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_Text_Classification, compare) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(cfg, input_slots_all);
}

}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
@@ -49,84 +49,83 @@ Record ProcessALine(const std::string &line) {
The monolithic TestVisualPrediction(use_mkldnn) and the analysis / analysis_mkldnn TESTs are replaced by SetConfig/SetInput plus profile, fuse_statis, and compare tests; the MKL-DNN switch moves into SetConfig:

  return record;
}

void SetConfig(AnalysisConfig *cfg) {
  cfg->param_file = FLAGS_infer_model + "/__params__";
  cfg->prog_file = FLAGS_infer_model + "/__model__";
  cfg->use_gpu = false;
  cfg->device = 0;
  cfg->enable_ir_optim = true;
  cfg->specify_input_name = true;
  // TODO(TJ): fix fusion gru
  cfg->ir_passes.push_back("fc_gru_fuse_pass");
#ifdef PADDLE_WITH_MKLDNN
  cfg->_use_mkldnn = true;
  // disable mkldnn fuse since it should have some bugs
  cfg->ir_passes.push_back("conv_relu_mkldnn_fuse_pass");
#endif
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
  std::string line;
  std::ifstream file(FLAGS_infer_data);
  std::getline(file, line);
  auto record = ProcessALine(line);
  file.close();

  PaddleTensor input;
  input.shape = record.shape;
  input.dtype = PaddleDType::FLOAT32;
  size_t input_size = record.data.size() * sizeof(float);
  input.data.Resize(input_size);
  memcpy(input.data.data(), record.data.data(), input_size);
  std::vector<PaddleTensor> input_slots;
  input_slots.assign({input});
  (*inputs).emplace_back(input_slots);
}

// Easy for profiling independently.
//  ocr, mobilenet and se_resnext50
TEST(Analyzer_vis, profile) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  std::vector<PaddleTensor> outputs;

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);

  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
    const float ocr_result_data[] = {
        5.273636460856323538e-08, 3.296741795111302054e-07,
        1.873261190610264748e-08, 3.403730275408634043e-08,
        3.383312474625199684e-08};
    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
    size_t size = GetSize(outputs[0]);
    PADDLE_ENFORCE_GT(size, 0);
    float *result = static_cast<float *>(outputs[0].data.data());
    for (size_t i = 0; i < std::min(5UL, size); i++) {
      EXPECT_NEAR(result[i], ocr_result_data[i], 1e-3);
    }
  }
}

// Check the fuse status
TEST(Analyzer_vis, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  int num_ops;
  GetFuseStatis(cfg, &num_ops);
}

// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_vis, compare) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(cfg, input_slots_all);
}

}  // namespace analysis
}  // namespace inference
paddle/fluid/inference/tests/api/tester_helper.h
@@ -15,6 +15,7 @@
#pragma once

#include <gtest/gtest.h>
#include <string>
#include <thread>  // NOLINT
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"

@@ -28,17 +29,18 @@
The burning flag is removed and a use_analysis flag is added:

DEFINE_string(infer_model, "", "model path");
DEFINE_string(infer_data, "", "data file");
DEFINE_int32(batch_size, 1, "batch size.");
DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
DEFINE_bool(use_analysis, true,
            "Running the inference program in analysis mode.");

namespace paddle {
namespace inference {

void CompareResult(const std::vector<PaddleTensor> &outputs,
                   const std::vector<PaddleTensor> &ref_outputs) {
  EXPECT_GT(outputs.size(), 0UL);
  EXPECT_EQ(outputs.size(), ref_outputs.size());
  for (size_t i = 0; i < outputs.size(); i++) {
    auto &out = outputs[i];

@@ -72,14 +74,50 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
GetPrediction, GetSize, and GetFuseStatis are new, and TestOneThreadPrediction gains a use_analysis parameter:

  }
}

std::unique_ptr<PaddlePredictor> GetPrediction(AnalysisConfig config,
                                               bool use_analysis = true) {
  if (use_analysis) {
    return CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
        config);
  } else {
    return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
        config);
  }
}

size_t GetSize(const PaddleTensor &out) {
  return std::accumulate(out.shape.begin(), out.shape.end(), 1,
                         [](int a, int b) { return a * b; });
}

std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
                                                   int *num_ops) {
  auto predictor = GetPrediction(config);
  AnalysisPredictor *analysis_predictor =
      dynamic_cast<AnalysisPredictor *>(predictor.get());
  auto &fuse_statis = analysis_predictor->analysis_argument()
                          .Get<std::unordered_map<std::string, int>>(
                              framework::ir::kFuseStatisAttr);
  for (auto &item : fuse_statis) {
    LOG(INFO) << "fused " << item.first << " " << item.second;
  }
  int num = 0;
  for (auto &node :
       analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
    if (node->IsFunction()) {
      ++num;
    }
  }
  *num_ops = num;
  return fuse_statis;
}

void TestOneThreadPrediction(
    AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
    std::vector<PaddleTensor> *outputs, bool use_analysis = true) {
  int batch_size = FLAGS_batch_size;
  int num_times = FLAGS_repeat;
  auto predictor = GetPrediction(config, use_analysis);
  Timer timer;
  timer.tic();
  for (int i = 0; i < num_times; i++) {

@@ -93,7 +131,8 @@ void TestOneThreadPrediction(
void TestMultiThreadPrediction(
    AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
    std::vector<PaddleTensor> *outputs, int num_threads,
    bool use_analysis = true) {
  int batch_size = FLAGS_batch_size;
  int num_times = FLAGS_repeat;
  std::vector<std::thread> threads;

@@ -101,9 +140,7 @@ void TestMultiThreadPrediction(
  // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
  // because AttentionLSTM's hard code nodeid will be damanged.
  for (int tid = 0; tid < num_threads; ++tid) {
    predictors.emplace_back(GetPrediction(config, use_analysis));
  }
  for (int tid = 0; tid < num_threads; ++tid) {
    threads.emplace_back([&, tid]() {

@@ -129,13 +166,25 @@ void TestMultiThreadPrediction(
TestPrediction now forwards use_analysis, and CompareNativeAndAnalysis is new:

void TestPrediction(AnalysisConfig config,
                    const std::vector<std::vector<PaddleTensor>> inputs,
                    std::vector<PaddleTensor> *outputs, int num_threads,
                    bool use_analysis = FLAGS_use_analysis) {
  LOG(INFO) << "use_analysis: " << use_analysis;
  if (num_threads == 1) {
    TestOneThreadPrediction(config, inputs, outputs, use_analysis);
  } else {
    TestMultiThreadPrediction(config, inputs, outputs, num_threads,
                              use_analysis);
  }
}

void CompareNativeAndAnalysis(
    AnalysisConfig config,
    const std::vector<std::vector<PaddleTensor>> inputs) {
  std::vector<PaddleTensor> native_outputs, analysis_outputs;
  TestOneThreadPrediction(config, inputs, &native_outputs, false);
  TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
  CompareResult(analysis_outputs, native_outputs);
}

}  // namespace inference
}  // namespace paddle
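All of these helpers are driven by the gflags defined at the top of the header, so every tester binary touched by this commit is exercised the same way. A hypothetical invocation (the binary name and paths are placeholders, not part of this commit):

./analyzer_lac_tester --infer_model=/path/to/model --infer_data=/path/to/data \
    --batch_size=1 --repeat=10 --num_threads=1 --use_analysis=true

Passing --test_all_data=true makes each tester's SetInput enqueue every batch in the data file instead of only the first one, and --num_threads greater than 1 routes TestPrediction through TestMultiThreadPrediction.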