Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
abf019f6
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
abf019f6
编写于
9月 20, 2018
作者:
L
luotao1
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into for_test
上级
3ba7e74d
b7588751
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
390 addition
and
420 deletion
+390
-420
doc/README.md
doc/README.md
+7
-0
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+57
-91
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+56
-80
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+57
-72
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
+46
-67
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
...nference/tests/api/analyzer_text_classification_tester.cc
+47
-38
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+58
-59
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+62
-13
未找到文件。
doc/README.md
0 → 100644
浏览文件 @
abf019f6
# For Readers and Developers
Thanks for reading PaddlePaddle documentation.
Since
**September 17th, 2018**
, the
**0.15.0 and develop**
documentation source has been moved to
[
Fluiddoc Repo
](
https://github.com/PaddlePaddle/Paddle
)
and updated in Fluiddoc Repo.
Please turn to Fluiddoc Repo for the latest documentation.
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
abf019f6
...
...
@@ -103,108 +103,74 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
input_slots
->
assign
({
input_tensor
});
}
const
int64_t
lac_ref_data
[]
=
{
24
,
25
,
25
,
25
,
38
,
30
,
31
,
14
,
15
,
44
,
24
,
25
,
25
,
25
,
25
,
25
,
44
,
24
,
25
,
25
,
25
,
36
,
42
,
43
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
38
,
39
,
14
,
15
,
44
,
22
,
23
,
23
,
23
,
23
,
23
,
23
,
23
};
void
TestLACPrediction
(
const
std
::
string
&
model_path
,
const
std
::
string
&
data_file
,
const
int
batch_size
,
const
int
repeat
,
bool
use_analysis
=
false
)
{
AnalysisConfig
cfg
;
cfg
.
model_dir
=
model_path
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
model_dir
=
FLAGS_infer_model
;
cfg
->
use_gpu
=
false
;
cfg
->
device
=
0
;
cfg
->
specify_input_name
=
true
;
cfg
->
enable_ir_optim
=
true
;
}
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs_slots
;
DataRecord
data
(
data_file
,
batch_size
);
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
if
(
use_analysis
)
{
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
cfg
);
}
for
(
int
i
=
0
;
i
<
FLAGS_burning
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
std
::
vector
<
PaddleTensor
>
input_slots
;
int
epoch
=
FLAGS_test_all_data
?
data
.
batched_datas
.
size
()
:
1
;
LOG
(
INFO
)
<<
"number of samples: "
<<
epoch
;
for
(
int
bid
=
0
;
bid
<
epoch
;
++
bid
)
{
GetOneBatch
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
(
*
inputs
).
emplace_back
(
input_slots
);
}
Timer
timer
;
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
for
(
size_t
bid
=
0
;
bid
<
data
.
batched_datas
.
size
();
++
bid
)
{
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
input_slots_all
.
emplace_back
(
input_slots
);
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
data
.
datasets
.
size
();
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs_slots
,
FLAGS_num_threads
);
return
;
}
timer
.
tic
();
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
}
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
timer
.
toc
()
/
repeat
);
}
// check result
EXPECT_EQ
(
outputs_slots
.
size
(),
1UL
);
auto
&
out
=
outputs_slots
[
0
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
batch1_size
=
sizeof
(
lac_ref_data
)
/
sizeof
(
int64_t
);
PADDLE_ENFORCE_GT
(
size
,
0
);
EXPECT_GE
(
size
,
batch1_size
);
int64_t
*
pdata
=
static_cast
<
int64_t
*>
(
out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
batch1_size
;
++
i
)
{
EXPECT_EQ
(
pdata
[
i
],
lac_ref_data
[
i
]);
}
// Easy for profiling independently.
TEST
(
Analyzer_LAC
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
if
(
use_analysis
)
{
// run once for comparion as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
cfg
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
CompareResult
(
ref_outputs_slots
,
outputs_slots
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
for
(
auto
&
item
:
fuse_statis
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
int
num_ops
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
const
int64_t
lac_ref_data
[]
=
{
24
,
25
,
25
,
25
,
38
,
30
,
31
,
14
,
15
,
44
,
24
,
25
,
25
,
25
,
25
,
25
,
44
,
24
,
25
,
25
,
25
,
36
,
42
,
43
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
38
,
39
,
14
,
15
,
44
,
22
,
23
,
23
,
23
,
23
,
23
,
23
,
23
};
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
size_t
batch1_size
=
sizeof
(
lac_ref_data
)
/
sizeof
(
int64_t
);
PADDLE_ENFORCE_GE
(
size
,
batch1_size
);
int64_t
*
pdata
=
static_cast
<
int64_t
*>
(
outputs
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
batch1_size
;
++
i
)
{
EXPECT_EQ
(
pdata
[
i
],
lac_ref_data
[
i
]);
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_gru_fuse"
),
4
);
EXPECT_EQ
(
num_ops
,
11
);
}
}
TEST
(
Analyzer_LAC
,
native
)
{
LOG
(
INFO
)
<<
"LAC with native"
;
TestLACPrediction
(
FLAGS_infer_model
,
FLAGS_infer_data
,
FLAGS_batch_size
,
FLAGS_repeat
);
// Check the fuse status
TEST
(
Analyzer_LAC
,
fuse_statis
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_gru_fuse"
),
4
);
EXPECT_EQ
(
num_ops
,
11
);
}
TEST
(
Analyzer_LAC
,
analysis
)
{
LOG
(
INFO
)
<<
"LAC with analysis"
;
TestLACPrediction
(
FLAGS_infer_model
,
FLAGS_infer_data
,
FLAGS_batch_size
,
FLAGS_repeat
,
true
);
// Compare result of NativeConfig and AnalysisConfig
TEST
(
Analyzer_LAC
,
compare
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
}
}
// namespace analysis
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
abf019f6
...
...
@@ -95,97 +95,73 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
// the first inference result
const
int
chinese_ner_result_data
[]
=
{
30
,
45
,
41
,
48
,
17
,
26
,
48
,
39
,
38
,
16
,
25
};
void
TestChineseNERPrediction
(
bool
use_analysis
)
{
AnalysisConfig
cfg
;
cfg
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
.
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs
;
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
Timer
timer
;
if
(
use_analysis
)
{
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
cfg
);
}
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
->
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
->
use_gpu
=
false
;
cfg
->
device
=
0
;
cfg
->
specify_input_name
=
true
;
cfg
->
enable_ir_optim
=
true
;
}
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
for
(
size_t
bid
=
0
;
bid
<
data
.
num_samples
/
FLAGS_batch_size
;
++
bid
)
{
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
input_slots_all
.
emplace_back
(
input_slots
);
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
data
.
num_samples
;
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
return
;
}
// Prepare inputs.
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
timer
.
tic
();
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs
);
std
::
vector
<
PaddleTensor
>
input_slots
;
int
epoch
=
FLAGS_test_all_data
?
data
.
num_samples
/
FLAGS_batch_size
:
1
;
LOG
(
INFO
)
<<
"number of samples: "
<<
epoch
*
FLAGS_batch_size
;
for
(
int
bid
=
0
;
bid
<
epoch
;
++
bid
)
{
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
(
*
inputs
).
emplace_back
(
input_slots
);
}
PrintTime
(
FLAGS_batch_size
,
FLAGS_repeat
,
1
,
0
,
timer
.
toc
()
/
FLAGS_repeat
);
}
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
auto
&
out
=
outputs
[
0
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
PADDLE_ENFORCE_GT
(
size
,
0
);
int64_t
*
result
=
static_cast
<
int64_t
*>
(
out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
std
::
min
(
11UL
,
size
);
i
++
)
{
PADDLE_ENFORCE
(
result
[
i
],
chinese_ner_result_data
[
i
]);
}
// Easy for profiling independently.
TEST
(
Analyzer_Chinese_ner
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
if
(
use_analysis
)
{
// run once for comparion as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
cfg
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
CompareResult
(
ref_outputs_slots
,
outputs
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
for
(
auto
&
item
:
fuse_statis
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
int
num_ops
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
const
int
chinese_ner_result_data
[]
=
{
30
,
45
,
41
,
48
,
17
,
26
,
48
,
39
,
38
,
16
,
25
};
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
int64_t
*
result
=
static_cast
<
int64_t
*>
(
outputs
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
std
::
min
(
11UL
,
size
);
i
++
)
{
EXPECT_EQ
(
result
[
i
],
chinese_ner_result_data
[
i
]);
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_gru_fuse"
),
2
);
EXPECT_EQ
(
num_ops
,
14
);
}
}
TEST
(
Analyzer_Chinese_ner
,
native
)
{
TestChineseNERPrediction
(
false
);
}
// Check the fuse status
TEST
(
Analyzer_Chinese_ner
,
fuse_statis
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
TEST
(
Analyzer_Chinese_ner
,
analysis
)
{
TestChineseNERPrediction
(
true
);
}
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_gru_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_gru_fuse"
),
2
);
EXPECT_EQ
(
num_ops
,
14
);
}
// Compare result of NativeConfig and AnalysisConfig
TEST
(
Analyzer_Chinese_ner
,
compare
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
abf019f6
...
...
@@ -25,6 +25,7 @@ struct DataRecord {
std
::
vector
<
size_t
>
lod1
,
lod2
,
lod3
;
std
::
vector
<
std
::
vector
<
float
>>
rnn_link_data
,
rnn_week_datas
,
rnn_minute_datas
;
size_t
num_samples
;
// total number of samples
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
DataRecord
()
=
default
;
...
...
@@ -97,6 +98,7 @@ struct DataRecord {
week_data_all
.
push_back
(
std
::
move
(
week_data
));
minute_data_all
.
push_back
(
std
::
move
(
minute_data
));
}
num_samples
=
num_lines
;
}
};
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
...
...
@@ -147,89 +149,72 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
// Test with a really complicate model.
void
TestRNN1Prediction
(
bool
use_analysis
,
bool
activate_ir
,
int
num_threads
)
{
AnalysisConfig
config
;
config
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
config
.
param_file
=
FLAGS_infer_model
+
"/param"
;
config
.
use_gpu
=
false
;
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
config
.
enable_ir_optim
=
activate_ir
;
PADDLE_ENFORCE
(
config
.
ir_mode
==
AnalysisConfig
::
IrPassMode
::
kExclude
);
// default
config
.
ir_passes
.
clear
();
// Do not exclude any pass.
int
batch_size
=
FLAGS_batch_size
;
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
->
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
->
use_gpu
=
false
;
cfg
->
device
=
0
;
cfg
->
specify_input_name
=
true
;
cfg
->
enable_ir_optim
=
true
;
cfg
->
ir_passes
.
clear
();
// Do not exclude any pass.
}
auto
base_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
FLAGS_infer_data
,
batch_size
);
// Prepare inputs.
PrepareInputs
(
&
input_slots
,
&
data
,
batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
,
base_outputs
;
int
epoch
=
FLAGS_test_all_data
?
data
.
num_samples
/
FLAGS_batch_size
:
1
;
LOG
(
INFO
)
<<
"number of samples: "
<<
epoch
*
FLAGS_batch_size
;
for
(
int
bid
=
0
;
bid
<
epoch
;
++
bid
)
{
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
(
*
inputs
).
emplace_back
(
input_slots
);
}
}
base_predictor
->
Run
(
input_slots
,
&
base_outputs
);
// Easy for profiling independently.
TEST
(
Analyzer_rnn1
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
input_slots_all
.
emplace_back
(
input_slots
);
if
(
num_threads
==
1
)
{
TestOneThreadPrediction
(
config
,
input_slots_all
,
&
outputs
);
CompareResult
(
outputs
,
base_outputs
);
}
else
{
// only return the output of first thread
TestMultiThreadPrediction
(
config
,
input_slots_all
,
&
outputs
,
num_threads
);
}
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
}
if
(
use_analysis
&&
activate_ir
)
{
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
for
(
auto
&
item
:
fuse_statis
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
// Check the fuse status
TEST
(
Analyzer_rnn1
,
fuse_statis
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
int
num_ops
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
}
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
int
num_ops
;
auto
fuse_statis
=
GetFuseStatis
(
cfg
,
&
num_ops
);
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
EXPECT_EQ
(
fuse_statis
.
at
(
"seq_concat_fc_fuse"
),
1
);
EXPECT_EQ
(
num_ops
,
13
);
// After graph optimization, only 13 operators exists.
}
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
1
);
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_nobias_lstm_fuse"
),
2
);
// bi-directional LSTM
EXPECT_EQ
(
fuse_statis
.
at
(
"seq_concat_fc_fuse"
),
1
);
EXPECT_EQ
(
num_ops
,
13
);
// After graph optimization, only 13 operators exists.
}
// Compare result of NativeConfig and AnalysisConfig
TEST
(
Analyzer_rnn1
,
compare
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
}
// Inference with analysis and IR, easy for profiling independently.
TEST
(
Analyzer
,
rnn1
)
{
TestRNN1Prediction
(
true
,
true
,
FLAGS_num_threads
);
}
// Test Multi-Thread.
TEST
(
Analyzer_rnn1
,
multi_thread
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
// Other unit-tests of RNN1, test different options of use_analysis,
// activate_ir and multi-threads.
TEST
(
Analyzer
,
RNN_tests
)
{
int
num_threads
[
2
]
=
{
1
,
4
};
for
(
auto
i
:
num_threads
)
{
// Directly infer with the original model.
TestRNN1Prediction
(
false
,
false
,
i
);
// Inference with the original model with the analysis turned on, the
// analysis module will transform the program to a data flow graph.
TestRNN1Prediction
(
true
,
false
,
i
);
// Inference with analysis and IR. The IR module will fuse some large
// kernels.
TestRNN1Prediction
(
true
,
true
,
i
);
}
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
4
/* num_threads */
);
}
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
浏览文件 @
abf019f6
...
...
@@ -12,24 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include <thread> // NOLINT
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data path"
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -41,6 +24,7 @@ struct DataRecord {
std
::
vector
<
size_t
>
lod
;
std
::
vector
<
std
::
vector
<
float
>>
rnn_link_data
;
std
::
vector
<
float
>
result_data
;
size_t
num_samples
;
// total number of samples
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
DataRecord
()
=
default
;
...
...
@@ -100,6 +84,7 @@ struct DataRecord {
result_data
.
insert
(
result_data
.
end
(),
tmp
.
begin
(),
tmp
.
end
());
}
}
num_samples
=
num_lines
/
2
;
}
};
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
input_slots
,
DataRecord
*
data
,
...
...
@@ -118,64 +103,58 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
input_slots
->
assign
({
feed_tensor
});
}
void
CompareResult
(
const
std
::
vector
<
PaddleTensor
>
&
outputs
,
const
std
::
vector
<
float
>
&
base_result
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
data
=
static_cast
<
float
*>
(
out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
data
[
i
],
base_result
[
i
],
1e-3
);
}
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
->
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
->
use_gpu
=
false
;
cfg
->
device
=
0
;
cfg
->
specify_input_name
=
true
;
cfg
->
enable_ir_optim
=
true
;
}
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
std
::
vector
<
PaddleTensor
>
input_slots
;
int
epoch
=
FLAGS_test_all_data
?
data
.
num_samples
/
FLAGS_batch_size
:
1
;
LOG
(
INFO
)
<<
"number of samples: "
<<
epoch
*
FLAGS_batch_size
;
for
(
int
bid
=
0
;
bid
<
epoch
;
++
bid
)
{
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
(
*
inputs
).
emplace_back
(
input_slots
);
}
}
// Test with a really complicate model.
void
TestRNN2Prediction
()
{
AnalysisConfig
config
;
config
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
config
.
param_file
=
FLAGS_infer_model
+
"/param"
;
config
.
use_gpu
=
false
;
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
config
.
enable_ir_optim
=
true
;
PADDLE_ENFORCE
(
config
.
ir_mode
==
AnalysisConfig
::
IrPassMode
::
kExclude
);
// default
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
// Easy for profiling independently.
TEST
(
Analyzer_rnn2
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
auto
base_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
FLAGS_infer_data
,
batch_size
);
PrepareInputs
(
&
input_slots
,
&
data
,
batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
,
base_outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
Timer
timer1
;
timer1
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
base_predictor
->
Run
(
input_slots
,
&
base_outputs
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
result
[
i
],
data
.
result_data
[
i
],
1e-3
);
}
}
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
timer1
.
toc
()
/
num_times
);
}
Timer
timer2
;
timer2
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs
);
}
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
timer2
.
toc
()
/
num_times
);
// Compare result of NativeConfig and AnalysisConfig
TEST
(
Analyzer_rnn2
,
compare
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
CompareResult
(
base_outputs
,
data
.
result_data
);
CompareResult
(
outputs
,
data
.
result_data
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
}
TEST
(
Analyzer
,
rnn2
)
{
TestRNN2Prediction
();
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
浏览文件 @
abf019f6
...
...
@@ -46,54 +46,63 @@ struct DataReader {
std
::
unique_ptr
<
std
::
ifstream
>
file
;
};
void
Main
(
int
batch_size
)
{
// shape --
// Create Predictor --
AnalysisConfig
config
;
c
onfig
.
model_dir
=
FLAGS_infer_model
;
c
onfig
.
use_gpu
=
fals
e
;
config
.
enable_ir_optim
=
true
;
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
model_dir
=
FLAGS_infer_model
;
cfg
->
use_gpu
=
false
;
cfg
->
device
=
0
;
c
fg
->
specify_input_name
=
true
;
c
fg
->
enable_ir_optim
=
tru
e
;
}
std
::
vector
<
PaddleTensor
>
input_slots
,
output_slots
;
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
std
::
vector
<
PaddleTensor
>
input_slots
;
DataReader
reader
(
FLAGS_infer_data
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
int
num_batches
=
0
;
while
(
reader
.
NextBatch
(
&
input_slots
,
FLAGS_batch_size
))
{
input_slots_all
.
emplace_back
(
input_slots
);
++
num_batches
;
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
num_batches
*
FLAGS_batch_size
;
TestPrediction
(
config
,
input_slots_all
,
&
output_slots
,
FLAGS_num_threads
);
return
;
int
num_batches
=
0
;
while
(
reader
.
NextBatch
(
&
input_slots
,
FLAGS_batch_size
))
{
(
*
inputs
).
emplace_back
(
input_slots
);
++
num_batches
;
if
(
!
FLAGS_test_all_data
)
return
;
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
num_batches
*
FLAGS_batch_size
;
}
// one batch starts
// data --
reader
.
NextBatch
(
&
input_slots
,
FLAGS_batch_size
)
;
input_slots_all
.
emplace_back
(
input_slots
);
TestPrediction
(
config
,
input_slots_all
,
&
output_slots
,
FLAGS_num_threads
)
;
// Easy for profiling independently.
TEST
(
Analyzer_Text_Classification
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
// Get output
LOG
(
INFO
)
<<
"get outputs "
<<
output_slots
.
size
();
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
for
(
auto
&
output
:
output_slots
)
{
LOG
(
INFO
)
<<
"output.shape: "
<<
to_string
(
output
.
shape
);
// no lod ?
CHECK_EQ
(
output
.
lod
.
size
(),
0UL
);
LOG
(
INFO
)
<<
"output.dtype: "
<<
output
.
dtype
;
std
::
stringstream
ss
;
for
(
int
i
=
0
;
i
<
5
;
i
++
)
{
ss
<<
static_cast
<
float
*>
(
output
.
data
.
data
())[
i
]
<<
" "
;
if
(
FLAGS_num_threads
==
1
)
{
// Get output
LOG
(
INFO
)
<<
"get outputs "
<<
outputs
.
size
();
for
(
auto
&
output
:
outputs
)
{
LOG
(
INFO
)
<<
"output.shape: "
<<
to_string
(
output
.
shape
);
// no lod ?
CHECK_EQ
(
output
.
lod
.
size
(),
0UL
);
LOG
(
INFO
)
<<
"output.dtype: "
<<
output
.
dtype
;
std
::
stringstream
ss
;
for
(
int
i
=
0
;
i
<
5
;
i
++
)
{
ss
<<
static_cast
<
float
*>
(
output
.
data
.
data
())[
i
]
<<
" "
;
}
LOG
(
INFO
)
<<
"output.data summary: "
<<
ss
.
str
();
// one batch ends
}
LOG
(
INFO
)
<<
"output.data summary: "
<<
ss
.
str
();
// one batch ends
}
}
TEST
(
text_classification
,
basic
)
{
Main
(
FLAGS_batch_size
);
}
// Compare result of NativeConfig and AnalysisConfig
TEST
(
Analyzer_Text_Classification
,
compare
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
浏览文件 @
abf019f6
...
...
@@ -49,84 +49,83 @@ Record ProcessALine(const std::string &line) {
return
record
;
}
/*
* Use the native and analysis fluid engine to inference the demo.
* ocr, mobilenet and se_resnext50
*/
void
TestVisualPrediction
(
bool
use_mkldnn
)
{
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
AnalysisConfig
cfg
;
cfg
.
param_file
=
FLAGS_infer_model
+
"/__params__"
;
cfg
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
.
use_gpu
=
false
;
cfg
.
_use_mkldnn
=
use_mkldnn
;
cfg
.
device
=
0
;
cfg
.
enable_ir_optim
=
true
;
void
SetConfig
(
AnalysisConfig
*
cfg
)
{
cfg
->
param_file
=
FLAGS_infer_model
+
"/__params__"
;
cfg
->
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
->
use_gpu
=
false
;
cfg
->
device
=
0
;
cfg
->
enable_ir_optim
=
true
;
cfg
->
specify_input_name
=
true
;
// TODO(TJ): fix fusion gru
cfg
.
ir_passes
.
push_back
(
"fc_gru_fuse_pass"
);
cfg
->
ir_passes
.
push_back
(
"fc_gru_fuse_pass"
);
#ifdef PADDLE_WITH_MKLDNN
cfg
->
_use_mkldnn
=
true
;
// disable mkldnn fuse since it should have some bugs
cfg
.
ir_passes
.
push_back
(
"conv_relu_mkldnn_fuse_pass"
);
cfg
->
ir_passes
.
push_back
(
"conv_relu_mkldnn_fuse_pass"
);
#endif
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
// Only have single batch of data.
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
PADDLE_ENFORCE_EQ
(
FLAGS_test_all_data
,
0
,
"Only have single batch of data."
);
std
::
string
line
;
std
::
ifstream
file
(
FLAGS_infer_data
);
std
::
getline
(
file
,
line
);
auto
record
=
ProcessALine
(
line
);
file
.
close
();
// Inference.
PaddleTensor
input
;
input
.
shape
=
record
.
shape
;
input
.
data
=
PaddleBuf
(
record
.
data
.
data
(),
record
.
data
.
size
()
*
sizeof
(
float
));
input
.
dtype
=
PaddleDType
::
FLOAT32
;
size_t
input_size
=
record
.
data
.
size
()
*
sizeof
(
float
);
input
.
data
.
Resize
(
input_size
);
memcpy
(
input
.
data
.
data
(),
record
.
data
.
data
(),
input_size
);
std
::
vector
<
PaddleTensor
>
input_slots
;
input_slots
.
assign
({
input
});
(
*
inputs
).
emplace_back
(
input_slots
);
}
std
::
vector
<
PaddleTensor
>
outputs_slots
;
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
predictor
->
Run
({
input
},
&
outputs_slots
);
}
PrintTime
(
/*batch size*/
1
,
FLAGS_repeat
,
/*num threads*/
1
,
/*thread id*/
0
,
timer
.
toc
()
/
FLAGS_repeat
);
VLOG
(
3
)
<<
"output.size "
<<
outputs_slots
.
size
();
// run native as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
cfg
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
({
input
},
&
ref_outputs_slots
);
CompareResult
(
outputs_slots
,
ref_outputs_slots
);
// print what are fused
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
for
(
auto
&
item
:
fuse_statis
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
int
num_ops
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num_ops
;
// Easy for profiling independently.
// ocr, mobilenet and se_resnext50
TEST
(
Analyzer_vis
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
const
float
ocr_result_data
[]
=
{
5.273636460856323538e-08
,
3.296741795111302054e-07
,
1.873261190610264748e-08
,
3.403730275408634043e-08
,
3.383312474625199684e-08
};
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
size
);
i
++
)
{
EXPECT_NEAR
(
result
[
i
],
ocr_result_data
[
i
],
1e-3
);
}
}
LOG
(
INFO
)
<<
"has num ops: "
<<
num_ops
;
}
TEST
(
Analyzer_vis
,
analysis
)
{
TestVisualPrediction
(
/*use_mkldnn*/
false
);
}
#ifdef PADDLE_WITH_MKLDNN
TEST
(
Analyzer_vis
,
analysis_mkldnn
)
{
TestVisualPrediction
(
/*use_mkldnn*/
true
);
// Check the fuse status
TEST
(
Analyzer_vis
,
fuse_statis
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
int
num_ops
;
GetFuseStatis
(
cfg
,
&
num_ops
);
}
// Compare result of NativeConfig and AnalysisConfig
TEST
(
Analyzer_vis
,
compare
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
CompareNativeAndAnalysis
(
cfg
,
input_slots_all
);
}
#endif
}
// namespace analysis
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
abf019f6
...
...
@@ -15,6 +15,7 @@
#pragma once
#include <gtest/gtest.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
...
...
@@ -28,17 +29,18 @@
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data file"
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
burning
,
0
,
"Burning before repeat."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
DEFINE_bool
(
use_analysis
,
true
,
"Running the inference program in analysis mode."
);
namespace
paddle
{
namespace
inference
{
void
CompareResult
(
const
std
::
vector
<
PaddleTensor
>
&
outputs
,
const
std
::
vector
<
PaddleTensor
>
&
ref_outputs
)
{
EXPECT_GT
(
outputs
.
size
(),
0
);
EXPECT_GT
(
outputs
.
size
(),
0
UL
);
EXPECT_EQ
(
outputs
.
size
(),
ref_outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
...
...
@@ -72,14 +74,50 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
}
}
std
::
unique_ptr
<
PaddlePredictor
>
GetPrediction
(
AnalysisConfig
config
,
bool
use_analysis
=
true
)
{
if
(
use_analysis
)
{
return
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
}
else
{
return
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
}
}
size_t
GetSize
(
const
PaddleTensor
&
out
)
{
return
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
}
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
AnalysisConfig
config
,
int
*
num_ops
)
{
auto
predictor
=
GetPrediction
(
config
);
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
framework
::
ir
::
kFuseStatisAttr
);
for
(
auto
&
item
:
fuse_statis
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
int
num
=
0
;
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_dfg
->
nodes
.
nodes
())
{
if
(
node
->
IsFunction
())
{
++
num
;
}
}
*
num_ops
=
num
;
return
fuse_statis
;
}
void
TestOneThreadPrediction
(
AnalysisConfig
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
)
{
std
::
vector
<
PaddleTensor
>
*
outputs
,
bool
use_analysis
=
true
)
{
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
auto
predictor
=
GetPrediction
(
config
,
use_analysis
);
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
...
...
@@ -93,7 +131,8 @@ void TestOneThreadPrediction(
void
TestMultiThreadPrediction
(
AnalysisConfig
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
)
{
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
bool
use_analysis
=
true
)
{
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
std
::
vector
<
std
::
thread
>
threads
;
...
...
@@ -101,9 +140,7 @@ void TestMultiThreadPrediction(
// TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
// because AttentionLSTM's hard code nodeid will be damanged.
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
predictors
.
emplace_back
(
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
));
predictors
.
emplace_back
(
GetPrediction
(
config
,
use_analysis
));
}
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
...
...
@@ -129,13 +166,25 @@ void TestMultiThreadPrediction(
void
TestPrediction
(
AnalysisConfig
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
)
{
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
bool
use_analysis
=
FLAGS_use_analysis
)
{
LOG
(
INFO
)
<<
"use_analysis: "
<<
use_analysis
;
if
(
num_threads
==
1
)
{
TestOneThreadPrediction
(
config
,
inputs
,
outputs
);
TestOneThreadPrediction
(
config
,
inputs
,
outputs
,
use_analysis
);
}
else
{
TestMultiThreadPrediction
(
config
,
inputs
,
outputs
,
num_threads
);
TestMultiThreadPrediction
(
config
,
inputs
,
outputs
,
num_threads
,
use_analysis
);
}
}
void
CompareNativeAndAnalysis
(
AnalysisConfig
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
)
{
std
::
vector
<
PaddleTensor
>
native_outputs
,
analysis_outputs
;
TestOneThreadPrediction
(
config
,
inputs
,
&
native_outputs
,
false
);
TestOneThreadPrediction
(
config
,
inputs
,
&
analysis_outputs
,
true
);
CompareResult
(
analysis_outputs
,
native_outputs
);
}
}
// namespace inference
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录