Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
d4a5326a
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d4a5326a
编写于
9月 14, 2018
作者:
T
Tao Luo
提交者:
GitHub
9月 14, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13387 from luotao1/nlp_multi_thread
add multi-thread for nlp unit-tests
上级
24e61d30
6b9ccd97
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
207 addition
and
235 deletion
+207
-235
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+7
-1
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+2
-4
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+19
-51
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+18
-55
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+7
-77
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
...nference/tests/api/analyzer_text_classification_tester.cc
+28
-47
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+126
-0
未找到文件。
paddle/fluid/inference/api/helper.h
浏览文件 @
d4a5326a
...
...
@@ -123,10 +123,16 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
}
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
)
{
double
latency
,
int
epoch
=
1
)
{
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms ======"
;
if
(
epoch
>
1
)
{
int
samples
=
batch_size
*
epoch
;
LOG
(
INFO
)
<<
"====== sample number: "
<<
samples
<<
", average latency of each sample: "
<<
latency
/
samples
<<
"ms ======"
;
}
}
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
d4a5326a
...
...
@@ -51,9 +51,7 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
# text_classification
set
(
TEXT_CLASSIFICATION_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/text_classification"
)
download_model_and_data
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
"text-classification-Senta.tar.gz"
"text_classification_data.txt.tar.gz"
)
inference_analysis_test
(
test_text_classification SRCS analyzer_text_classification_tester.cc
inference_analysis_test
(
test_
analyzer_
text_classification SRCS analyzer_text_classification_tester.cc
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
ARGS --infer_model=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/text-classification-Senta
--infer_data=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/data.txt
--topn=1
# Just run top 1 batch.
)
--infer_data=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/data.txt
)
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
d4a5326a
...
...
@@ -12,21 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_model
,
""
,
"model path for LAC"
);
DEFINE_string
(
infer_data
,
""
,
"data file for LAC"
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
burning
,
0
,
"Burning before repeat."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -126,46 +112,37 @@ void TestLACPrediction(const std::string &model_path,
const
std
::
string
&
data_file
,
const
int
batch_size
,
const
int
repeat
,
bool
test_all_data
,
bool
use_analysis
=
false
)
{
NativeConfig
config
;
config
.
model_dir
=
model_path
;
config
.
use_gpu
=
false
;
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
AnalysisConfig
cfg
;
cfg
.
model_dir
=
model_path
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs_slots
;
DataRecord
data
(
data_file
,
batch_size
);
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
if
(
use_analysis
)
{
AnalysisConfig
cfg
;
cfg
.
model_dir
=
model_path
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
onfi
g
);
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
f
g
);
}
for
(
int
i
=
0
;
i
<
FLAGS_burning
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
}
Timer
timer
;
if
(
test_all_data
)
{
double
sum
=
0
;
LOG
(
INFO
)
<<
"Total number of samples: "
<<
data
.
datasets
.
size
();
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
for
(
size_t
bid
=
0
;
bid
<
data
.
batched_datas
.
size
();
++
bid
)
{
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
timer
.
tic
();
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
sum
+=
timer
.
toc
();
}
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
for
(
size_t
bid
=
0
;
bid
<
data
.
batched_datas
.
size
();
++
bid
)
{
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
input_slots_all
.
emplace_back
(
input_slots
);
}
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
sum
/
repeat
);
LOG
(
INFO
)
<<
"Average latency of each sample: "
<<
sum
/
repeat
/
data
.
datasets
.
size
()
<<
" ms"
;
LOG
(
INFO
)
<<
"total number of samples: "
<<
data
.
datasets
.
size
();
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs_slots
,
FLAGS_num_threads
);
return
;
}
timer
.
tic
();
...
...
@@ -190,19 +167,10 @@ void TestLACPrediction(const std::string &model_path,
if
(
use_analysis
)
{
// run once for comparion as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
onfi
g
);
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
f
g
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
EXPECT_EQ
(
ref_outputs_slots
.
size
(),
outputs_slots
.
size
());
auto
&
ref_out
=
ref_outputs_slots
[
0
];
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
EXPECT_EQ
(
size
,
ref_size
);
int64_t
*
pdata_ref
=
static_cast
<
int64_t
*>
(
ref_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
EXPECT_EQ
(
pdata_ref
[
i
],
pdata
[
i
]);
}
CompareResult
(
ref_outputs_slots
,
outputs_slots
);
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
d4a5326a
...
...
@@ -12,20 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data path"
);
DEFINE_int32
(
batch_size
,
10
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -113,50 +100,35 @@ const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
48
,
39
,
38
,
16
,
25
};
void
TestChineseNERPrediction
(
bool
use_analysis
)
{
NativeConfig
config
;
config
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
config
.
param_file
=
FLAGS_infer_model
+
"/param"
;
config
.
use_gpu
=
false
;
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
AnalysisConfig
cfg
;
cfg
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
.
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs
;
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
Timer
timer
;
if
(
use_analysis
)
{
AnalysisConfig
cfg
;
cfg
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
.
param_file
=
FLAGS_infer_model
+
"/param"
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
onfi
g
);
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
f
g
);
}
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
double
sum
=
0
;
size_t
num_samples
;
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
// Just one batch, the num_samples remains the same.
num_samples
=
data
.
num_samples
;
for
(
size_t
bid
=
0
;
bid
<
num_samples
/
FLAGS_batch_size
;
++
bid
)
{
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
timer
.
tic
();
predictor
->
Run
(
input_slots
,
&
outputs
);
sum
+=
timer
.
toc
();
}
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
for
(
size_t
bid
=
0
;
bid
<
data
.
num_samples
/
FLAGS_batch_size
;
++
bid
)
{
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
input_slots_all
.
emplace_back
(
input_slots
);
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
num_samples
;
PrintTime
(
FLAGS_batch_size
,
FLAGS_repeat
,
1
,
0
,
sum
/
FLAGS_repeat
);
LOG
(
INFO
)
<<
"average latency of each sample: "
<<
sum
/
FLAGS_repeat
/
num_samples
;
LOG
(
INFO
)
<<
"total number of samples: "
<<
data
.
num_samples
;
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
return
;
}
// Prepare inputs.
...
...
@@ -182,19 +154,10 @@ void TestChineseNERPrediction(bool use_analysis) {
if
(
use_analysis
)
{
// run once for comparion as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
onfi
g
);
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
f
g
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
EXPECT_EQ
(
ref_outputs_slots
.
size
(),
outputs
.
size
());
auto
&
ref_out
=
ref_outputs_slots
[
0
];
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
EXPECT_EQ
(
size
,
ref_size
);
int64_t
*
pdata_ref
=
static_cast
<
int64_t
*>
(
ref_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
EXPECT_EQ
(
pdata_ref
[
i
],
result
[
i
]);
}
CompareResult
(
ref_outputs_slots
,
outputs
);
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
d4a5326a
...
...
@@ -12,24 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include <thread> // NOLINT
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data path"
);
DEFINE_int32
(
batch_size
,
10
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -164,26 +147,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
void
CompareResult
(
const
std
::
vector
<
PaddleTensor
>
&
outputs
,
const
std
::
vector
<
PaddleTensor
>
&
base_outputs
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
base_outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
auto
&
base_out
=
base_outputs
[
i
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
size1
=
std
::
accumulate
(
base_out
.
shape
.
begin
(),
base_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
PADDLE_ENFORCE_EQ
(
size
,
size1
);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
data
=
static_cast
<
float
*>
(
out
.
data
.
data
());
float
*
base_data
=
static_cast
<
float
*>
(
base_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
data
[
i
],
base_data
[
i
],
1e-3
);
}
}
}
// Test with a really complicate model.
void
TestRNN1Prediction
(
bool
use_analysis
,
bool
activate_ir
,
int
num_threads
)
{
AnalysisConfig
config
;
...
...
@@ -198,7 +161,6 @@ void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
config
.
ir_passes
.
clear
();
// Do not exclude any pass.
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
auto
base_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
...
...
@@ -213,45 +175,14 @@ void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
base_predictor
->
Run
(
input_slots
,
&
base_outputs
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
input_slots_all
.
emplace_back
(
input_slots
);
if
(
num_threads
==
1
)
{
// Prepare inputs.
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs
);
}
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
timer
.
toc
()
/
num_times
);
TestOneThreadPrediction
(
config
,
input_slots_all
,
&
outputs
);
CompareResult
(
outputs
,
base_outputs
);
}
else
{
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
// TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
// because AttentionLSTM's hard code nodeid will be damanged.
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
predictors
.
emplace_back
(
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
));
}
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
// Each thread should have local input_slots and outputs.
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
FLAGS_infer_data
,
batch_size
);
PrepareInputs
(
&
input_slots
,
&
data
,
batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
;
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
predictors
[
tid
]
->
Run
(
input_slots
,
&
outputs
);
}
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
timer
.
toc
()
/
num_times
);
CompareResult
(
outputs
,
base_outputs
);
});
}
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
threads
[
i
].
join
();
}
// only return the output of first thread
TestMultiThreadPrediction
(
config
,
input_slots_all
,
&
outputs
,
num_threads
);
}
if
(
use_analysis
&&
activate_ir
)
{
...
...
@@ -293,8 +224,7 @@ TEST(Analyzer, RNN_tests) {
// Directly infer with the original model.
TestRNN1Prediction
(
false
,
false
,
i
);
// Inference with the original model with the analysis turned on, the
// analysis
// module will transform the program to a data flow graph.
// analysis module will transform the program to a data flow graph.
TestRNN1Prediction
(
true
,
false
,
i
);
// Inference with analysis and IR. The IR module will fuse some large
// kernels.
...
...
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
浏览文件 @
d4a5326a
...
...
@@ -12,23 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <gtest/gtest.h>
#include <fstream>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/timer.h"
DEFINE_string
(
infer_model
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
infer_data
,
""
,
"Path of the dataset."
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"How many times to repeat run."
);
DEFINE_int32
(
topn
,
-
1
,
"Run top n batches of data to save time"
);
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -37,24 +21,25 @@ struct DataReader {
explicit
DataReader
(
const
std
::
string
&
path
)
:
file
(
new
std
::
ifstream
(
path
))
{}
bool
NextBatch
(
PaddleTensor
*
tensor
,
int
batch_size
)
{
bool
NextBatch
(
std
::
vector
<
PaddleTensor
>
*
input
,
int
batch_size
)
{
PADDLE_ENFORCE_EQ
(
batch_size
,
1
);
std
::
string
line
;
tensor
->
lod
.
clear
();
tensor
->
lod
.
emplace_back
(
std
::
vector
<
size_t
>
({
0
}));
PaddleTensor
tensor
;
tensor
.
dtype
=
PaddleDType
::
INT64
;
tensor
.
lod
.
emplace_back
(
std
::
vector
<
size_t
>
({
0
}));
std
::
vector
<
int64_t
>
data
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
if
(
!
std
::
getline
(
*
file
,
line
))
return
false
;
inference
::
split_to_int64
(
line
,
' '
,
&
data
);
}
tensor
->
lod
.
front
().
push_back
(
data
.
size
());
tensor
.
lod
.
front
().
push_back
(
data
.
size
());
tensor
->
data
.
Resize
(
data
.
size
()
*
sizeof
(
int64_t
));
memcpy
(
tensor
->
data
.
data
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
int64_t
));
tensor
->
shape
.
clear
(
);
tensor
->
shape
.
push_back
(
data
.
size
()
);
tensor
->
shape
.
push_back
(
1
);
tensor
.
data
.
Resize
(
data
.
size
()
*
sizeof
(
int64_t
));
memcpy
(
tensor
.
data
.
data
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
int64_t
));
tensor
.
shape
.
push_back
(
data
.
size
()
);
tensor
.
shape
.
push_back
(
1
);
input
->
assign
({
tensor
}
);
return
true
;
}
...
...
@@ -68,32 +53,28 @@ void Main(int batch_size) {
config
.
model_dir
=
FLAGS_infer_model
;
config
.
use_gpu
=
false
;
config
.
enable_ir_optim
=
true
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
(
1
);
// one batch starts
// data --
auto
&
input
=
input_slots
[
0
];
input
.
dtype
=
PaddleDType
::
INT64
;
inference
::
Timer
timer
;
double
sum
=
0
;
std
::
vector
<
PaddleTensor
>
output_slots
;
std
::
vector
<
PaddleTensor
>
input_slots
,
output_slots
;
DataReader
reader
(
FLAGS_infer_data
)
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
int
num_batches
=
0
;
for
(
int
t
=
0
;
t
<
FLAGS_repeat
;
t
++
)
{
DataReader
reader
(
FLAGS_infer_data
);
while
(
reader
.
NextBatch
(
&
input
,
FLAGS_batch_size
))
{
if
(
FLAGS_topn
>
0
&&
num_batches
>
FLAGS_topn
)
break
;
timer
.
tic
();
CHECK
(
predictor
->
Run
(
input_slots
,
&
output_slots
));
sum
+=
timer
.
toc
();
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
int
num_batches
=
0
;
while
(
reader
.
NextBatch
(
&
input_slots
,
FLAGS_batch_size
))
{
input_slots_all
.
emplace_back
(
input_slots
);
++
num_batches
;
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
num_batches
*
FLAGS_batch_size
;
TestPrediction
(
config
,
input_slots_all
,
&
output_slots
,
FLAGS_num_threads
);
return
;
}
PrintTime
(
batch_size
,
FLAGS_repeat
,
1
,
0
,
sum
/
FLAGS_repeat
);
// one batch starts
// data --
reader
.
NextBatch
(
&
input_slots
,
FLAGS_batch_size
);
input_slots_all
.
emplace_back
(
input_slots
);
TestPrediction
(
config
,
input_slots_all
,
&
output_slots
,
FLAGS_num_threads
);
// Get output
LOG
(
INFO
)
<<
"get outputs "
<<
output_slots
.
size
();
...
...
paddle/fluid/inference/tests/api/tester_helper.h
0 → 100644
浏览文件 @
d4a5326a
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <gtest/gtest.h>
#include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data file"
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
burning
,
0
,
"Burning before repeat."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
namespace
paddle
{
namespace
inference
{
void
CompareResult
(
const
std
::
vector
<
PaddleTensor
>
&
outputs
,
const
std
::
vector
<
PaddleTensor
>
&
base_outputs
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
base_outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
auto
&
base_out
=
base_outputs
[
i
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
size1
=
std
::
accumulate
(
base_out
.
shape
.
begin
(),
base_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
PADDLE_ENFORCE_EQ
(
size
,
size1
);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
data
=
static_cast
<
float
*>
(
out
.
data
.
data
());
float
*
base_data
=
static_cast
<
float
*>
(
base_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
data
[
i
],
base_data
[
i
],
1e-3
);
}
}
}
void
TestOneThreadPrediction
(
AnalysisConfig
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
)
{
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
inputs
.
size
();
j
++
)
{
predictor
->
Run
(
inputs
[
j
],
outputs
);
}
}
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
timer
.
toc
()
/
num_times
,
inputs
.
size
());
}
void
TestMultiThreadPrediction
(
AnalysisConfig
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
)
{
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
// TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
// because AttentionLSTM's hard code nodeid will be damanged.
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
predictors
.
emplace_back
(
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
));
}
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
// Each thread should have local inputs and outputs.
// The inputs of each thread are all the same.
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs_tid
=
inputs
;
std
::
vector
<
PaddleTensor
>
outputs_tid
;
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
inputs_tid
.
size
();
j
++
)
{
predictors
[
tid
]
->
Run
(
inputs_tid
[
j
],
&
outputs_tid
);
}
}
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
timer
.
toc
()
/
num_times
,
inputs_tid
.
size
());
});
}
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
threads
[
i
].
join
();
}
}
void
TestPrediction
(
AnalysisConfig
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
)
{
if
(
num_threads
==
1
)
{
TestOneThreadPrediction
(
config
,
inputs
,
outputs
);
}
else
{
TestMultiThreadPrediction
(
config
,
inputs
,
outputs
,
num_threads
);
}
}
}
// namespace inference
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录