Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
89d09e65
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
89d09e65
编写于
9月 14, 2018
作者:
T
tensor-tang
提交者:
GitHub
9月 14, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into fea/ut/vis
上级
0bd6476f
d4a5326a
变更
12
显示空白变更内容
内联
并排
Showing
12 changed file
with
233 addition
and
243 deletion
+233
-243
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
+1
-1
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+3
-0
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+7
-1
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+2
-4
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+19
-51
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+18
-55
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+7
-77
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
...nference/tests/api/analyzer_text_classification_tester.cc
+28
-47
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+126
-0
paddle/fluid/operators/conv_mkldnn_op.cc
paddle/fluid/operators/conv_mkldnn_op.cc
+18
-4
paddle/fluid/string/CMakeLists.txt
paddle/fluid/string/CMakeLists.txt
+0
-1
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+4
-2
未找到文件。
paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
浏览文件 @
89d09e65
...
...
@@ -58,7 +58,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
desc
.
SetInput
(
"Input"
,
std
::
vector
<
std
::
string
>
({
conv_relu_i_in
}));
desc
.
SetInput
(
"Filter"
,
std
::
vector
<
std
::
string
>
({
conv_relu_w_in
}));
desc
.
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
conv_relu_b_in
}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
conv_relu_out
}));
desc
.
SetOutput
(
"Out
put
"
,
std
::
vector
<
std
::
string
>
({
conv_relu_out
}));
desc
.
SetType
(
"conv2d"
);
for
(
auto
&
attr
:
conv
->
Op
()
->
GetAttrMap
())
{
desc
.
SetAttr
(
attr
.
first
,
attr
.
second
);
...
...
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
89d09e65
...
...
@@ -72,6 +72,9 @@ class Analyzer : public OrderedRegistry<PassManager> {
"mul_gru_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
#ifdef PADDLE_WITH_MKLDNN
"conv_relu_mkldnn_fuse_pass"
,
//
#endif
}};
std
::
unordered_set
<
std
::
string
>
disabled_ir_passes_
;
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
89d09e65
...
...
@@ -123,10 +123,16 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
}
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
)
{
double
latency
,
int
epoch
=
1
)
{
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms ======"
;
if
(
epoch
>
1
)
{
int
samples
=
batch_size
*
epoch
;
LOG
(
INFO
)
<<
"====== sample number: "
<<
samples
<<
", average latency of each sample: "
<<
latency
/
samples
<<
"ms ======"
;
}
}
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
89d09e65
...
...
@@ -51,12 +51,10 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
# text_classification
set
(
TEXT_CLASSIFICATION_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/text_classification"
)
download_model_and_data
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
"text-classification-Senta.tar.gz"
"text_classification_data.txt.tar.gz"
)
inference_analysis_test
(
test_text_classification SRCS analyzer_text_classification_tester.cc
inference_analysis_test
(
test_
analyzer_
text_classification SRCS analyzer_text_classification_tester.cc
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
ARGS --infer_model=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/text-classification-Senta
--infer_data=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/data.txt
--topn=1
# Just run top 1 batch.
)
--infer_data=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/data.txt
)
# ocr
set
(
OCR_MODEL_URL
"http://paddlemodels.cdn.bcebos.com/inference-vis-demos%2Focr.tar.gz"
)
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
89d09e65
...
...
@@ -12,21 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_model
,
""
,
"model path for LAC"
);
DEFINE_string
(
infer_data
,
""
,
"data file for LAC"
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
burning
,
0
,
"Burning before repeat."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -126,46 +112,37 @@ void TestLACPrediction(const std::string &model_path,
const
std
::
string
&
data_file
,
const
int
batch_size
,
const
int
repeat
,
bool
test_all_data
,
bool
use_analysis
=
false
)
{
NativeConfig
config
;
config
.
model_dir
=
model_path
;
config
.
use_gpu
=
false
;
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs_slots
;
DataRecord
data
(
data_file
,
batch_size
);
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
if
(
use_analysis
)
{
AnalysisConfig
cfg
;
cfg
.
model_dir
=
model_path
;
cfg
.
use_gpu
=
false
;
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs_slots
;
DataRecord
data
(
data_file
,
batch_size
);
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
if
(
use_analysis
)
{
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
onfi
g
);
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
f
g
);
}
for
(
int
i
=
0
;
i
<
FLAGS_burning
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
}
Timer
timer
;
if
(
test_all_data
)
{
double
sum
=
0
;
LOG
(
INFO
)
<<
"Total number of samples: "
<<
data
.
datasets
.
size
();
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
for
(
size_t
bid
=
0
;
bid
<
data
.
batched_datas
.
size
();
++
bid
)
{
GetOneBatch
(
&
input_slots
,
&
data
,
batch_size
);
timer
.
tic
();
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
sum
+=
timer
.
toc
();
}
input_slots_all
.
emplace_back
(
input_slots
);
}
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
sum
/
repeat
);
LOG
(
INFO
)
<<
"Average latency of each sample: "
<<
sum
/
repeat
/
data
.
datasets
.
size
()
<<
" ms"
;
LOG
(
INFO
)
<<
"total number of samples: "
<<
data
.
datasets
.
size
();
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs_slots
,
FLAGS_num_threads
);
return
;
}
timer
.
tic
();
...
...
@@ -190,19 +167,10 @@ void TestLACPrediction(const std::string &model_path,
if
(
use_analysis
)
{
// run once for comparison as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
onfi
g
);
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
f
g
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
EXPECT_EQ
(
ref_outputs_slots
.
size
(),
outputs_slots
.
size
());
auto
&
ref_out
=
ref_outputs_slots
[
0
];
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
EXPECT_EQ
(
size
,
ref_size
);
int64_t
*
pdata_ref
=
static_cast
<
int64_t
*>
(
ref_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
EXPECT_EQ
(
pdata_ref
[
i
],
pdata
[
i
]);
}
CompareResult
(
ref_outputs_slots
,
outputs_slots
);
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
89d09e65
...
...
@@ -12,20 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data path"
);
DEFINE_int32
(
batch_size
,
10
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -113,17 +100,6 @@ const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
48
,
39
,
38
,
16
,
25
};
void
TestChineseNERPrediction
(
bool
use_analysis
)
{
NativeConfig
config
;
config
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
config
.
param_file
=
FLAGS_infer_model
+
"/param"
;
config
.
use_gpu
=
false
;
config
.
device
=
0
;
config
.
specify_input_name
=
true
;
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs
;
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
Timer
timer
;
if
(
use_analysis
)
{
AnalysisConfig
cfg
;
cfg
.
prog_file
=
FLAGS_infer_model
+
"/__model__"
;
cfg
.
param_file
=
FLAGS_infer_model
+
"/param"
;
...
...
@@ -131,32 +107,28 @@ void TestChineseNERPrediction(bool use_analysis) {
cfg
.
device
=
0
;
cfg
.
specify_input_name
=
true
;
cfg
.
enable_ir_optim
=
true
;
std
::
vector
<
PaddleTensor
>
input_slots
,
outputs
;
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
Timer
timer
;
if
(
use_analysis
)
{
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
cfg
);
}
else
{
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
onfi
g
);
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
f
g
);
}
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
double
sum
=
0
;
size_t
num_samples
;
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
// Just one batch, the num_samples remains the same.
num_samples
=
data
.
num_samples
;
for
(
size_t
bid
=
0
;
bid
<
num_samples
/
FLAGS_batch_size
;
++
bid
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
for
(
size_t
bid
=
0
;
bid
<
data
.
num_samples
/
FLAGS_batch_size
;
++
bid
)
{
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
timer
.
tic
();
predictor
->
Run
(
input_slots
,
&
outputs
);
sum
+=
timer
.
toc
();
}
input_slots_all
.
emplace_back
(
input_slots
);
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
num_samples
;
PrintTime
(
FLAGS_batch_size
,
FLAGS_repeat
,
1
,
0
,
sum
/
FLAGS_repeat
);
LOG
(
INFO
)
<<
"average latency of each sample: "
<<
sum
/
FLAGS_repeat
/
num_samples
;
LOG
(
INFO
)
<<
"total number of samples: "
<<
data
.
num_samples
;
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
return
;
}
// Prepare inputs.
...
...
@@ -182,19 +154,10 @@ void TestChineseNERPrediction(bool use_analysis) {
if
(
use_analysis
)
{
// run once for comparison as reference
auto
ref_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
onfi
g
);
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
c
f
g
);
std
::
vector
<
PaddleTensor
>
ref_outputs_slots
;
ref_predictor
->
Run
(
input_slots
,
&
ref_outputs_slots
);
EXPECT_EQ
(
ref_outputs_slots
.
size
(),
outputs
.
size
());
auto
&
ref_out
=
ref_outputs_slots
[
0
];
size_t
ref_size
=
std
::
accumulate
(
ref_out
.
shape
.
begin
(),
ref_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
EXPECT_EQ
(
size
,
ref_size
);
int64_t
*
pdata_ref
=
static_cast
<
int64_t
*>
(
ref_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
EXPECT_EQ
(
pdata_ref
[
i
],
result
[
i
]);
}
CompareResult
(
ref_outputs_slots
,
outputs
);
AnalysisPredictor
*
analysis_predictor
=
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
89d09e65
...
...
@@ -12,24 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include <thread> // NOLINT
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data path"
);
DEFINE_int32
(
batch_size
,
10
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -164,26 +147,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
void
CompareResult
(
const
std
::
vector
<
PaddleTensor
>
&
outputs
,
const
std
::
vector
<
PaddleTensor
>
&
base_outputs
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
base_outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
auto
&
base_out
=
base_outputs
[
i
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
size1
=
std
::
accumulate
(
base_out
.
shape
.
begin
(),
base_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
PADDLE_ENFORCE_EQ
(
size
,
size1
);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
data
=
static_cast
<
float
*>
(
out
.
data
.
data
());
float
*
base_data
=
static_cast
<
float
*>
(
base_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
data
[
i
],
base_data
[
i
],
1e-3
);
}
}
}
// Test with a really complicated model.
void
TestRNN1Prediction
(
bool
use_analysis
,
bool
activate_ir
,
int
num_threads
)
{
AnalysisConfig
config
;
...
...
@@ -198,7 +161,6 @@ void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
config
.
ir_passes
.
clear
();
// Do not exclude any pass.
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
auto
base_predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
...
...
@@ -213,45 +175,14 @@ void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
base_predictor
->
Run
(
input_slots
,
&
base_outputs
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
input_slots_all
.
emplace_back
(
input_slots
);
if
(
num_threads
==
1
)
{
// Prepare inputs.
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs
);
}
PrintTime
(
batch_size
,
num_times
,
1
,
0
,
timer
.
toc
()
/
num_times
);
TestOneThreadPrediction
(
config
,
input_slots_all
,
&
outputs
);
CompareResult
(
outputs
,
base_outputs
);
}
else
{
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
// TODO(yanchunwei): Bug here, the analyzer phase can't be parallelized
// because AttentionLSTM's hard-coded node id will be damaged.
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
predictors
.
emplace_back
(
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
));
}
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
// Each thread should have local input_slots and outputs.
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
FLAGS_infer_data
,
batch_size
);
PrepareInputs
(
&
input_slots
,
&
data
,
batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
;
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
predictors
[
tid
]
->
Run
(
input_slots
,
&
outputs
);
}
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
timer
.
toc
()
/
num_times
);
CompareResult
(
outputs
,
base_outputs
);
});
}
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
threads
[
i
].
join
();
}
// only return the output of first thread
TestMultiThreadPrediction
(
config
,
input_slots_all
,
&
outputs
,
num_threads
);
}
if
(
use_analysis
&&
activate_ir
)
{
...
...
@@ -293,8 +224,7 @@ TEST(Analyzer, RNN_tests) {
// Directly infer with the original model.
TestRNN1Prediction
(
false
,
false
,
i
);
// Inference with the original model with the analysis turned on, the
// analysis
// module will transform the program to a data flow graph.
// analysis module will transform the program to a data flow graph.
TestRNN1Prediction
(
true
,
false
,
i
);
// Inference with analysis and IR. The IR module will fuse some large
// kernels.
...
...
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
浏览文件 @
89d09e65
...
...
@@ -12,23 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <gtest/gtest.h>
#include <fstream>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/timer.h"
DEFINE_string
(
infer_model
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
infer_data
,
""
,
"Path of the dataset."
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"How many times to repeat run."
);
DEFINE_int32
(
topn
,
-
1
,
"Run top n batches of data to save time"
);
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
...
...
@@ -37,24 +21,25 @@ struct DataReader {
explicit
DataReader
(
const
std
::
string
&
path
)
:
file
(
new
std
::
ifstream
(
path
))
{}
bool
NextBatch
(
PaddleTensor
*
tensor
,
int
batch_size
)
{
bool
NextBatch
(
std
::
vector
<
PaddleTensor
>
*
input
,
int
batch_size
)
{
PADDLE_ENFORCE_EQ
(
batch_size
,
1
);
std
::
string
line
;
tensor
->
lod
.
clear
();
tensor
->
lod
.
emplace_back
(
std
::
vector
<
size_t
>
({
0
}));
PaddleTensor
tensor
;
tensor
.
dtype
=
PaddleDType
::
INT64
;
tensor
.
lod
.
emplace_back
(
std
::
vector
<
size_t
>
({
0
}));
std
::
vector
<
int64_t
>
data
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
if
(
!
std
::
getline
(
*
file
,
line
))
return
false
;
inference
::
split_to_int64
(
line
,
' '
,
&
data
);
}
tensor
->
lod
.
front
().
push_back
(
data
.
size
());
tensor
.
lod
.
front
().
push_back
(
data
.
size
());
tensor
->
data
.
Resize
(
data
.
size
()
*
sizeof
(
int64_t
));
memcpy
(
tensor
->
data
.
data
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
int64_t
));
tensor
->
shape
.
clear
(
);
tensor
->
shape
.
push_back
(
data
.
size
()
);
tensor
->
shape
.
push_back
(
1
);
tensor
.
data
.
Resize
(
data
.
size
()
*
sizeof
(
int64_t
));
memcpy
(
tensor
.
data
.
data
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
int64_t
));
tensor
.
shape
.
push_back
(
data
.
size
()
);
tensor
.
shape
.
push_back
(
1
);
input
->
assign
({
tensor
}
);
return
true
;
}
...
...
@@ -68,32 +53,28 @@ void Main(int batch_size) {
config
.
model_dir
=
FLAGS_infer_model
;
config
.
use_gpu
=
false
;
config
.
enable_ir_optim
=
true
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
(
1
);
// one batch starts
// data --
auto
&
input
=
input_slots
[
0
];
input
.
dtype
=
PaddleDType
::
INT64
;
inference
::
Timer
timer
;
double
sum
=
0
;
std
::
vector
<
PaddleTensor
>
output_slots
;
std
::
vector
<
PaddleTensor
>
input_slots
,
output_slots
;
DataReader
reader
(
FLAGS_infer_data
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
int
num_batches
=
0
;
for
(
int
t
=
0
;
t
<
FLAGS_repeat
;
t
++
)
{
DataReader
reader
(
FLAGS_infer_data
);
while
(
reader
.
NextBatch
(
&
input
,
FLAGS_batch_size
))
{
if
(
FLAGS_topn
>
0
&&
num_batches
>
FLAGS_topn
)
break
;
timer
.
tic
();
CHECK
(
predictor
->
Run
(
input_slots
,
&
output_slots
));
sum
+=
timer
.
toc
();
while
(
reader
.
NextBatch
(
&
input_slots
,
FLAGS_batch_size
))
{
input_slots_all
.
emplace_back
(
input_slots
);
++
num_batches
;
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
num_batches
*
FLAGS_batch_size
;
TestPrediction
(
config
,
input_slots_all
,
&
output_slots
,
FLAGS_num_threads
);
return
;
}
PrintTime
(
batch_size
,
FLAGS_repeat
,
1
,
0
,
sum
/
FLAGS_repeat
);
// one batch starts
// data --
reader
.
NextBatch
(
&
input_slots
,
FLAGS_batch_size
);
input_slots_all
.
emplace_back
(
input_slots
);
TestPrediction
(
config
,
input_slots_all
,
&
output_slots
,
FLAGS_num_threads
);
// Get output
LOG
(
INFO
)
<<
"get outputs "
<<
output_slots
.
size
();
...
...
paddle/fluid/inference/tests/api/tester_helper.h
0 → 100644
浏览文件 @
89d09e65
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <gtest/gtest.h>
#include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/profiler.h"
// Command-line flags shared by the analyzer tester programs that include this
// header.
// NOTE(review): these are flag *definitions* placed in a header, so this
// header is presumably meant to be included by exactly one translation unit
// per test binary -- confirm before including it from several .cc files.

// --infer_model: path of the model to load; empty by default.
DEFINE_string
(
infer_model
,
""
,
"model path"
);
// --infer_data: path of the input data file; empty by default.
DEFINE_string
(
infer_data
,
""
,
"data file"
);
// --batch_size: batch size, 1 by default. The prediction helpers in this
// header read it only to report it through PrintTime.
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
// --burning: 0 by default.
// NOTE(review): presumably the number of warm-up runs executed before the
// timed repeats; nothing in this header reads it -- confirm against the
// tester sources.
DEFINE_int32
(
burning
,
0
,
"Burning before repeat."
);
// --repeat: how many times the prediction helpers in this header run the
// whole input set; 1 by default.
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
// --test_all_data: false by default; nothing in this header reads it.
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
// --num_threads: 1 by default; nothing in this header reads the flag
// directly (the helpers take the thread count as an argument).
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
namespace
paddle
{
namespace
inference
{
void
CompareResult
(
const
std
::
vector
<
PaddleTensor
>
&
outputs
,
const
std
::
vector
<
PaddleTensor
>
&
base_outputs
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
base_outputs
.
size
());
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
i
++
)
{
auto
&
out
=
outputs
[
i
];
auto
&
base_out
=
base_outputs
[
i
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
size1
=
std
::
accumulate
(
base_out
.
shape
.
begin
(),
base_out
.
shape
.
end
(),
1
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
PADDLE_ENFORCE_EQ
(
size
,
size1
);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
data
=
static_cast
<
float
*>
(
out
.
data
.
data
());
float
*
base_data
=
static_cast
<
float
*>
(
base_out
.
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
data
[
i
],
base_data
[
i
],
1e-3
);
}
}
}
// Creates one analysis predictor from `config`, runs every batch of `inputs`
// through it FLAGS_repeat times, and reports the timing through PrintTime.
//
// config:  configuration handed to CreatePaddlePredictor.
// inputs:  one entry per batch; each entry is the list of input tensors for a
//          single Run call. Taken by const reference so the (possibly whole)
//          data set is not deep-copied on every call.
// outputs: passed unchanged to every Run call.
//
// The value given to PrintTime as latency is the elapsed time of the timed
// loop divided by FLAGS_repeat, i.e. the average over the repeats of one full
// pass across all batches; inputs.size() is passed as PrintTime's last
// argument.
void TestOneThreadPrediction(
    AnalysisConfig config,
    const std::vector<std::vector<PaddleTensor>> &inputs,
    std::vector<PaddleTensor> *outputs) {
  const int batch_size = FLAGS_batch_size;
  const int num_times = FLAGS_repeat;
  auto predictor =
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);

  Timer timer;
  timer.tic();
  for (int i = 0; i < num_times; i++) {
    for (const auto &batch : inputs) {
      predictor->Run(batch, outputs);
    }
  }
  // PrintTime takes an int; make the size_t -> int conversion explicit.
  PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times,
            static_cast<int>(inputs.size()));
}
// Runs the same inputs on `num_threads` threads, each with its own analysis
// predictor, and reports the timing of every thread through PrintTime.
//
// config:      configuration handed to CreatePaddlePredictor for every
//              predictor.
// inputs:      one entry per batch; each entry is the list of input tensors
//              for a single Run call. Taken by const reference so the data set
//              is not deep-copied on every call; each worker thread still
//              makes its own private copy before it starts timing.
// outputs:     not written by this function -- every thread keeps its results
//              in a thread-local vector that is discarded.
// num_threads: number of predictors/threads to create.
//
// All predictors are created sequentially before any thread is started, and
// every thread is joined before the function returns, so the by-reference
// lambda captures do not outlive the objects they refer to.
void TestMultiThreadPrediction(
    AnalysisConfig config,
    const std::vector<std::vector<PaddleTensor>> &inputs,
    std::vector<PaddleTensor> *outputs, int num_threads) {
  const int batch_size = FLAGS_batch_size;
  const int num_times = FLAGS_repeat;
  std::vector<std::thread> threads;
  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
  // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelized
  // because AttentionLSTM's hard-coded node id will be damaged.
  for (int tid = 0; tid < num_threads; ++tid) {
    predictors.emplace_back(
        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
            config));
  }
  for (int tid = 0; tid < num_threads; ++tid) {
    threads.emplace_back([&, tid]() {
      // Each thread should have local inputs and outputs.
      // The inputs of each thread are all the same.
      std::vector<std::vector<PaddleTensor>> inputs_tid = inputs;
      std::vector<PaddleTensor> outputs_tid;
      Timer timer;
      timer.tic();
      for (int i = 0; i < num_times; i++) {
        for (const auto &batch : inputs_tid) {
          predictors[tid]->Run(batch, &outputs_tid);
        }
      }
      // PrintTime takes an int; make the size_t -> int conversion explicit.
      PrintTime(batch_size, num_times, num_threads, tid,
                timer.toc() / num_times, static_cast<int>(inputs_tid.size()));
    });
  }
  for (auto &worker : threads) {
    worker.join();
  }
}
// Dispatches a timed prediction run to the single-threaded or multi-threaded
// helper.
//
// config:      configuration forwarded to the selected helper.
// inputs:      one entry per batch; each entry is the list of input tensors
//              for a single Run call. Taken by const reference so the data set
//              is not deep-copied just to be forwarded.
// outputs:     forwarded to the selected helper.
// num_threads: exactly 1 selects TestOneThreadPrediction; any other value
//              selects TestMultiThreadPrediction with that thread count.
void TestPrediction(AnalysisConfig config,
                    const std::vector<std::vector<PaddleTensor>> &inputs,
                    std::vector<PaddleTensor> *outputs, int num_threads) {
  if (num_threads == 1) {
    TestOneThreadPrediction(config, inputs, outputs);
  } else {
    TestMultiThreadPrediction(config, inputs, outputs, num_threads);
  }
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/operators/conv_mkldnn_op.cc
浏览文件 @
89d09e65
...
...
@@ -302,8 +302,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
bool
fuse_relu
=
ctx
.
Attr
<
bool
>
(
"fuse_relu"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
// TODO(pzelazko-intel) add support for group convolution and dilation
PADDLE_ENFORCE
(
groups
==
1
,
"group convolution is not implemented yet"
);
// TODO: add support for dilation
PADDLE_ENFORCE
(
dilations
.
size
()
==
2
&&
dilations
[
0
]
==
1
&&
dilations
[
1
]
==
1
,
"dilation in convolution is not implemented yet"
);
...
...
@@ -314,6 +313,19 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std
::
vector
<
int
>
src_tz
=
paddle
::
framework
::
vectorize2int
(
input
->
dims
());
std
::
vector
<
int
>
weights_tz
=
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
int
g
=
std
::
max
(
groups
,
1
);
if
(
g
>
1
)
{
int
o
=
weights_tz
[
0
];
int
i
=
weights_tz
[
1
];
int
h
=
weights_tz
[
2
];
int
w
=
weights_tz
[
3
];
weights_tz
.
resize
(
5
);
weights_tz
[
0
]
=
g
;
weights_tz
[
1
]
=
o
/
g
;
weights_tz
[
2
]
=
i
;
weights_tz
[
3
]
=
h
;
weights_tz
[
4
]
=
w
;
}
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
// Get unique name for storing MKLDNN primitives
...
...
@@ -327,7 +339,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
user_src_md
=
platform
::
MKLDNNMemDesc
(
{
src_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
input
->
format
());
auto
user_weights_md
=
platform
::
MKLDNNMemDesc
(
{
weights_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
filter
->
format
());
{
weights_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
(
g
==
1
)
?
filter
->
format
()
:
mkldnn
::
memory
::
format
::
goihw
);
/* create memory descriptor for convolution without specified format
* ('any') which lets a primitive (convolution in this case) choose
...
...
@@ -340,7 +353,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
auto
weights_md
=
platform
::
MKLDNNMemDesc
(
weights_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
weights_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
(
g
==
1
)
?
chosen_memory_format
:
mkldnn
::
memory
::
format
::
goihw
);
std
::
vector
<
int
>
bias_tz
;
// TODO(mgallus): avoid empty vector creation.
// Currently used whenever bias is != nullptr.
auto
dst_md
=
platform
::
MKLDNNMemDesc
(
...
...
paddle/fluid/string/CMakeLists.txt
浏览文件 @
89d09e65
cc_library
(
stringpiece SRCS piece.cc
)
cc_library
(
pretty_log SRCS pretty_log.cc
)
cc_test
(
test_pretty_log SRCS pretty_log.cc
)
cc_test
(
stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags
)
cc_test
(
stringprintf_test SRCS printf_test.cc DEPS glog gflags
)
cc_test
(
to_string_test SRCS to_string_test.cc
)
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
89d09e65
...
...
@@ -50,6 +50,7 @@ function(py_test_modules TARGET_NAME)
endfunction
()
list
(
REMOVE_ITEM TEST_OPS test_warpctc_op
)
list
(
REMOVE_ITEM TEST_OPS test_dist_train
)
list
(
REMOVE_ITEM TEST_OPS test_dist_transpiler
)
list
(
REMOVE_ITEM TEST_OPS test_parallel_executor_crf
)
list
(
REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed
)
list
(
REMOVE_ITEM TEST_OPS test_dist_se_resnext
)
...
...
@@ -65,11 +66,12 @@ if(WITH_DISTRIBUTE)
set_tests_properties
(
test_listen_and_serv_op PROPERTIES TIMEOUT 20
)
set_tests_properties
(
test_dist_mnist PROPERTIES TIMEOUT 200
)
set_tests_properties
(
test_dist_word2vec PROPERTIES TIMEOUT 200
)
py_test_modules
(
test_dist_transpiler MODULES test_dist_transpiler
)
py_test_modules
(
test_dist_transformer MODULES test_dist_transformer SERIAL
)
py_test_modules
(
test_dist_se_resnext MODULES test_dist_se_resnext SERIAL
)
endif
()
py_test_modules
(
test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL
)
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
set_tests_properties
(
test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 150
)
py_test_modules
(
test_dist_transformer MODULES test_dist_transformer SERIAL
)
py_test_modules
(
test_dist_se_resnext MODULES test_dist_se_resnext SERIAL
)
py_test_modules
(
test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL
)
py_test_modules
(
test_image_classification_resnet MODULES test_image_classification_resnet SERIAL
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录