Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
07f495ec
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
07f495ec
编写于
9月 07, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fix_python35_CI_random_fail
上级
2bb0ac92
f76f42c2
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
153 addition
and
82 deletion
+153
-82
CMakeLists.txt
CMakeLists.txt
+3
-1
cmake/external/anakin.cmake
cmake/external/anakin.cmake
+6
-13
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+1
-1
doc/fluid/api/layers.rst
doc/fluid/api/layers.rst
+16
-0
doc/survey/dynamic_graph.md
doc/survey/dynamic_graph.md
+1
-1
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+18
-11
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+6
-1
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
+2
-8
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
+28
-8
paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc
...inference/analysis/analyzer_text_classification_tester.cc
+48
-33
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+17
-5
paddle/fluid/inference/api/api_anakin_engine.cc
paddle/fluid/inference/api/api_anakin_engine.cc
+7
-0
未找到文件。
CMakeLists.txt
浏览文件 @
07f495ec
...
...
@@ -213,9 +213,11 @@ include(configure) # add paddle env configuration
if
(
WITH_GPU
)
include
(
cuda
)
include
(
tensorrt
)
endif
()
if
(
WITH_MKL OR WITH_MKLML
)
include
(
external/anakin
)
elseif
()
set
(
WITH_ANAKIN OFF CACHE STRING
"Anakin is used in
GPU
only now."
FORCE
)
set
(
WITH_ANAKIN OFF CACHE STRING
"Anakin is used in
MKL
only now."
FORCE
)
endif
()
include
(
generic
)
# simplify cmake module
...
...
cmake/external/anakin.cmake
浏览文件 @
07f495ec
...
...
@@ -16,16 +16,6 @@ set(ANAKIN_LIBRARY ${ANAKIN_INSTALL_DIR})
set
(
ANAKIN_SHARED_LIB
${
ANAKIN_LIBRARY
}
/libanakin.so
)
set
(
ANAKIN_SABER_LIB
${
ANAKIN_LIBRARY
}
/libanakin_saber_common.so
)
# TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
set
(
INFERENCE_URL
"http://paddle-inference-dist.bj.bcebos.com"
)
set
(
ANAKIN_MODLE_URL
"
${
INFERENCE_URL
}
/mobilenet_v2.anakin.bin"
)
set
(
ANAKIN_RNN_MODLE_URL
"
${
INFERENCE_URL
}
/anakin_test%2Fditu_rnn.anakin2.model.bin"
)
set
(
ANAKIN_RNN_DATA_URL
"
${
INFERENCE_URL
}
/anakin_test%2Fditu_rnn_data.txt"
)
execute_process
(
COMMAND bash -c
"mkdir -p
${
ANAKIN_SOURCE_DIR
}
"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_MODLE_URL
}
-N"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_RNN_MODLE_URL
}
-N"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_RNN_DATA_URL
}
-N"
)
include_directories
(
${
ANAKIN_INCLUDE
}
)
include_directories
(
${
ANAKIN_INCLUDE
}
/saber/
)
include_directories
(
${
ANAKIN_INCLUDE
}
/saber/core/
)
...
...
@@ -48,6 +38,11 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS
-Wno-reorder
-Wno-error=cpp
)
if
(
WITH_GPU
)
set
(
CMAKE_ARGS_PREFIX -DUSE_GPU_PLACE=YES -DCUDNN_ROOT=
${
CUDNN_ROOT
}
-DCUDNN_INCLUDE_DIR=
${
CUDNN_INCLUDE_DIR
}
)
else
()
set
(
CMAKE_ARGS_PREFIX -DUSE_GPU_PLACE=NO
)
endif
()
ExternalProject_Add
(
extern_anakin
${
EXTERNAL_PROJECT_LOG_ARGS
}
...
...
@@ -56,13 +51,11 @@ ExternalProject_Add(
GIT_TAG
"9424277cf9ae180a14aff09560d3cd60a49c76d2"
PREFIX
${
ANAKIN_SOURCE_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS
-DUSE_GPU_PLACE=YES
CMAKE_ARGS
${
CMAKE_ARGS_PREFIX
}
-DUSE_X86_PLACE=YES
-DBUILD_WITH_UNIT_TEST=NO
-DPROTOBUF_ROOT=
${
THIRD_PARTY_PATH
}
/install/protobuf
-DMKLML_ROOT=
${
THIRD_PARTY_PATH
}
/install/mklml
-DCUDNN_ROOT=
${
CUDNN_ROOT
}
-DCUDNN_INCLUDE_DIR=
${
CUDNN_INCLUDE_DIR
}
-DENABLE_OP_TIMER=
${
ANAKIN_ENABLE_OP_TIMER
}
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
ANAKIN_INSTALL_DIR
}
...
...
cmake/inference_lib.cmake
浏览文件 @
07f495ec
...
...
@@ -145,7 +145,7 @@ copy(memory_lib
set
(
inference_deps paddle_fluid_shared paddle_fluid
)
set
(
module
"inference/api"
)
if
(
WITH_ANAKIN AND WITH_
GPU
)
if
(
WITH_ANAKIN AND WITH_
MKL
)
copy
(
anakin_inference_lib DEPS paddle_inference_api inference_anakin_api
SRCS
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/libinference_anakin_api*
# compiled anakin api
...
...
doc/fluid/api/layers.rst
浏览文件 @
07f495ec
...
...
@@ -822,6 +822,14 @@ pad
.. autofunction:: paddle.fluid.layers.pad
:noindex:
.. _api_fluid_layers_pad_constant_like:
pad_constant_like
---
.. autofunction:: paddle.fluid.layers.pad_constant_like
:noindex:
.. _api_fluid_layers_label_smooth:
label_smooth
...
...
@@ -1145,6 +1153,14 @@ sigmoid
.. autofunction:: paddle.fluid.layers.sigmoid
:noindex:
.. _api_fluid_layers_hsigmoid:
hsigmoid
-------
.. autofunction:: paddle.fluid.layers.hsigmoid
:noindex:
.. _api_fluid_layers_logsigmoid:
logsigmoid
...
...
doc/survey/dynamic_graph.md
浏览文件 @
07f495ec
...
...
@@ -30,7 +30,7 @@ x = Variable(randn(20, 1)))
label
=
Variable
(
randint
(
1
))
W_1
,
W_2
=
Variable
(
randn
(
20
,
20
)),
Variable
(
randn
(
10
,
20
))
h
=
matmul
(
W_1
,
x
)
pred
=
matmul
(
W_2
,
x
)
pred
=
matmul
(
W_2
,
h
)
loss
=
softmax
(
pred
,
label
)
loss
.
backward
()
```
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
07f495ec
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
file
(
WRITE
${
pass_file
}
"// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
file
(
APPEND
${
pass_file
}
"
\#
include
\"
paddle/fluid/framework/ir/pass.h
\"\n
"
)
function
(
pass_library TARGET
)
# Usage: pass_library(target inference) will append to paddle_inference_pass.h
function
(
pass_library TARGET DEST
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS
)
cmake_parse_arguments
(
op_library
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cc_library
(
${
TARGET
}
SRCS
${
TARGET
}
.cc DEPS graph_pattern_detector pass
)
file
(
APPEND
${
pass_file
}
"USE_PASS(
${
TARGET
}
);
\n
"
)
set
(
PASS_LIBRARY
${
TARGET
}
${
PASS_LIBRARY
}
PARENT_SCOPE
)
cc_library
(
${
TARGET
}
SRCS
${
TARGET
}
.cc DEPS graph_pattern_detector pass
${
op_library_DEPS
}
)
# add more DEST here, such as train, dist and collect USE_PASS into a file automatically.
if
(
${
DEST
}
STREQUAL
"base"
OR
${
DEST
}
STREQUAL
"inference"
)
message
(
STATUS
"add pass
${
TARGET
}
${
DEST
}
"
)
file
(
APPEND
${
pass_file
}
"USE_PASS(
${
TARGET
}
);
\n
"
)
set
(
PASS_LIBRARY
${
TARGET
}
${
PASS_LIBRARY
}
PARENT_SCOPE
)
endif
()
endfunction
()
cc_library
(
node SRCS node.cc DEPS proto_desc
)
...
...
@@ -18,13 +25,13 @@ cc_library(pass SRCS pass.cc DEPS graph node graph_helper)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits
)
pass_library
(
graph_to_program_pass
)
pass_library
(
graph_viz_pass
)
pass_library
(
fc_fuse_pass
)
pass_library
(
attention_lstm_fuse_pass
)
pass_library
(
infer_clean_graph_pass
)
pass_library
(
fc_lstm_fuse_pass
)
pass_library
(
seq_concat_fc_fuse_pass
)
pass_library
(
graph_to_program_pass
base
)
pass_library
(
graph_viz_pass
base
)
pass_library
(
fc_fuse_pass
inference
)
pass_library
(
attention_lstm_fuse_pass
inference
)
pass_library
(
infer_clean_graph_pass
inference
)
pass_library
(
fc_lstm_fuse_pass
inference
)
pass_library
(
seq_concat_fc_fuse_pass
inference
)
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
07f495ec
...
...
@@ -100,12 +100,17 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
set
(
TEXT_CLASSIFICATION_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/text-classification-Senta.tar.gz"
)
set
(
TEXT_CLASSIFICATION_DATA_URL
"http://paddle-inference-dist.bj.bcebos.com/text_classification_data.txt.tar.gz"
)
set
(
TEXT_CLASSIFICATION_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/text_classification"
CACHE PATH
"Text Classification model and data root."
FORCE
)
if
(
NOT EXISTS
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
AND WITH_TESTING AND WITH_INFERENCE
)
inference_download_and_uncompress
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
${
TEXT_CLASSIFICATION_MODEL_URL
}
"text-classification-Senta.tar.gz"
)
inference_download_and_uncompress
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
${
TEXT_CLASSIFICATION_DATA_URL
}
"text_classification_data.txt.tar.gz"
)
endif
()
inference_analysis_test
(
test_text_classification SRCS analyzer_text_classification_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
ARGS --infer_model=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/text-classification-Senta
)
ARGS --infer_model=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/text-classification-Senta
--infer_data=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/data.txt
--topn=1
# Just run top 1 batch.
)
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
浏览文件 @
07f495ec
...
...
@@ -114,12 +114,6 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
PADDLE_ENFORCE_EQ
(
batch_size
,
static_cast
<
int
>
(
one_batch
.
lod
.
size
()
-
1
));
input_slots
->
assign
({
input_tensor
});
}
static
void
PrintTime
(
const
double
latency
,
const
int
bs
,
const
int
repeat
)
{
LOG
(
INFO
)
<<
"===========profile result==========="
;
LOG
(
INFO
)
<<
"batch_size: "
<<
bs
<<
", repeat: "
<<
repeat
<<
", avg latency: "
<<
latency
/
repeat
<<
"ms"
;
LOG
(
INFO
)
<<
"====================================="
;
}
void
BenchAllData
(
const
std
::
string
&
model_path
,
const
std
::
string
&
data_file
,
const
int
batch_size
,
const
int
repeat
)
{
NativeConfig
config
;
...
...
@@ -145,7 +139,7 @@ void BenchAllData(const std::string &model_path, const std::string &data_file,
sum
+=
timer
.
toc
();
}
}
PrintTime
(
sum
,
batch_size
,
repeat
);
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
sum
/
repeat
);
}
const
int64_t
lac_ref_data
[]
=
{
24
,
25
,
25
,
25
,
38
,
30
,
31
,
14
,
15
,
44
,
24
,
25
,
25
,
25
,
25
,
25
,
44
,
24
,
25
,
25
,
25
,
36
,
42
,
43
,
...
...
@@ -176,7 +170,7 @@ void TestLACPrediction(const std::string &model_path,
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
}
PrintTime
(
timer
.
toc
(),
batch_size
,
repeat
);
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
timer
.
toc
()
/
repeat
);
EXPECT_EQ
(
outputs_slots
.
size
(),
1UL
);
auto
&
out
=
outputs_slots
[
0
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
...
...
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
浏览文件 @
07f495ec
...
...
@@ -25,6 +25,7 @@ DEFINE_string(infer_model, "", "model path");
DEFINE_string
(
infer_data
,
""
,
"data path"
);
DEFINE_int32
(
batch_size
,
10
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
namespace
paddle
{
namespace
inference
{
...
...
@@ -35,6 +36,7 @@ struct DataRecord {
std
::
vector
<
size_t
>
lod
;
// two inputs have the same lod info.
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
...
...
@@ -81,6 +83,7 @@ struct DataRecord {
word_data_all
.
push_back
(
std
::
move
(
word_data
));
mention_data_all
.
push_back
(
std
::
move
(
mention_data
));
}
num_samples
=
num_lines
;
}
};
...
...
@@ -120,21 +123,38 @@ void TestChineseNERPrediction() {
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
;
Timer
timer
;
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
double
sum
=
0
;
size_t
num_samples
;
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
num_samples
=
data
.
num_samples
;
for
(
size_t
bid
=
0
;
bid
<
num_samples
;
++
bid
)
{
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
timer
.
tic
();
predictor
->
Run
(
input_slots
,
&
outputs
);
sum
+=
timer
.
toc
();
}
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
num_samples
;
PrintTime
(
FLAGS_batch_size
,
FLAGS_repeat
,
1
,
0
,
sum
/
FLAGS_repeat
);
LOG
(
INFO
)
<<
"average latency of each sample: "
<<
sum
/
FLAGS_repeat
/
num_samples
;
return
;
}
// Prepare inputs.
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
;
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs
);
}
LOG
(
INFO
)
<<
"===========profile result==========="
;
LOG
(
INFO
)
<<
"batch_size: "
<<
FLAGS_batch_size
<<
", repeat: "
<<
FLAGS_repeat
<<
", latency: "
<<
timer
.
toc
()
/
FLAGS_repeat
<<
"ms"
;
LOG
(
INFO
)
<<
"====================================="
;
PrintTime
(
FLAGS_batch_size
,
FLAGS_repeat
,
1
,
0
,
timer
.
toc
()
/
FLAGS_repeat
);
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
auto
&
out
=
outputs
[
0
];
...
...
paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc
浏览文件 @
07f495ec
...
...
@@ -16,8 +16,10 @@
#include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <gtest/gtest.h>
#include <fstream>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/timer.h"
...
...
@@ -26,60 +28,72 @@ DEFINE_string(infer_model, "", "Directory of the inference model.");
DEFINE_string
(
infer_data
,
""
,
"Path of the dataset."
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"How many times to repeat run."
);
DEFINE_int32
(
topn
,
-
1
,
"Run top n batches of data to save time"
);
namespace
paddle
{
namespace
inference
{
template
<
typename
T
>
std
::
string
to_string
(
const
std
::
vector
<
T
>
&
vec
)
{
std
::
stringstream
ss
;
for
(
const
auto
&
c
:
vec
)
{
ss
<<
c
<<
" "
;
}
return
ss
.
str
();
}
struct
DataReader
{
explicit
DataReader
(
const
std
::
string
&
path
)
:
file
(
new
std
::
ifstream
(
path
))
{}
void
PrintTime
(
const
double
latency
,
const
int
bs
,
const
int
repeat
)
{
LOG
(
INFO
)
<<
"===========profile result==========="
;
LOG
(
INFO
)
<<
"batch_size: "
<<
bs
<<
", repeat: "
<<
repeat
<<
", avg latency: "
<<
latency
/
repeat
<<
"ms"
;
LOG
(
INFO
)
<<
"====================================="
;
}
bool
NextBatch
(
PaddleTensor
*
tensor
,
int
batch_size
)
{
PADDLE_ENFORCE_EQ
(
batch_size
,
1
)
;
std
::
string
line
;
tensor
->
lod
.
clear
()
;
tensor
->
lod
.
emplace_back
(
std
::
vector
<
size_t
>
({
0
}))
;
std
::
vector
<
int64_t
>
data
;
void
Main
(
int
batch_size
)
{
// Three sequence inputs.
std
::
vector
<
PaddleTensor
>
input_slots
(
1
);
// one batch starts
// data --
int64_t
data0
[]
=
{
0
,
1
,
2
};
for
(
auto
&
input
:
input_slots
)
{
input
.
data
.
Reset
(
data0
,
sizeof
(
data0
));
input
.
shape
=
std
::
vector
<
int
>
({
3
,
1
});
// dtype --
input
.
dtype
=
PaddleDType
::
INT64
;
// LoD --
input
.
lod
=
std
::
vector
<
std
::
vector
<
size_t
>>
({{
0
,
3
}});
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
if
(
!
std
::
getline
(
*
file
,
line
))
return
false
;
inference
::
split_to_int64
(
line
,
' '
,
&
data
);
}
tensor
->
lod
.
front
().
push_back
(
data
.
size
());
tensor
->
data
.
Resize
(
data
.
size
()
*
sizeof
(
int64_t
));
memcpy
(
tensor
->
data
.
data
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
int64_t
));
tensor
->
shape
.
clear
();
tensor
->
shape
.
push_back
(
data
.
size
());
tensor
->
shape
.
push_back
(
1
);
return
true
;
}
std
::
unique_ptr
<
std
::
ifstream
>
file
;
};
void
Main
(
int
batch_size
)
{
// shape --
// Create Predictor --
AnalysisConfig
config
;
config
.
model_dir
=
FLAGS_infer_model
;
config
.
use_gpu
=
false
;
config
.
enable_ir_optim
=
true
;
config
.
ir_passes
.
push_back
(
"fc_lstm_fuse_pass"
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
(
1
);
// one batch starts
// data --
auto
&
input
=
input_slots
[
0
];
input
.
dtype
=
PaddleDType
::
INT64
;
inference
::
Timer
timer
;
double
sum
=
0
;
std
::
vector
<
PaddleTensor
>
output_slots
;
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
timer
.
tic
();
CHECK
(
predictor
->
Run
(
input_slots
,
&
output_slots
));
sum
+=
timer
.
toc
();
int
num_batches
=
0
;
for
(
int
t
=
0
;
t
<
FLAGS_repeat
;
t
++
)
{
DataReader
reader
(
FLAGS_infer_data
);
while
(
reader
.
NextBatch
(
&
input
,
FLAGS_batch_size
))
{
if
(
FLAGS_topn
>
0
&&
num_batches
>
FLAGS_topn
)
break
;
timer
.
tic
();
CHECK
(
predictor
->
Run
(
input_slots
,
&
output_slots
));
sum
+=
timer
.
toc
();
++
num_batches
;
}
}
PrintTime
(
sum
,
batch_size
,
FLAGS_repeat
);
PrintTime
(
batch_size
,
FLAGS_repeat
,
1
,
0
,
sum
/
FLAGS_repeat
);
// Get output
LOG
(
INFO
)
<<
"get outputs "
<<
output_slots
.
size
();
...
...
@@ -100,4 +114,5 @@ void Main(int batch_size) {
TEST
(
text_classification
,
basic
)
{
Main
(
FLAGS_batch_size
);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
07f495ec
...
...
@@ -61,7 +61,7 @@ cc_library(paddle_inference_tensorrt_subgraph_engine
inference_api_test
(
test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec
)
endif
()
if
(
WITH_ANAKIN AND WITH_
GPU
)
# only needed in CI
if
(
WITH_ANAKIN AND WITH_
MKL
)
# only needed in CI
# compile the libinference_anakin_api.a and anakin.so.
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber
)
...
...
@@ -71,12 +71,24 @@ if (WITH_ANAKIN AND WITH_GPU) # only needed in CI
anakin_target
(
inference_anakin_api
)
anakin_target
(
inference_anakin_api_shared
)
if
(
WITH_TESTING
)
cc_test
(
api_anakin_engine_tester SRCS api_anakin_engine_tester.cc
ARGS --model=
${
ANAKIN_SOURCE_DIR
}
/mobilenet_v2.anakin.bin
DEPS inference_anakin_api_shared dynload_cuda SERIAL
)
# TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
set
(
INFERENCE_URL
"http://paddle-inference-dist.bj.bcebos.com"
)
set
(
ANAKIN_RNN_MODLE_URL
"
${
INFERENCE_URL
}
/anakin_test%2Fditu_rnn.anakin2.model.bin"
)
set
(
ANAKIN_RNN_DATA_URL
"
${
INFERENCE_URL
}
/anakin_test%2Fditu_rnn_data.txt"
)
execute_process
(
COMMAND bash -c
"mkdir -p
${
ANAKIN_SOURCE_DIR
}
"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_RNN_MODLE_URL
}
-N"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_RNN_DATA_URL
}
-N"
)
if
(
WITH_GPU
)
set
(
anakin_test_extra_deps dynload_cuda
)
set
(
ANAKIN_MODLE_URL
"
${
INFERENCE_URL
}
/mobilenet_v2.anakin.bin"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_MODLE_URL
}
-N"
)
cc_test
(
api_anakin_engine_tester SRCS api_anakin_engine_tester.cc
ARGS --model=
${
ANAKIN_SOURCE_DIR
}
/mobilenet_v2.anakin.bin
DEPS inference_anakin_api_shared
${
anakin_test_extra_deps
}
SERIAL
)
endif
()
cc_test
(
api_anakin_engine_rnn_tester SRCS api_anakin_engine_rnn_tester.cc
ARGS --model=
${
ANAKIN_SOURCE_DIR
}
/anakin_test%2Fditu_rnn.anakin2.model.bin
--datapath=
${
ANAKIN_SOURCE_DIR
}
/anakin_test%2Fditu_rnn_data.txt
DEPS inference_anakin_api_shared
dynload_cuda
SERIAL
)
DEPS inference_anakin_api_shared
${
anakin_test_extra_deps
}
SERIAL
)
endif
(
WITH_TESTING
)
endif
()
paddle/fluid/inference/api/api_anakin_engine.cc
浏览文件 @
07f495ec
...
...
@@ -193,7 +193,9 @@ PaddleInferenceAnakinPredictor<Target>::Clone() {
return
std
::
move
(
cls
);
}
#ifdef PADDLE_WITH_CUDA
template
class
PaddleInferenceAnakinPredictor
<
anakin
::
NV
>;
#endif
template
class
PaddleInferenceAnakinPredictor
<
anakin
::
X86
>;
// A factory to help create difference predictor.
...
...
@@ -202,10 +204,15 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnakinConfig
,
PaddleEngineKind
::
kAnakin
>
(
const
AnakinConfig
&
config
)
{
VLOG
(
3
)
<<
"Anakin Predictor create."
;
if
(
config
.
target_type
==
AnakinConfig
::
NVGPU
)
{
#ifdef PADDLE_WITH_CUDA
VLOG
(
3
)
<<
"Anakin Predictor create on [ NVIDIA GPU ]."
;
std
::
unique_ptr
<
PaddlePredictor
>
x
(
new
PaddleInferenceAnakinPredictor
<
anakin
::
NV
>
(
config
));
return
x
;
#else
LOG
(
ERROR
)
<<
"AnakinConfig::NVGPU could not used in ONLY-CPU environment"
;
return
nullptr
;
#endif
}
else
if
(
config
.
target_type
==
AnakinConfig
::
X86
)
{
VLOG
(
3
)
<<
"Anakin Predictor create on [ Intel X86 ]."
;
std
::
unique_ptr
<
PaddlePredictor
>
x
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录