Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
07f495ec
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
07f495ec
编写于
9月 07, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fix_python35_CI_random_fail
上级
2bb0ac92
f76f42c2
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
153 addition
and
82 deletion
+153
-82
CMakeLists.txt
CMakeLists.txt
+3
-1
cmake/external/anakin.cmake
cmake/external/anakin.cmake
+6
-13
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+1
-1
doc/fluid/api/layers.rst
doc/fluid/api/layers.rst
+16
-0
doc/survey/dynamic_graph.md
doc/survey/dynamic_graph.md
+1
-1
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+18
-11
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+6
-1
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
+2
-8
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
+28
-8
paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc
...inference/analysis/analyzer_text_classification_tester.cc
+48
-33
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+17
-5
paddle/fluid/inference/api/api_anakin_engine.cc
paddle/fluid/inference/api/api_anakin_engine.cc
+7
-0
未找到文件。
CMakeLists.txt
浏览文件 @
07f495ec
...
...
@@ -213,9 +213,11 @@ include(configure) # add paddle env configuration
if
(
WITH_GPU
)
include
(
cuda
)
include
(
tensorrt
)
endif
()
if
(
WITH_MKL OR WITH_MKLML
)
include
(
external/anakin
)
elseif
()
set
(
WITH_ANAKIN OFF CACHE STRING
"Anakin is used in
GPU
only now."
FORCE
)
set
(
WITH_ANAKIN OFF CACHE STRING
"Anakin is used in
MKL
only now."
FORCE
)
endif
()
include
(
generic
)
# simplify cmake module
...
...
cmake/external/anakin.cmake
浏览文件 @
07f495ec
...
...
@@ -16,16 +16,6 @@ set(ANAKIN_LIBRARY ${ANAKIN_INSTALL_DIR})
set
(
ANAKIN_SHARED_LIB
${
ANAKIN_LIBRARY
}
/libanakin.so
)
set
(
ANAKIN_SABER_LIB
${
ANAKIN_LIBRARY
}
/libanakin_saber_common.so
)
# TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
set
(
INFERENCE_URL
"http://paddle-inference-dist.bj.bcebos.com"
)
set
(
ANAKIN_MODLE_URL
"
${
INFERENCE_URL
}
/mobilenet_v2.anakin.bin"
)
set
(
ANAKIN_RNN_MODLE_URL
"
${
INFERENCE_URL
}
/anakin_test%2Fditu_rnn.anakin2.model.bin"
)
set
(
ANAKIN_RNN_DATA_URL
"
${
INFERENCE_URL
}
/anakin_test%2Fditu_rnn_data.txt"
)
execute_process
(
COMMAND bash -c
"mkdir -p
${
ANAKIN_SOURCE_DIR
}
"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_MODLE_URL
}
-N"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_RNN_MODLE_URL
}
-N"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_RNN_DATA_URL
}
-N"
)
include_directories
(
${
ANAKIN_INCLUDE
}
)
include_directories
(
${
ANAKIN_INCLUDE
}
/saber/
)
include_directories
(
${
ANAKIN_INCLUDE
}
/saber/core/
)
...
...
@@ -48,6 +38,11 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS
-Wno-reorder
-Wno-error=cpp
)
if
(
WITH_GPU
)
set
(
CMAKE_ARGS_PREFIX -DUSE_GPU_PLACE=YES -DCUDNN_ROOT=
${
CUDNN_ROOT
}
-DCUDNN_INCLUDE_DIR=
${
CUDNN_INCLUDE_DIR
}
)
else
()
set
(
CMAKE_ARGS_PREFIX -DUSE_GPU_PLACE=NO
)
endif
()
ExternalProject_Add
(
extern_anakin
${
EXTERNAL_PROJECT_LOG_ARGS
}
...
...
@@ -56,13 +51,11 @@ ExternalProject_Add(
GIT_TAG
"9424277cf9ae180a14aff09560d3cd60a49c76d2"
PREFIX
${
ANAKIN_SOURCE_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS
-DUSE_GPU_PLACE=YES
CMAKE_ARGS
${
CMAKE_ARGS_PREFIX
}
-DUSE_X86_PLACE=YES
-DBUILD_WITH_UNIT_TEST=NO
-DPROTOBUF_ROOT=
${
THIRD_PARTY_PATH
}
/install/protobuf
-DMKLML_ROOT=
${
THIRD_PARTY_PATH
}
/install/mklml
-DCUDNN_ROOT=
${
CUDNN_ROOT
}
-DCUDNN_INCLUDE_DIR=
${
CUDNN_INCLUDE_DIR
}
-DENABLE_OP_TIMER=
${
ANAKIN_ENABLE_OP_TIMER
}
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
ANAKIN_INSTALL_DIR
}
...
...
cmake/inference_lib.cmake
浏览文件 @
07f495ec
...
...
@@ -145,7 +145,7 @@ copy(memory_lib
set
(
inference_deps paddle_fluid_shared paddle_fluid
)
set
(
module
"inference/api"
)
if
(
WITH_ANAKIN AND WITH_
GPU
)
if
(
WITH_ANAKIN AND WITH_
MKL
)
copy
(
anakin_inference_lib DEPS paddle_inference_api inference_anakin_api
SRCS
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/libinference_anakin_api*
# compiled anakin api
...
...
doc/fluid/api/layers.rst
浏览文件 @
07f495ec
...
...
@@ -822,6 +822,14 @@ pad
.. autofunction:: paddle.fluid.layers.pad
:noindex:
.. _api_fluid_layers_pad_constant_like:
pad_constant_like
---
.. autofunction:: paddle.fluid.layers.pad_constant_like
:noindex:
.. _api_fluid_layers_label_smooth:
label_smooth
...
...
@@ -1145,6 +1153,14 @@ sigmoid
.. autofunction:: paddle.fluid.layers.sigmoid
:noindex:
.. _api_fluid_layers_hsigmoid:
hsigmoid
-------
.. autofunction:: paddle.fluid.layers.hsigmoid
:noindex:
.. _api_fluid_layers_logsigmoid:
logsigmoid
...
...
doc/survey/dynamic_graph.md
浏览文件 @
07f495ec
...
...
@@ -30,7 +30,7 @@ x = Variable(randn(20, 1)))
label
=
Variable
(
randint
(
1
))
W_1
,
W_2
=
Variable
(
randn
(
20
,
20
)),
Variable
(
randn
(
10
,
20
))
h
=
matmul
(
W_1
,
x
)
pred
=
matmul
(
W_2
,
x
)
pred
=
matmul
(
W_2
,
h
)
loss
=
softmax
(
pred
,
label
)
loss
.
backward
()
```
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
07f495ec
set
(
pass_file
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
)
file
(
WRITE
${
pass_file
}
"// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!
\n\n
"
)
file
(
APPEND
${
pass_file
}
"
\#
include
\"
paddle/fluid/framework/ir/pass.h
\"\n
"
)
function
(
pass_library TARGET
)
# Usage: pass_library(target inference) will append to paddle_inference_pass.h
function
(
pass_library TARGET DEST
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS
)
cmake_parse_arguments
(
op_library
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cc_library
(
${
TARGET
}
SRCS
${
TARGET
}
.cc DEPS graph_pattern_detector pass
)
file
(
APPEND
${
pass_file
}
"USE_PASS(
${
TARGET
}
);
\n
"
)
set
(
PASS_LIBRARY
${
TARGET
}
${
PASS_LIBRARY
}
PARENT_SCOPE
)
cc_library
(
${
TARGET
}
SRCS
${
TARGET
}
.cc DEPS graph_pattern_detector pass
${
op_library_DEPS
}
)
# add more DEST here, such as train, dist and collect USE_PASS into a file automatically.
if
(
${
DEST
}
STREQUAL
"base"
OR
${
DEST
}
STREQUAL
"inference"
)
message
(
STATUS
"add pass
${
TARGET
}
${
DEST
}
"
)
file
(
APPEND
${
pass_file
}
"USE_PASS(
${
TARGET
}
);
\n
"
)
set
(
PASS_LIBRARY
${
TARGET
}
${
PASS_LIBRARY
}
PARENT_SCOPE
)
endif
()
endfunction
()
cc_library
(
node SRCS node.cc DEPS proto_desc
)
...
...
@@ -18,13 +25,13 @@ cc_library(pass SRCS pass.cc DEPS graph node graph_helper)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph graph_helper graph_traits
)
pass_library
(
graph_to_program_pass
)
pass_library
(
graph_viz_pass
)
pass_library
(
fc_fuse_pass
)
pass_library
(
attention_lstm_fuse_pass
)
pass_library
(
infer_clean_graph_pass
)
pass_library
(
fc_lstm_fuse_pass
)
pass_library
(
seq_concat_fc_fuse_pass
)
pass_library
(
graph_to_program_pass
base
)
pass_library
(
graph_viz_pass
base
)
pass_library
(
fc_fuse_pass
inference
)
pass_library
(
attention_lstm_fuse_pass
inference
)
pass_library
(
infer_clean_graph_pass
inference
)
pass_library
(
fc_lstm_fuse_pass
inference
)
pass_library
(
seq_concat_fc_fuse_pass
inference
)
set
(
GLOB_PASS_LIB
${
PASS_LIBRARY
}
CACHE INTERNAL
"Global PASS library"
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
07f495ec
...
...
@@ -100,12 +100,17 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
set
(
TEXT_CLASSIFICATION_MODEL_URL
"http://paddle-inference-dist.bj.bcebos.com/text-classification-Senta.tar.gz"
)
set
(
TEXT_CLASSIFICATION_DATA_URL
"http://paddle-inference-dist.bj.bcebos.com/text_classification_data.txt.tar.gz"
)
set
(
TEXT_CLASSIFICATION_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo/text_classification"
CACHE PATH
"Text Classification model and data root."
FORCE
)
if
(
NOT EXISTS
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
AND WITH_TESTING AND WITH_INFERENCE
)
inference_download_and_uncompress
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
${
TEXT_CLASSIFICATION_MODEL_URL
}
"text-classification-Senta.tar.gz"
)
inference_download_and_uncompress
(
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
${
TEXT_CLASSIFICATION_DATA_URL
}
"text_classification_data.txt.tar.gz"
)
endif
()
inference_analysis_test
(
test_text_classification SRCS analyzer_text_classification_tester.cc
EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
ARGS --infer_model=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/text-classification-Senta
)
ARGS --infer_model=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/text-classification-Senta
--infer_data=
${
TEXT_CLASSIFICATION_INSTALL_DIR
}
/data.txt
--topn=1
# Just run top 1 batch.
)
paddle/fluid/inference/analysis/analyzer_lac_tester.cc
浏览文件 @
07f495ec
...
...
@@ -114,12 +114,6 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
PADDLE_ENFORCE_EQ
(
batch_size
,
static_cast
<
int
>
(
one_batch
.
lod
.
size
()
-
1
));
input_slots
->
assign
({
input_tensor
});
}
static
void
PrintTime
(
const
double
latency
,
const
int
bs
,
const
int
repeat
)
{
LOG
(
INFO
)
<<
"===========profile result==========="
;
LOG
(
INFO
)
<<
"batch_size: "
<<
bs
<<
", repeat: "
<<
repeat
<<
", avg latency: "
<<
latency
/
repeat
<<
"ms"
;
LOG
(
INFO
)
<<
"====================================="
;
}
void
BenchAllData
(
const
std
::
string
&
model_path
,
const
std
::
string
&
data_file
,
const
int
batch_size
,
const
int
repeat
)
{
NativeConfig
config
;
...
...
@@ -145,7 +139,7 @@ void BenchAllData(const std::string &model_path, const std::string &data_file,
sum
+=
timer
.
toc
();
}
}
PrintTime
(
sum
,
batch_size
,
repeat
);
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
sum
/
repeat
);
}
const
int64_t
lac_ref_data
[]
=
{
24
,
25
,
25
,
25
,
38
,
30
,
31
,
14
,
15
,
44
,
24
,
25
,
25
,
25
,
25
,
25
,
44
,
24
,
25
,
25
,
25
,
36
,
42
,
43
,
...
...
@@ -176,7 +170,7 @@ void TestLACPrediction(const std::string &model_path,
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs_slots
);
}
PrintTime
(
timer
.
toc
(),
batch_size
,
repeat
);
PrintTime
(
batch_size
,
repeat
,
1
,
0
,
timer
.
toc
()
/
repeat
);
EXPECT_EQ
(
outputs_slots
.
size
(),
1UL
);
auto
&
out
=
outputs_slots
[
0
];
size_t
size
=
std
::
accumulate
(
out
.
shape
.
begin
(),
out
.
shape
.
end
(),
1
,
...
...
paddle/fluid/inference/analysis/analyzer_ner_tester.cc
浏览文件 @
07f495ec
...
...
@@ -25,6 +25,7 @@ DEFINE_string(infer_model, "", "model path");
DEFINE_string
(
infer_data
,
""
,
"data path"
);
DEFINE_int32
(
batch_size
,
10
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
namespace
paddle
{
namespace
inference
{
...
...
@@ -35,6 +36,7 @@ struct DataRecord {
std
::
vector
<
size_t
>
lod
;
// two inputs have the same lod info.
size_t
batch_iter
{
0
};
size_t
batch_size
{
1
};
size_t
num_samples
;
// total number of samples
DataRecord
()
=
default
;
explicit
DataRecord
(
const
std
::
string
&
path
,
int
batch_size
=
1
)
:
batch_size
(
batch_size
)
{
...
...
@@ -81,6 +83,7 @@ struct DataRecord {
word_data_all
.
push_back
(
std
::
move
(
word_data
));
mention_data_all
.
push_back
(
std
::
move
(
mention_data
));
}
num_samples
=
num_lines
;
}
};
...
...
@@ -120,21 +123,38 @@ void TestChineseNERPrediction() {
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
,
PaddleEngineKind
::
kNative
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
;
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
;
Timer
timer
;
if
(
FLAGS_test_all_data
)
{
LOG
(
INFO
)
<<
"test all data"
;
double
sum
=
0
;
size_t
num_samples
;
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
num_samples
=
data
.
num_samples
;
for
(
size_t
bid
=
0
;
bid
<
num_samples
;
++
bid
)
{
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
timer
.
tic
();
predictor
->
Run
(
input_slots
,
&
outputs
);
sum
+=
timer
.
toc
();
}
}
LOG
(
INFO
)
<<
"total number of samples: "
<<
num_samples
;
PrintTime
(
FLAGS_batch_size
,
FLAGS_repeat
,
1
,
0
,
sum
/
FLAGS_repeat
);
LOG
(
INFO
)
<<
"average latency of each sample: "
<<
sum
/
FLAGS_repeat
/
num_samples
;
return
;
}
// Prepare inputs.
DataRecord
data
(
FLAGS_infer_data
,
FLAGS_batch_size
);
PrepareInputs
(
&
input_slots
,
&
data
,
FLAGS_batch_size
);
std
::
vector
<
PaddleTensor
>
outputs
;
Timer
timer
;
timer
.
tic
();
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
predictor
->
Run
(
input_slots
,
&
outputs
);
}
LOG
(
INFO
)
<<
"===========profile result==========="
;
LOG
(
INFO
)
<<
"batch_size: "
<<
FLAGS_batch_size
<<
", repeat: "
<<
FLAGS_repeat
<<
", latency: "
<<
timer
.
toc
()
/
FLAGS_repeat
<<
"ms"
;
LOG
(
INFO
)
<<
"====================================="
;
PrintTime
(
FLAGS_batch_size
,
FLAGS_repeat
,
1
,
0
,
timer
.
toc
()
/
FLAGS_repeat
);
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
auto
&
out
=
outputs
[
0
];
...
...
paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc
浏览文件 @
07f495ec
...
...
@@ -16,8 +16,10 @@
#include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <gtest/gtest.h>
#include <fstream>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/timer.h"
...
...
@@ -26,60 +28,72 @@ DEFINE_string(infer_model, "", "Directory of the inference model.");
DEFINE_string
(
infer_data
,
""
,
"Path of the dataset."
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
repeat
,
1
,
"How many times to repeat run."
);
DEFINE_int32
(
topn
,
-
1
,
"Run top n batches of data to save time"
);
namespace
paddle
{
namespace
inference
{
template
<
typename
T
>
std
::
string
to_string
(
const
std
::
vector
<
T
>
&
vec
)
{
std
::
stringstream
ss
;
for
(
const
auto
&
c
:
vec
)
{
ss
<<
c
<<
" "
;
}
return
ss
.
str
();
}
struct
DataReader
{
explicit
DataReader
(
const
std
::
string
&
path
)
:
file
(
new
std
::
ifstream
(
path
))
{}
void
PrintTime
(
const
double
latency
,
const
int
bs
,
const
int
repeat
)
{
LOG
(
INFO
)
<<
"===========profile result==========="
;
LOG
(
INFO
)
<<
"batch_size: "
<<
bs
<<
", repeat: "
<<
repeat
<<
", avg latency: "
<<
latency
/
repeat
<<
"ms"
;
LOG
(
INFO
)
<<
"====================================="
;
}
bool
NextBatch
(
PaddleTensor
*
tensor
,
int
batch_size
)
{
PADDLE_ENFORCE_EQ
(
batch_size
,
1
)
;
std
::
string
line
;
tensor
->
lod
.
clear
()
;
tensor
->
lod
.
emplace_back
(
std
::
vector
<
size_t
>
({
0
}))
;
std
::
vector
<
int64_t
>
data
;
void
Main
(
int
batch_size
)
{
// Three sequence inputs.
std
::
vector
<
PaddleTensor
>
input_slots
(
1
);
// one batch starts
// data --
int64_t
data0
[]
=
{
0
,
1
,
2
};
for
(
auto
&
input
:
input_slots
)
{
input
.
data
.
Reset
(
data0
,
sizeof
(
data0
));
input
.
shape
=
std
::
vector
<
int
>
({
3
,
1
});
// dtype --
input
.
dtype
=
PaddleDType
::
INT64
;
// LoD --
input
.
lod
=
std
::
vector
<
std
::
vector
<
size_t
>>
({{
0
,
3
}});
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
if
(
!
std
::
getline
(
*
file
,
line
))
return
false
;
inference
::
split_to_int64
(
line
,
' '
,
&
data
);
}
tensor
->
lod
.
front
().
push_back
(
data
.
size
());
tensor
->
data
.
Resize
(
data
.
size
()
*
sizeof
(
int64_t
));
memcpy
(
tensor
->
data
.
data
(),
data
.
data
(),
data
.
size
()
*
sizeof
(
int64_t
));
tensor
->
shape
.
clear
();
tensor
->
shape
.
push_back
(
data
.
size
());
tensor
->
shape
.
push_back
(
1
);
return
true
;
}
std
::
unique_ptr
<
std
::
ifstream
>
file
;
};
void
Main
(
int
batch_size
)
{
// shape --
// Create Predictor --
AnalysisConfig
config
;
config
.
model_dir
=
FLAGS_infer_model
;
config
.
use_gpu
=
false
;
config
.
enable_ir_optim
=
true
;
config
.
ir_passes
.
push_back
(
"fc_lstm_fuse_pass"
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
,
PaddleEngineKind
::
kAnalysis
>
(
config
);
std
::
vector
<
PaddleTensor
>
input_slots
(
1
);
// one batch starts
// data --
auto
&
input
=
input_slots
[
0
];
input
.
dtype
=
PaddleDType
::
INT64
;
inference
::
Timer
timer
;
double
sum
=
0
;
std
::
vector
<
PaddleTensor
>
output_slots
;
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
timer
.
tic
();
CHECK
(
predictor
->
Run
(
input_slots
,
&
output_slots
));
sum
+=
timer
.
toc
();
int
num_batches
=
0
;
for
(
int
t
=
0
;
t
<
FLAGS_repeat
;
t
++
)
{
DataReader
reader
(
FLAGS_infer_data
);
while
(
reader
.
NextBatch
(
&
input
,
FLAGS_batch_size
))
{
if
(
FLAGS_topn
>
0
&&
num_batches
>
FLAGS_topn
)
break
;
timer
.
tic
();
CHECK
(
predictor
->
Run
(
input_slots
,
&
output_slots
));
sum
+=
timer
.
toc
();
++
num_batches
;
}
}
PrintTime
(
sum
,
batch_size
,
FLAGS_repeat
);
PrintTime
(
batch_size
,
FLAGS_repeat
,
1
,
0
,
sum
/
FLAGS_repeat
);
// Get output
LOG
(
INFO
)
<<
"get outputs "
<<
output_slots
.
size
();
...
...
@@ -100,4 +114,5 @@ void Main(int batch_size) {
TEST
(
text_classification
,
basic
)
{
Main
(
FLAGS_batch_size
);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
07f495ec
...
...
@@ -61,7 +61,7 @@ cc_library(paddle_inference_tensorrt_subgraph_engine
inference_api_test
(
test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec
)
endif
()
if
(
WITH_ANAKIN AND WITH_
GPU
)
# only needed in CI
if
(
WITH_ANAKIN AND WITH_
MKL
)
# only needed in CI
# compile the libinference_anakin_api.a and anakin.so.
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber
)
...
...
@@ -71,12 +71,24 @@ if (WITH_ANAKIN AND WITH_GPU) # only needed in CI
anakin_target
(
inference_anakin_api
)
anakin_target
(
inference_anakin_api_shared
)
if
(
WITH_TESTING
)
cc_test
(
api_anakin_engine_tester SRCS api_anakin_engine_tester.cc
ARGS --model=
${
ANAKIN_SOURCE_DIR
}
/mobilenet_v2.anakin.bin
DEPS inference_anakin_api_shared dynload_cuda SERIAL
)
# TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
set
(
INFERENCE_URL
"http://paddle-inference-dist.bj.bcebos.com"
)
set
(
ANAKIN_RNN_MODLE_URL
"
${
INFERENCE_URL
}
/anakin_test%2Fditu_rnn.anakin2.model.bin"
)
set
(
ANAKIN_RNN_DATA_URL
"
${
INFERENCE_URL
}
/anakin_test%2Fditu_rnn_data.txt"
)
execute_process
(
COMMAND bash -c
"mkdir -p
${
ANAKIN_SOURCE_DIR
}
"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_RNN_MODLE_URL
}
-N"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_RNN_DATA_URL
}
-N"
)
if
(
WITH_GPU
)
set
(
anakin_test_extra_deps dynload_cuda
)
set
(
ANAKIN_MODLE_URL
"
${
INFERENCE_URL
}
/mobilenet_v2.anakin.bin"
)
execute_process
(
COMMAND bash -c
"cd
${
ANAKIN_SOURCE_DIR
}
; wget -q --no-check-certificate
${
ANAKIN_MODLE_URL
}
-N"
)
cc_test
(
api_anakin_engine_tester SRCS api_anakin_engine_tester.cc
ARGS --model=
${
ANAKIN_SOURCE_DIR
}
/mobilenet_v2.anakin.bin
DEPS inference_anakin_api_shared
${
anakin_test_extra_deps
}
SERIAL
)
endif
()
cc_test
(
api_anakin_engine_rnn_tester SRCS api_anakin_engine_rnn_tester.cc
ARGS --model=
${
ANAKIN_SOURCE_DIR
}
/anakin_test%2Fditu_rnn.anakin2.model.bin
--datapath=
${
ANAKIN_SOURCE_DIR
}
/anakin_test%2Fditu_rnn_data.txt
DEPS inference_anakin_api_shared
dynload_cuda
SERIAL
)
DEPS inference_anakin_api_shared
${
anakin_test_extra_deps
}
SERIAL
)
endif
(
WITH_TESTING
)
endif
()
paddle/fluid/inference/api/api_anakin_engine.cc
浏览文件 @
07f495ec
...
...
@@ -193,7 +193,9 @@ PaddleInferenceAnakinPredictor<Target>::Clone() {
return
std
::
move
(
cls
);
}
#ifdef PADDLE_WITH_CUDA
template
class
PaddleInferenceAnakinPredictor
<
anakin
::
NV
>;
#endif
template
class
PaddleInferenceAnakinPredictor
<
anakin
::
X86
>;
// A factory to help create difference predictor.
...
...
@@ -202,10 +204,15 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnakinConfig
,
PaddleEngineKind
::
kAnakin
>
(
const
AnakinConfig
&
config
)
{
VLOG
(
3
)
<<
"Anakin Predictor create."
;
if
(
config
.
target_type
==
AnakinConfig
::
NVGPU
)
{
#ifdef PADDLE_WITH_CUDA
VLOG
(
3
)
<<
"Anakin Predictor create on [ NVIDIA GPU ]."
;
std
::
unique_ptr
<
PaddlePredictor
>
x
(
new
PaddleInferenceAnakinPredictor
<
anakin
::
NV
>
(
config
));
return
x
;
#else
LOG
(
ERROR
)
<<
"AnakinConfig::NVGPU could not used in ONLY-CPU environment"
;
return
nullptr
;
#endif
}
else
if
(
config
.
target_type
==
AnakinConfig
::
X86
)
{
VLOG
(
3
)
<<
"Anakin Predictor create on [ Intel X86 ]."
;
std
::
unique_ptr
<
PaddlePredictor
>
x
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录