Commit 875a07c3 (unverified)

refactor inference analysis api (#14634)

Authored Jan 07, 2019 by Yan Chunwei; committed via GitHub on Jan 07, 2019.
Parent: 99e6e8b0
Showing 27 changed files with 418 additions and 256 deletions (+418 −256).
Changed files:

  cmake/configure.cmake                                                     +1   −0
  paddle/fluid/framework/naive_executor.cc                                  +8   −8
  paddle/fluid/inference/api/analysis_config.cc                             +154 −66
  paddle/fluid/inference/api/analysis_predictor.cc                          +46  −37
  paddle/fluid/inference/api/analysis_predictor_tester.cc                   +15  −15
  paddle/fluid/inference/api/api_anakin_engine.h                            +0   −2
  paddle/fluid/inference/api/api_impl.cc                                    +1   −1
  paddle/fluid/inference/api/api_impl_tester.cc                             +2   −1
  paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc                  +4   −5
  paddle/fluid/inference/api/demo_ci/vis_demo.cc                            +6   −7
  paddle/fluid/inference/api/paddle_analysis_config.h                       +88  −21
  paddle/fluid/inference/api/paddle_inference_api.h                         +2   −3
  paddle/fluid/inference/api/paddle_pass_builder.h                          +11  −1
  paddle/fluid/inference/tests/api/analyzer_dam_tester.cc                   +3   −6
  paddle/fluid/inference/tests/api/analyzer_lac_tester.cc                   +4   −5
  paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc                +4   −5
  paddle/fluid/inference/tests/api/analyzer_ner_tester.cc                   +5   −6
  paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc              +4   −6
  paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc                  +14  −14
  paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc                  +4   −6
  paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc             +4   −5
  paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc             +3   −6
  paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc   +4   −5
  paddle/fluid/inference/tests/api/analyzer_vis_tester.cc                   +5   −6
  paddle/fluid/inference/tests/api/config_printer.h                         +10  −6
  paddle/fluid/inference/tests/api/tester_helper.h                          +4   −1
  paddle/fluid/inference/tests/api/trt_models_tester.cc                     +12  −12
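The thrust of the refactor is visible in every hunk below: contrib::AnalysisConfig no longer inherits NativeConfig's public fields, and callers switch from assigning fields to calling setter/getter methods. A minimal before/after sketch, assembled only from calls shown in this diff (the model path is a placeholder):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void ConfigureSketch() {
  // Before this commit (field style, inherited from NativeConfig):
  //   paddle::contrib::AnalysisConfig config(true /*use_gpu*/);
  //   config.model_dir = "/path/to/model_dir";
  //   config.enable_ir_optim = true;
  // After this commit (method style):
  paddle::contrib::AnalysisConfig config;
  config.SetModel("/path/to/model_dir");                              // placeholder path
  config.EnableUseGpu(100 /*memory_pool_init_size_mb*/, 0 /*device_id*/);
  config.SwitchIrOptim(true);
}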
cmake/configure.cmake
@@ -134,6 +134,7 @@ if(WITH_GPU)
       message(WARNING "Anakin needs CUDNN >= 7.0 to compile. Force WITH_ANAKIN=OFF")
       set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when CUDNN >= 7.0." FORCE)
     endif()
+    add_definitions(-DWITH_ANAKIN)
   endif()
   if(WITH_ANAKIN)
     # NOTICE(minqiyang): the end slash is important because $CUDNN_INCLUDE_DIR
paddle/fluid/framework/naive_executor.cc
@@ -40,12 +40,12 @@ void NaiveExecutor::Prepare(Scope *scope, const ProgramDesc &program_desc,
 void NaiveExecutor::Run() {
 #ifndef PADDLE_ON_INFERENCE
-  LOG_FIRST_N(WARNING, 1) << "The NaiveExecutor can not work properly if the "
-                             "cmake flag ON_INFER is not set.";
-  LOG_FIRST_N(WARNING, 1) << "Unlike the training phase, all the scopes and "
-                             "variables will be reused to save the allocation "
-                             "overhead.";
-  LOG_FIRST_N(WARNING, 1) << "Please re-compile the inference library by "
-                             "setting the cmake flag ON_INFER=ON if you are "
-                             "running Paddle Inference";
+  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
+                             "cmake flag ON_INFER is not set.";
+  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
+                             "variables will be reused to save the allocation "
+                             "overhead.";
+  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
+                             "setting the cmake flag ON_INFER=ON if you are "
+                             "running Paddle Inference";
 #endif  // PADDLE_ON_INFERENCE
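This hunk only raises the repetition cap of the warnings from 1 to 5: with glog's LOG_FIRST_N(severity, n), a statement logs only the first n times it is reached. A standalone glog sketch of that semantics (not code from this commit):

#include <glog/logging.h>

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  for (int i = 0; i < 10; ++i) {
    // Emitted only for the first 5 iterations, then suppressed.
    LOG_FIRST_N(WARNING, 5) << "seen " << i;
  }
  return 0;
}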
paddle/fluid/inference/api/analysis_config.cc
@@ -14,86 +14,101 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/api/paddle_pass_builder.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle_pass_builder.h"  // NOLINT
+#include "paddle/fluid/platform/gpu_info.h"

 namespace paddle {

 PassStrategy *contrib::AnalysisConfig::pass_builder() const {
-  PADDLE_ENFORCE(
-      pass_builder_.get(),
-      "Should call constructor first, that will init the pass_builder_.");
+  if (!pass_builder_.get()) {
+    if (use_gpu_) {
+      LOG(INFO) << "Create GPU IR passes";
+      pass_builder_.reset(new GpuPassStrategy);
+    } else {
+      LOG(INFO) << "Create CPU IR passes";
+      pass_builder_.reset(new CpuPassStrategy);
+    }
+  } else if (pass_builder_->use_gpu() ^ use_gpu()) {
+    LOG(WARNING) << "The use_gpu flag is not compatible between Config and "
+                    "PassBuilder, the flags are "
+                 << use_gpu() << " " << pass_builder_->use_gpu();
+    LOG(WARNING) << "Please make them compatible, still use the existing "
+                    "PassBuilder.";
+  }
+
   return pass_builder_.get();
 }

-contrib::AnalysisConfig::AnalysisConfig(bool use_gpu) {
-  this->use_gpu = use_gpu;
-  if (use_gpu) {
-    pass_builder_.reset(new GpuPassStrategy);
-  } else {
-    pass_builder_.reset(new CpuPassStrategy);
-  }
+contrib::AnalysisConfig::AnalysisConfig(const std::string &model_dir) {
+  model_dir_ = model_dir;
+}
+contrib::AnalysisConfig::AnalysisConfig(const std::string &prog_file,
+                                        const std::string &params_file) {
+  prog_file_ = prog_file;
+  params_file_ = params_file;
+}
+void contrib::AnalysisConfig::SetModel(const std::string &prog_file_path,
+                                       const std::string &params_file_path) {
+  prog_file_ = prog_file_path;
+  params_file_ = params_file_path;
+}
+void contrib::AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
+                                           int device_id) {
+#ifdef PADDLE_WITH_CUDA
+  use_gpu_ = true;
+  memory_pool_init_size_mb_ = memory_pool_init_size_mb;
+  device_id_ = device_id;
+#else
+  LOG(ERROR) << "Please compile with gpu to EnableGpu";
+  use_gpu_ = false;
+#endif
 }
+void contrib::AnalysisConfig::DisableGpu() { use_gpu_ = false; }

 contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
-  // fields from Config
-  model_dir = other.model_dir;
-  // fields from NativeConfig
-  use_gpu = other.use_gpu;
-  device = other.device;
-  fraction_of_gpu_memory = other.fraction_of_gpu_memory;
-  prog_file = other.prog_file;
-  param_file = other.param_file;
-  specify_input_name = other.specify_input_name;
-  cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_;
-  // fields from this.
-  enable_ir_optim = other.enable_ir_optim;
-  // For mkldnn
-  use_mkldnn_ = other.use_mkldnn_;
-  mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_;
-  use_feed_fetch_ops = other.use_feed_fetch_ops;
-  use_tensorrt_ = other.use_tensorrt_;
-  tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
-  tensorrt_workspace_size_ = other.tensorrt_workspace_size_;
-  tensorrt_min_subgraph_size_ = other.tensorrt_min_subgraph_size_;
-  model_from_memory_ = other.model_from_memory_;
-
-  if (use_gpu) {
+#define CP_MEMBER(member__) member__ = other.member__;
+
+  // Model related.
+  CP_MEMBER(model_dir_);
+  CP_MEMBER(prog_file_);
+  CP_MEMBER(params_file_);
+  CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
+                                  // params_file_ fields.
+  // Gpu releated.
+  CP_MEMBER(use_gpu_);
+  CP_MEMBER(device_id_);
+  CP_MEMBER(memory_pool_init_size_mb_);
+  // TensorRT releated.
+  CP_MEMBER(use_tensorrt_);
+  CP_MEMBER(tensorrt_workspace_size_);
+  CP_MEMBER(tensorrt_max_batchsize_);
+  CP_MEMBER(tensorrt_min_subgraph_size_);
+  // MKLDNN releated.
+  CP_MEMBER(use_mkldnn_);
+  CP_MEMBER(mkldnn_enabled_op_types_);
+  // Ir related.
+  CP_MEMBER(enable_ir_optim_);
+  CP_MEMBER(use_feed_fetch_ops_);
+  CP_MEMBER(ir_debug_);
+  CP_MEMBER(specify_input_name_);
+  CP_MEMBER(cpu_math_library_num_threads_);
+  CP_MEMBER(serialized_info_cache_);
+
+  if (use_gpu_) {
     pass_builder_.reset(new GpuPassStrategy(
         *static_cast<GpuPassStrategy *>(other.pass_builder())));
   } else {
     pass_builder_.reset(new CpuPassStrategy(
         *static_cast<CpuPassStrategy *>(other.pass_builder())));
   }
-}
-
-contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) {
-  // fields from Config
-  model_dir = other.model_dir;
-  // fields from NativeConfig
-  use_gpu = other.use_gpu;
-  device = other.device;
-  fraction_of_gpu_memory = other.fraction_of_gpu_memory;
-  prog_file = other.prog_file;
-  param_file = other.param_file;
-  specify_input_name = other.specify_input_name;
-  cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_;
-  // fields from this.
-  enable_ir_optim = other.enable_ir_optim;
-  // For mkldnn
-  use_mkldnn_ = other.use_mkldnn_;
-  mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_;
-  use_feed_fetch_ops = other.use_feed_fetch_ops;
-  use_tensorrt_ = other.use_tensorrt_;
-  tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
-  tensorrt_workspace_size_ = other.tensorrt_workspace_size_;
-  tensorrt_min_subgraph_size_ = other.tensorrt_min_subgraph_size_;
-  model_from_memory_ = other.model_from_memory_;
-  pass_builder_ = std::move(other.pass_builder_);
+
+#undef CP_MEMBER
 }

 void contrib::AnalysisConfig::EnableMKLDNN() {

@@ -112,17 +127,90 @@ void contrib::AnalysisConfig::EnableTensorRtEngine(int workspace_size,
   use_tensorrt_ = true;
   tensorrt_workspace_size_ = workspace_size;
   tensorrt_max_batchsize_ = max_batch_size;
   tensorrt_min_subgraph_size_ = min_subgraph_size;
-  // Append after the conv+affine_channel fuse pass.
-  pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
 }
+
+void contrib::AnalysisConfig::Update() {
+  auto info = SerializeInfoCache();
+  if (info == serialized_info_cache_) return;
+
+  if (use_gpu_) {
+    pass_builder_.reset(new GpuPassStrategy);
+  } else {
+    pass_builder_.reset(new CpuPassStrategy);
+  }
+
+  if (use_tensorrt_) {
+    if (!use_gpu_) {
+      LOG(ERROR)
+          << "TensorRT engine is not available when EnableGpu() not actived.";
+    } else {
+      // Append after the infer_clean pass.
+      pass_builder()->InsertPass(1, "tensorrt_subgraph_pass");
+    }
+  }
+
+  if (use_mkldnn_) {
+    if (!enable_ir_optim_) {
+      LOG(ERROR)
+          << "EnableMKLDNN() only works when IR optimization is enabled.";
+    }
+#ifdef PADDLE_WITH_MKLDNN
+    pass_builder()->EnableMKLDNN();
+    use_mkldnn_ = true;
+#else
+    LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
+    use_mkldnn_ = false;
+#endif
+  }
+
+  if (ir_debug_) {
+    pass_builder()->TurnOnDebug();
+  }
+}
+
+std::string contrib::AnalysisConfig::SerializeInfoCache() {
+  std::stringstream ss;
+  ss << use_gpu_;
+  ss << memory_pool_init_size_mb_;
+  ss << use_tensorrt_;
+  ss << tensorrt_workspace_size_;
+  ss << tensorrt_max_batchsize_;
+  ss << use_mkldnn_;
+  ss << enable_ir_optim_;
+  ss << use_feed_fetch_ops_;
+  ss << ir_debug_;
+  return ss.str();
+}
+
+void contrib::AnalysisConfig::SetCpuMathLibraryNumThreads(
+    int cpu_math_library_num_threads) {
+  cpu_math_library_num_threads_ = cpu_math_library_num_threads;
+}
+
+float contrib::AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
+#ifdef PADDLE_WITH_CUDA
+  // Get the GPU memory details and calculate the fraction of memory for the
+  // GPU memory pool.
+  size_t gpu_used, gpu_available;
+  platform::GpuMemoryUsage(&gpu_used, &gpu_available);
+  double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.;
+  float fraction_of_gpu_memory =
+      static_cast<double>(memory_pool_init_size_mb()) / total_gpu_memory;
+  return fraction_of_gpu_memory;
+#else
+  return 0.;
+#endif
+}

 void contrib::AnalysisConfig::SetModelBuffer(const char *prog_buffer,
                                              size_t prog_buffer_size,
                                              const char *param_buffer,
                                              size_t param_buffer_size) {
-  prog_file = std::string(prog_buffer, prog_buffer + prog_buffer_size);
-  param_file = std::string(param_buffer, param_buffer + param_buffer_size);
+  prog_file_ = std::string(prog_buffer, prog_buffer + prog_buffer_size);
+  params_file_ = std::string(param_buffer, param_buffer + param_buffer_size);
   model_from_memory_ = true;
 }
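The new Update()/SerializeInfoCache() pair gives the config a cheap change detector: the option fields are streamed into a string, and if that string matches the cached value the pass builder does not need to be rebuilt. A minimal self-contained sketch of that idea (ConfigSketch and its members are hypothetical names, not Paddle API):

#include <sstream>
#include <string>

struct ConfigSketch {
  bool use_gpu{false};
  bool use_tensorrt{false};
  std::string serialized_info_cache_;  // last-seen fingerprint

  // Stream every option into one string, as SerializeInfoCache() does.
  std::string Serialize() const {
    std::stringstream ss;
    ss << use_gpu << use_tensorrt;
    return ss.str();
  }

  // Rebuild expensive state only when the fingerprint changed.
  bool NeedsRebuild() {
    std::string info = Serialize();
    if (info == serialized_info_cache_) return false;
    serialized_info_cache_ = info;
    return true;
  }
};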
paddle/fluid/inference/api/analysis_predictor.cc
@@ -33,6 +33,7 @@
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/cpu_helper.h"
+#include "paddle/fluid/platform/gpu_info.h"
 #include "paddle/fluid/platform/profiler.h"

 DECLARE_bool(profile);

@@ -59,7 +60,7 @@ bool AnalysisPredictor::Init(
   if (FLAGS_profile) {
     LOG(WARNING) << "Profiler is actived, might affect the performance";
     LOG(INFO) << "You can turn off by set gflags '-profile false'";
-    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
+    auto tracking_device = config_.use_gpu() ? platform::ProfilerState::kAll
                                            : platform::ProfilerState::kCPU;
     platform::EnableProfiler(tracking_device);
   }

@@ -112,7 +113,7 @@ bool AnalysisPredictor::PrepareProgram(
     // Optimize the program, and load parameters and modify them in the
     // scope_.
     // This will change the scope_ address.
-    if (config_.enable_ir_optim) {
+    if (config_.ir_optim()) {
       status_ir_optim_enabled_ = true;
       OptimizeInferenceProgram();
     } else {

@@ -140,9 +141,9 @@ bool AnalysisPredictor::PrepareProgram(
   return true;
 }
 bool AnalysisPredictor::CreateExecutor() {
-  if (config_.use_gpu) {
+  if (config_.use_gpu_) {
     status_use_gpu_ = true;
-    place_ = paddle::platform::CUDAPlace(config_.device);
+    place_ = paddle::platform::CUDAPlace(config_.device_id_);
   } else {
     place_ = paddle::platform::CPUPlace();
   }

@@ -151,7 +152,7 @@ bool AnalysisPredictor::CreateExecutor() {
 }
 bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(sub_scope_, *inference_program_, 0,
-                     config_.use_feed_fetch_ops);
+                     config_.use_feed_fetch_ops_);
   PADDLE_ENFORCE_NOT_NULL(sub_scope_);

@@ -250,7 +251,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     }
     input.set_lod(lod);
     int idx = -1;
-    if (config_.specify_input_name) {
+    if (config_.specify_input_name_) {
       auto name = inputs[i].name;
       if (feed_names_.find(name) == feed_names_.end()) {
         LOG(ERROR) << "feed names from program do not have name: [" << name

@@ -314,22 +315,22 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
 void AnalysisPredictor::OptimizeInferenceProgram() {
   status_program_optimized_ = true;

-  argument_.SetUseGPU(config_.use_gpu);
-  argument_.SetGPUDeviceId(config_.device);
+  argument_.SetUseGPU(config_.use_gpu());
+  argument_.SetGPUDeviceId(config_.gpu_device_id());
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
-  if (!config_.model_dir.empty()) {
-    argument_.SetModelDir(config_.model_dir);
+  if (!config_.model_dir().empty()) {
+    argument_.SetModelDir(config_.model_dir());
   } else {
     PADDLE_ENFORCE(
-        !config_.param_file.empty(),
+        !config_.params_file().empty(),
         "Either model_dir or (param_file, prog_file) should be set.");
-    PADDLE_ENFORCE(!config_.prog_file.empty());
-    argument_.SetModelProgramPath(config_.prog_file);
-    argument_.SetModelParamsPath(config_.param_file);
+    PADDLE_ENFORCE(!config_.prog_file().empty());
+    argument_.SetModelProgramPath(config_.prog_file());
+    argument_.SetModelParamsPath(config_.params_file());
   }

-  if (config_.use_gpu && config_.use_tensorrt_) {
+  if (config_.use_gpu() && config_.tensorrt_engine_enabled()) {
     argument_.SetUseTensorRT(true);
     argument_.SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_);
     argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);

@@ -341,7 +342,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   }

   auto passes = config_.pass_builder()->AllPasses();
-  if (!config_.enable_ir_optim) passes.clear();
+  if (!config_.ir_optim()) passes.clear();
   argument_.SetIrAnalysisPasses(passes);
   argument_.SetScopeNotOwned(const_cast<framework::Scope *>(scope_.get()));
   Analyzer().Run(&argument_);

@@ -358,18 +359,26 @@ template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
   VLOG(3) << "create AnalysisConfig";
-  if (config.use_gpu) {
+  if (config.use_gpu()) {
     // 1. GPU memeroy
-    PADDLE_ENFORCE_GT(
-        config.fraction_of_gpu_memory, 0.f,
-        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
-    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
+    PADDLE_ENFORCE_GT(config.memory_pool_init_size_mb(), 0.f);
+    PADDLE_ENFORCE_GE(config.gpu_device_id(), 0, "Invalid device id %d",
+                      config.gpu_device_id());
     std::vector<std::string> flags;
-    if (config.fraction_of_gpu_memory >= 0.0f ||
-        config.fraction_of_gpu_memory <= 0.95f) {
+
+    float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool();
+    if (fraction_of_gpu_memory > 0.95f) {
+      LOG(ERROR)
+          << "Allocate too much memory for the GPU memory pool, assigned "
+          << config.memory_pool_init_size_mb() << " MB";
+      LOG(ERROR)
+          << "Try to shink the value by setting AnalysisConfig::EnableGpu(...)";
+    }
+
+    if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) {
       flags.push_back("dummpy");
       std::string flag = "--fraction_of_gpu_memory_to_use=" +
-                         std::to_string(config.fraction_of_gpu_memory);
+                         std::to_string(fraction_of_gpu_memory);
       flags.push_back(flag);
       VLOG(3) << "set flag: " << flag;
       framework::InitGflags(flags);

@@ -443,22 +452,22 @@ bool AnalysisPredictor::ZeroCopyRun() {
 bool AnalysisPredictor::LoadProgramDesc() {
   // Initialize the inference program
   std::string filename;
-  if (!config_.model_dir.empty()) {
-    filename = config_.model_dir + "/__model__";
-  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
+  if (!config_.model_dir().empty()) {
+    filename = config_.model_dir() + "/__model__";
+  } else if (!config_.prog_file().empty() && !config_.params_file().empty()) {
     // All parameters are saved in a single file.
     // The file names should be consistent with that used
    // in Python API `fluid.io.save_inference_model`.
-    filename = config_.prog_file;
+    filename = config_.prog_file();
   } else {
-    if (config_.model_dir.empty() && config_.prog_file.empty()) {
+    if (config_.model_dir().empty() && config_.prog_file().empty()) {
       LOG(ERROR)
           << "Either model_dir or (prog_file, param_file) should be set.";
       return false;
     }
     LOG(ERROR) << string::Sprintf(
-        "not valid model path '%s' or program path '%s'.", config_.model_dir,
-        config_.param_file);
+        "not valid model path '%s' or program path '%s'.", config_.model_dir(),
+        config_.params_file());
     return false;
   }

@@ -478,7 +487,7 @@ bool AnalysisPredictor::LoadProgramDesc() {
     proto.ParseFromString(pb_content);
   } else {
-    proto.ParseFromString(config_.prog_file);
+    proto.ParseFromString(config_.prog_file());
   }
   inference_program_.reset(new framework::ProgramDesc(proto));
   return true;

@@ -508,27 +517,27 @@ bool AnalysisPredictor::LoadParameters() {
       new_var->SetLoDLevel(var->GetLoDLevel());
       new_var->SetPersistable(true);

-      if (!config_.param_file.empty()) {
+      if (!config_.params_file().empty()) {
         params.push_back(new_var->Name());
       } else {
         // append_op
         framework::OpDesc *op = load_block->AppendOp();
         op->SetType("load");
         op->SetOutput("Out", {new_var->Name()});
-        op->SetAttr("file_path", {config_.model_dir + "/" + new_var->Name()});
+        op->SetAttr("file_path", {config_.model_dir() + "/" + new_var->Name()});
         op->CheckAttrs();
       }
     }
   }

-  if (!config_.param_file.empty()) {
+  if (!config_.params_file().empty()) {
     // sort paramlist to have consistent ordering
     std::sort(params.begin(), params.end());
     // append just the load_combine op
     framework::OpDesc *op = load_block->AppendOp();
     op->SetType("load_combine");
     op->SetOutput("Out", params);
-    op->SetAttr("file_path", {config_.param_file});
+    op->SetAttr("file_path", {config_.params_file()});
     op->CheckAttrs();
   }
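Note how the GPU flag plumbing changed: instead of passing the user-supplied fraction_of_gpu_memory through, the predictor now derives the fraction from the pool size via fraction_of_gpu_memory_for_pool(), i.e. the configured pool size in MB divided by the card's total memory in MB. A worked sketch of that arithmetic (the 8192 MB total is an assumed example value, not queried from a device):

#include <cstdio>

int main() {
  double total_gpu_memory_mb = 8192.0;        // assumed card size for illustration
  double memory_pool_init_size_mb = 100.0;    // default pool size from this diff
  float fraction =
      static_cast<float>(memory_pool_init_size_mb / total_gpu_memory_mb);
  std::printf("--fraction_of_gpu_memory_to_use=%f\n", fraction);  // ~0.0122
  return 0;
}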
paddle/fluid/inference/api/analysis_predictor_tester.cc
@@ -25,9 +25,9 @@ namespace paddle {
 using contrib::AnalysisConfig;

 TEST(AnalysisPredictor, analysis_off) {
-  AnalysisConfig config(false);
-  config.model_dir = FLAGS_dirname;
-  config.enable_ir_optim = false;
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(false);

   auto _predictor = CreatePaddlePredictor<AnalysisConfig>(config);
   auto *predictor = static_cast<AnalysisPredictor *>(_predictor.get());

@@ -55,14 +55,14 @@ TEST(AnalysisPredictor, analysis_off) {
 }

 TEST(AnalysisPredictor, analysis_on) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(true);
 #ifdef PADDLE_WITH_CUDA
-  AnalysisConfig config(true);
-  config.fraction_of_gpu_memory = 0.15;
+  config.EnableUseGpu(100, 0);
 #else
-  AnalysisConfig config;
+  config.DisableGpu();
 #endif
-  config.model_dir = FLAGS_dirname;
-  config.enable_ir_optim = true;

   auto _predictor = CreatePaddlePredictor<AnalysisConfig>(config);
   auto *predictor = static_cast<AnalysisPredictor *>(_predictor.get());

@@ -89,7 +89,8 @@ TEST(AnalysisPredictor, analysis_on) {
   }

   // compare with NativePredictor
-  auto naive_predictor = CreatePaddlePredictor<NativeConfig>(config);
+  auto naive_predictor =
+      CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());
   std::vector<PaddleTensor> naive_outputs;
   ASSERT_TRUE(naive_predictor->Run(inputs, &naive_outputs));
   ASSERT_EQ(naive_outputs.size(), 1UL);

@@ -98,9 +99,8 @@ TEST(AnalysisPredictor, analysis_on) {
 TEST(AnalysisPredictor, ZeroCopy) {
   AnalysisConfig config;
-  config.model_dir = FLAGS_dirname;
-  config.use_feed_fetch_ops = false;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchUseFeedFetchOps(false);
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

   auto w0 = predictor->GetInputTensor("firstw");

@@ -137,9 +137,9 @@ TEST(AnalysisPredictor, ZeroCopy) {
 TEST(AnalysisPredictor, Clone) {
   AnalysisConfig config;
-  config.model_dir = FLAGS_dirname;
-  config.use_feed_fetch_ops = true;
-  config.enable_ir_optim = true;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchUseFeedFetchOps(true);
+  config.SwitchIrOptim(true);

   std::vector<std::unique_ptr<PaddlePredictor>> predictors;
   predictors.emplace_back(CreatePaddlePredictor(config));
paddle/fluid/inference/api/api_anakin_engine.h
@@ -19,8 +19,6 @@ limitations under the License. */
 #pragma once

-#define WITH_ANAKIN
-
 #include <vector>

 #include "framework/core/net/net.h"
paddle/fluid/inference/api/api_impl.cc
@@ -288,7 +288,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   VLOG(3) << "create NativePaddlePredictor";
   if (config.use_gpu) {
     // 1. GPU memeroy
-    PADDLE_ENFORCE_GT(
+    PADDLE_ENFORCE_GE(
         config.fraction_of_gpu_memory, 0.f,
         "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
     PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
...
paddle/fluid/inference/api/api_impl_tester.cc
浏览文件 @
875a07c3
...
@@ -295,7 +295,8 @@ TEST(inference_api_native, image_classification_gpu) {
...
@@ -295,7 +295,8 @@ TEST(inference_api_native, image_classification_gpu) {
#endif
#endif
TEST
(
PassBuilder
,
Delete
)
{
TEST
(
PassBuilder
,
Delete
)
{
contrib
::
AnalysisConfig
config
(
false
);
contrib
::
AnalysisConfig
config
;
config
.
DisableGpu
();
config
.
pass_builder
()
->
DeletePass
(
"attention_lstm_fuse_pass"
);
config
.
pass_builder
()
->
DeletePass
(
"attention_lstm_fuse_pass"
);
const
auto
&
passes
=
config
.
pass_builder
()
->
AllPasses
();
const
auto
&
passes
=
config
.
pass_builder
()
->
AllPasses
();
auto
it
=
std
::
find
(
passes
.
begin
(),
passes
.
end
(),
"attention_lstm_fuse_pass"
);
auto
it
=
std
::
find
(
passes
.
begin
(),
passes
.
end
(),
"attention_lstm_fuse_pass"
);
...
...
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
@@ -36,12 +36,11 @@ namespace demo {
 */
 void Main() {
   std::unique_ptr<PaddlePredictor> predictor;
-  paddle::contrib::AnalysisConfig config(true);
-  config.param_file = FLAGS_modeldir + "/__params__";
-  config.prog_file = FLAGS_modeldir + "/__model__";
-  config.device = 0;
+  paddle::contrib::AnalysisConfig config;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(FLAGS_modeldir + "/__params__",
+                  FLAGS_modeldir + "/__model__");
   config.EnableTensorRtEngine();
-  config.fraction_of_gpu_memory = 0.1;  // set by yourself
   predictor = CreatePaddlePredictor(config);

   VLOG(3) << "begin to process data";
paddle/fluid/inference/api/demo_ci/vis_demo.cc
@@ -40,15 +40,14 @@ using contrib::AnalysisConfig;
 */
 void Main(bool use_gpu) {
   std::unique_ptr<PaddlePredictor> predictor, analysis_predictor;
-  AnalysisConfig config(use_gpu);
-  config.param_file = FLAGS_modeldir + "/__params__";
-  config.prog_file = FLAGS_modeldir + "/__model__";
-  config.device = 0;
-  if (FLAGS_use_gpu) {
-    config.fraction_of_gpu_memory = 0.1;  // set by yourself
+  AnalysisConfig config;
+  if (use_gpu) {
+    config.EnableUseGpu(100, 0);
   }
+  config.SetModel(FLAGS_modeldir + "/__model__",
+                  FLAGS_modeldir + "/__params__");

-  predictor = CreatePaddlePredictor<NativeConfig>(config);
+  predictor = CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());
   analysis_predictor = CreatePaddlePredictor(config);

   // Just a single batch of data.
paddle/fluid/inference/api/paddle_analysis_config.h
@@ -34,26 +34,67 @@ class AnalysisPredictor;
 namespace contrib {

 // NOTE WIP, not stable yet.
-struct AnalysisConfig : public NativeConfig {
-  explicit AnalysisConfig(bool use_gpu = false);
+struct AnalysisConfig {
+  AnalysisConfig() = default;
   explicit AnalysisConfig(const AnalysisConfig& other);
-  explicit AnalysisConfig(AnalysisConfig&& other);
+  explicit AnalysisConfig(const std::string& model_dir);
+  explicit AnalysisConfig(const std::string& prog_file,
+                          const std::string& params_file);
+
+  // Model path related.
+  void SetModel(const std::string& model_dir) { model_dir_ = model_dir; }
+  void SetModel(const std::string& prog_file_path,
+                const std::string& params_file_path);
+  void SetProgFile(const std::string& x) { prog_file_ = x; }
+  void SetParamsFile(const std::string& x) { params_file_ = x; }
+  const std::string& model_dir() const { return model_dir_; }
+  const std::string& prog_file() const { return prog_file_; }
+  const std::string& params_file() const { return params_file_; }
+
+  // GPU related.
+  void EnableUseGpu(uint64_t memory_pool_init_size_mb, int device_id = 0);
+  void DisableGpu();
+  bool use_gpu() const { return use_gpu_; }
+  int gpu_device_id() const { return device_id_; }
+  int memory_pool_init_size_mb() const { return memory_pool_init_size_mb_; }
+  float fraction_of_gpu_memory_for_pool() const;

   // Determine whether to perform graph optimization.
-  bool enable_ir_optim = true;
+  void SwitchIrOptim(int x = true) { enable_ir_optim_ = x; }
+  bool ir_optim() const { return enable_ir_optim_; }

-  // Get a pass builder for customize the passes in IR analysis phase.
-  PassStrategy* pass_builder() const;
+  void SwitchUseFeedFetchOps(int x = true) { use_feed_fetch_ops_ = x; }
+  bool use_feed_fetch_ops_enabled() const { return use_feed_fetch_ops_; }

-  // NOT stable yet.
-  bool use_feed_fetch_ops{true};
+  void SwitchSpecifyInputNames(bool x = true) { specify_input_name_ = x; }
+  bool specify_input_name() const { return specify_input_name_; }

   void EnableTensorRtEngine(int workspace_size = 1 << 20,
                             int max_batch_size = 1, int min_subgraph_size = 3);
-  bool use_tensorrt() const { return use_tensorrt_; }
+  bool tensorrt_engine_enabled() const { return use_tensorrt_; }
+
+  void SwitchIrDebug(int x = true) { ir_debug_ = x; }

   void EnableMKLDNN();
-  bool use_mkldnn() const { return use_mkldnn_; }
+  bool mkldnn_enabled() const { return use_mkldnn_; }
+
+  // Set and get the number of cpu math library threads.
+  void SetCpuMathLibraryNumThreads(int cpu_math_library_num_threads);
+  int cpu_math_library_num_threads() const {
+    return cpu_math_library_num_threads_;
+  }
+
+  NativeConfig ToNativeConfig() const {
+    NativeConfig config;
+    config.model_dir = model_dir_;
+    config.prog_file = prog_file_;
+    config.param_file = params_file_;
+    config.use_gpu = use_gpu_;
+    config.device = device_id_;
+    config.fraction_of_gpu_memory = fraction_of_gpu_memory_for_pool();
+    config.specify_input_name = specify_input_name_;
+    return config;
+  }
+
   void SetMKLDNNOp(std::unordered_set<std::string> op_list) {
     mkldnn_enabled_op_types_ = op_list;
   }

@@ -65,10 +106,29 @@ struct AnalysisConfig : public NativeConfig {
   friend class ::paddle::AnalysisPredictor;

+  // NOTE just for developer, not an official API, easily to be broken.
+  // Get a pass builder for customize the passes in IR analysis phase.
+  PassStrategy* pass_builder() const;
+
+ protected:
+  // Update the config.
+  void Update();
+
+  std::string SerializeInfoCache();
+
  protected:
+  // Model pathes.
+  std::string model_dir_;
+  std::string prog_file_;
+  std::string params_file_;
+
+  // GPU releated.
+  bool use_gpu_{false};
+  int device_id_{0};
+  uint64_t memory_pool_init_size_mb_{100};  // initial size is 100MB.
+
+  // TensorRT releated.
   bool use_tensorrt_{false};
-  bool use_mkldnn_{false};
-  std::unordered_set<std::string> mkldnn_enabled_op_types_;
   // For workspace_size, refer it from here:
   // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
   int tensorrt_workspace_size_;

@@ -82,17 +142,24 @@ struct AnalysisConfig : public NativeConfig {
   // We set this variable to control the minimum number of nodes in the
   // subgraph, 3 as default value.
   int tensorrt_min_subgraph_size_{3};
-  std::unique_ptr<PassStrategy> pass_builder_;
+
+  bool use_mkldnn_{false};
+  std::unordered_set<std::string> mkldnn_enabled_op_types_;
+
   bool model_from_memory_{false};
-};
-
-// Configurations for Anakin engine.
-struct AnakinConfig : public PaddlePredictor::Config {
-  enum TargetType { NVGPU = 0, X86 };
-  int device;
-  std::string model_file;
-  int max_batch_size{-1};
-  TargetType target_type;
+
+  bool enable_ir_optim_{true};
+  bool use_feed_fetch_ops_{true};
+  bool ir_debug_{false};
+
+  bool specify_input_name_{false};
+
+  int cpu_math_library_num_threads_{1};
+
+  // A runtime cache, shouldn't be transferred to others.
+  std::string serialized_info_cache_;
+
+  mutable std::unique_ptr<PassStrategy> pass_builder_;
 };

 }  // namespace contrib
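Taken together, the header now exposes the whole configuration surface as methods, with ToNativeConfig() as the bridge back to the old struct. A minimal usage sketch assembled from calls shown in this diff (paths are placeholders):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void BuildConfigSketch() {
  paddle::contrib::AnalysisConfig config;
  config.SetModel("/path/to/__model__", "/path/to/__params__");  // placeholder paths
  config.EnableUseGpu(100 /*memory_pool_init_size_mb*/, 0 /*device_id*/);
  config.SwitchIrOptim(true);
  config.SwitchSpecifyInputNames();
  config.SwitchUseFeedFetchOps(false);  // the zero-copy tests disable feed/fetch ops

  // Interop with the old NativeConfig-based predictor:
  paddle::NativeConfig native = config.ToNativeConfig();
  (void)native;
}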
paddle/fluid/inference/api/paddle_inference_api.h
@@ -26,9 +26,8 @@ limitations under the License. */
 #include <string>
 #include <vector>

-#include "paddle_api.h"              // NOLINT
-#ifndef WITH_ANAKIN
 #include "paddle_analysis_config.h"  // NOLINT
-#else
+#include "paddle_api.h"              // NOLINT
+#ifdef WITH_ANAKIN
 #include "paddle_anakin_config.h"  // NOLINT
 #endif
paddle/fluid/inference/api/paddle_pass_builder.h
@@ -62,7 +62,12 @@ class PassStrategy : public PaddlePassBuilder {
   // still some CPU kernels running in CPU mode.
   virtual void EnableMKLDNN() = 0;

+  bool use_gpu() const { return use_gpu_; }
+
   virtual ~PassStrategy() = default;
+
+ protected:
+  bool use_gpu_{false};
 };

 /*

@@ -88,6 +93,7 @@ class CpuPassStrategy : public PassStrategy {
         "conv_eltwiseadd_bn_fuse_pass",  //
         "is_test_pass",                  //
     });
+    use_gpu_ = false;
   }

   virtual ~CpuPassStrategy() = default;

@@ -126,10 +132,14 @@ class GpuPassStrategy : public PassStrategy {
         "conv_elementwise_add2_act_fuse_pass",  //
         "conv_elementwise_add_fuse_pass",       //
     });
+
+    use_gpu_ = true;
   }

   GpuPassStrategy(const GpuPassStrategy &other)
-      : PassStrategy(other.AllPasses()) {}
+      : PassStrategy(other.AllPasses()) {
+    use_gpu_ = true;
+  }

   void EnableMKLDNN() override;
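The pass builder remains the escape hatch for customizing the IR phase. A short sketch wrapping pass-builder calls that appear elsewhere in this commit (DeletePass, TurnOnDebug); the deleted pass name is taken from the trt_models_tester.cc hunk below:

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void CustomizePassesSketch() {
  paddle::contrib::AnalysisConfig config;
  // Drop a fuse pass and enable per-pass debug output.
  config.pass_builder()->DeletePass("conv_bn_fuse_pass");
  config.pass_builder()->TurnOnDebug();
}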
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
@@ -165,12 +165,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(contrib::AnalysisConfig *cfg) {
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->param_file = FLAGS_infer_model + "/param";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim(true);
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
@@ -105,11 +105,10 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
@@ -76,11 +76,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(contrib::AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
@@ -84,13 +84,12 @@ void SetConfig(contrib::AnalysisConfig *cfg, bool memory_load = false) {
     cfg->SetModelBuffer(&buffer_prog[0], buffer_prog.size(), &buffer_param[0],
                         buffer_param.size());
   } else {
-    cfg->prog_file = FLAGS_infer_model + "/__model__";
-    cfg->param_file = FLAGS_infer_model + "/param";
+    cfg->SetModel(FLAGS_infer_model + "/__model__",
+                  FLAGS_infer_model + "/param");
   }
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
@@ -21,12 +21,10 @@ namespace inference {
 namespace analysis {

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->param_file = FLAGS_infer_model + "/params";
-  cfg->prog_file = FLAGS_infer_model + "/model";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->enable_ir_optim = true;
-  cfg->specify_input_name = true;
+  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
+  cfg->DisableGpu();
+  cfg->SwitchIrOptim();
+  cfg->SwitchSpecifyInputNames();
   cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
 }
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -204,12 +204,10 @@ void PrepareZeroCopyInputs(ZeroCopyTensor *lod_attention_tensor,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->param_file = FLAGS_infer_model + "/param";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {

@@ -225,10 +223,10 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
 // Easy for profiling independently.
 TEST(Analyzer_rnn1, profile) {
-  contrib::AnalysisConfig cfg(false);
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);
-  cfg.fraction_of_gpu_memory = 0.1;
-  cfg.pass_builder()->TurnOnDebug();
+  cfg.DisableGpu();
+  cfg.SwitchIrDebug();
   std::vector<PaddleTensor> outputs;

   std::vector<std::vector<PaddleTensor>> input_slots_all;

@@ -293,16 +291,18 @@ TEST(Analyzer_rnn1, multi_thread) {
 TEST(Analyzer_rnn1, ZeroCopy) {
   AnalysisConfig config;
   SetConfig(&config);
-  config.use_feed_fetch_ops = false;
+  config.SwitchUseFeedFetchOps(false);

   PaddlePlace place;

   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

-  config.use_feed_fetch_ops = true;
-  auto native_predictor = CreatePaddlePredictor<NativeConfig>(config);
+  config.SwitchUseFeedFetchOps(true);
+  auto native_predictor =
+      CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());

-  config.use_feed_fetch_ops = true;  // the analysis predictor needs feed/fetch.
+  config.SwitchUseFeedFetchOps(
+      true);  // the analysis predictor needs feed/fetch.
   auto analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);

 #define NEW_TENSOR(name__) \

@@ -362,7 +362,7 @@ TEST(Analyzer_rnn1, ZeroCopy) {
 TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
   AnalysisConfig config;
   SetConfig(&config);
-  config.use_feed_fetch_ops = false;
+  config.SwitchUseFeedFetchOps(false);

 #define NEW_TENSOR(name__) \
   auto name__##_tensor = predictor->GetInputTensor(#name__);
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
@@ -105,12 +105,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->param_file = FLAGS_infer_model + "/param";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
@@ -89,11 +89,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
@@ -122,12 +122,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data) {
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->param_file = FLAGS_infer_model + "/params";
-  cfg->prog_file = FLAGS_infer_model + "/model";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->enable_ir_optim = true;
-  cfg->specify_input_name = true;
+  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
   cfg->pass_builder()->TurnOnDebug();
   cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
 }
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
@@ -47,11 +47,10 @@ struct DataReader {
 };

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
@@ -51,12 +51,11 @@ Record ProcessALine(const std::string &line) {
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->param_file = FLAGS_infer_model + "/__params__";
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->enable_ir_optim = true;
-  cfg->specify_input_name = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__",
+                FLAGS_infer_model + "/__params__");
+  cfg->DisableGpu();
+  cfg->SwitchIrDebug();
+  cfg->SwitchSpecifyInputNames();
   // TODO(TJ): fix fusion gru
   cfg->pass_builder()->DeletePass("fc_gru_fuse_pass");
 }
paddle/fluid/inference/tests/api/config_printer.h
@@ -64,19 +64,23 @@ std::ostream &operator<<(std::ostream &os,
   num_spaces++;
   os << *reinterpret_cast<const NativeConfig *>(&config);
   if (!config.model_from_memory()) {
-    os << GenSpaces(num_spaces) << "prog_file: " << config.prog_file << "\n";
-    os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
+    os << GenSpaces(num_spaces) << "prog_file: " << config.prog_file() << "\n";
+    os << GenSpaces(num_spaces) << "param_file: " << config.params_file()
+       << "\n";
   } else {
     os << GenSpaces(num_spaces)
        << "prog_file and param_file: load from memory \n";
   }
-  os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.enable_ir_optim
-     << "\n";
+  os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.ir_optim()
+     << "\n";
   os << GenSpaces(num_spaces)
-     << "use_feed_fetch_ops: " << config.use_feed_fetch_ops << "\n";
-  os << GenSpaces(num_spaces) << "use_tensorrt: " << config.use_tensorrt()
-     << "\n";
-  os << GenSpaces(num_spaces) << "use_mkldnn: " << config.use_mkldnn()
-     << "\n";
+     << "use_feed_fetch_ops: " << config.use_feed_fetch_ops_enabled() << "\n";
+  os << GenSpaces(num_spaces)
+     << "use_tensorrt: " << config.tensorrt_engine_enabled() << "\n";
+  os << GenSpaces(num_spaces) << "use_mkldnn: " << config.mkldnn_enabled()
+     << "\n";
   num_spaces--;
   os << GenSpaces(num_spaces) << "}\n";
   return os;
paddle/fluid/inference/tests/api/tester_helper.h
@@ -328,7 +328,10 @@ void CompareNativeAndAnalysis(
     const std::vector<std::vector<PaddleTensor>> &inputs) {
   PrintConfig(config, true);
   std::vector<PaddleTensor> native_outputs, analysis_outputs;
-  TestOneThreadPrediction(config, inputs, &native_outputs, false);
+  const auto *analysis_config =
+      reinterpret_cast<const contrib::AnalysisConfig *>(config);
+  auto native_config = analysis_config->ToNativeConfig();
+  TestOneThreadPrediction(&native_config, inputs, &native_outputs, false);
   TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
   CompareResult(analysis_outputs, native_outputs);
 }
paddle/fluid/inference/tests/api/trt_models_tester.cc
@@ -46,22 +46,20 @@ void SetConfig<contrib::AnalysisConfig>(contrib::AnalysisConfig* config,
                                         std::string model_dir, bool use_gpu,
                                         bool use_tensorrt, int batch_size) {
   if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
-    config->prog_file = model_dir + "/" + FLAGS_prog_filename;
-    config->param_file = model_dir + "/" + FLAGS_param_filename;
+    config->SetModel(model_dir + "/" + FLAGS_prog_filename,
+                     model_dir + "/" + FLAGS_param_filename);
   } else {
-    config->model_dir = model_dir;
+    config->SetModel(model_dir);
   }
   if (use_gpu) {
-    config->use_gpu = true;
-    config->device = 0;
-    config->fraction_of_gpu_memory = 0.15;
+    config->EnableUseGpu(100, 0);
     if (use_tensorrt) {
       config->EnableTensorRtEngine(1 << 10, batch_size);
       config->pass_builder()->DeletePass("conv_bn_fuse_pass");
       config->pass_builder()->DeletePass("fc_fuse_pass");
       config->pass_builder()->TurnOnDebug();
     } else {
-      config->enable_ir_optim = true;
+      config->SwitchIrOptim();
     }
   }
 }

@@ -77,7 +75,8 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
   std::vector<PaddleTensor> outputs;
   if (use_analysis || use_tensorrt) {
-    contrib::AnalysisConfig config(true);
+    contrib::AnalysisConfig config;
+    config.EnableUseGpu(100, 0);
     config.pass_builder()->TurnOnDebug();
     SetConfig<contrib::AnalysisConfig>(&config, model_dir, true, use_tensorrt,
                                        FLAGS_batch_size);

@@ -109,7 +108,8 @@ void compare(std::string model_dir, bool use_tensorrt) {
                           &native_outputs, false);

   std::vector<PaddleTensor> analysis_outputs;
-  contrib::AnalysisConfig analysis_config(true);
+  contrib::AnalysisConfig analysis_config;
+  analysis_config.EnableUseGpu(50, 0);
   SetConfig<contrib::AnalysisConfig>(&analysis_config, model_dir, true,
                                      use_tensorrt, FLAGS_batch_size);
   TestOneThreadPrediction(

@@ -154,9 +154,9 @@ TEST(TensorRT_mobilenet, analysis) {
 TEST(AnalysisPredictor, use_gpu) {
   std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
-  AnalysisConfig config(true);
-  config.model_dir = model_dir;
-  config.fraction_of_gpu_memory = 0.15;
+  AnalysisConfig config;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(model_dir);
   config.pass_builder()->TurnOnDebug();

   std::vector<std::vector<PaddleTensor>> inputs_all;