BaiXuePrincess / Paddle (fork of PaddlePaddle / Paddle)

Commit 875a07c3 (unverified)
refactor inference analysis api (#14634)

Authored by Yan Chunwei on Jan 07, 2019; committed via GitHub on Jan 07, 2019.
Parent: 99e6e8b0
Showing 27 changed files with 418 additions and 256 deletions (+418 −256).
cmake/configure.cmake                                                      +1   -0
paddle/fluid/framework/naive_executor.cc                                   +8   -8
paddle/fluid/inference/api/analysis_config.cc                              +154 -66
paddle/fluid/inference/api/analysis_predictor.cc                           +46  -37
paddle/fluid/inference/api/analysis_predictor_tester.cc                    +15  -15
paddle/fluid/inference/api/api_anakin_engine.h                             +0   -2
paddle/fluid/inference/api/api_impl.cc                                     +1   -1
paddle/fluid/inference/api/api_impl_tester.cc                              +2   -1
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc                   +4   -5
paddle/fluid/inference/api/demo_ci/vis_demo.cc                             +6   -7
paddle/fluid/inference/api/paddle_analysis_config.h                        +88  -21
paddle/fluid/inference/api/paddle_inference_api.h                          +2   -3
paddle/fluid/inference/api/paddle_pass_builder.h                           +11  -1
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc                    +3   -6
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc                    +4   -5
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc                 +4   -5
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc                    +5   -6
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc               +4   -6
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc                   +14  -14
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc                   +4   -6
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc              +4   -5
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc              +3   -6
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc    +4   -5
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc                    +5   -6
paddle/fluid/inference/tests/api/config_printer.h                          +10  -6
paddle/fluid/inference/tests/api/tester_helper.h                           +4   -1
paddle/fluid/inference/tests/api/trt_models_tester.cc                      +12  -12
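The common thread in the diffs below: contrib::AnalysisConfig no longer inherits NativeConfig's public fields; callers go through setters and getters instead. A minimal migration sketch follows (the model path and pool size are placeholders, not values taken from this commit):

#include <memory>
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

std::unique_ptr<paddle::PaddlePredictor> MakePredictor(const std::string &dir) {
  paddle::contrib::AnalysisConfig config;
  // Old field-based style removed by this PR:
  //   config.model_dir = dir;
  //   config.use_gpu = true;
  //   config.device = 0;
  //   config.fraction_of_gpu_memory = 0.15;
  //   config.enable_ir_optim = true;
  // New method-based style introduced here:
  config.SetModel(dir);                  // or SetModel(prog_file, params_file)
  config.EnableUseGpu(100, 0);           // memory pool size in MB, device id
  config.SwitchIrOptim(true);
  config.SwitchSpecifyInputNames(true);
  return paddle::CreatePaddlePredictor(config);
}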
cmake/configure.cmake (view file @ 875a07c3)

@@ -134,6 +134,7 @@ if(WITH_GPU)
       message(WARNING "Anakin needs CUDNN >= 7.0 to compile. Force WITH_ANAKIN=OFF")
       set(WITH_ANAKIN OFF CACHE STRING "Anakin is valid only when CUDNN >= 7.0." FORCE)
     endif()
+    add_definitions(-DWITH_ANAKIN)
   endif()

   if(WITH_ANAKIN)
     # NOTICE(minqiyang): the end slash is important because $CUDNN_INCLUDE_DIR
paddle/fluid/framework/naive_executor.cc (view file @ 875a07c3)

@@ -40,12 +40,12 @@ void NaiveExecutor::Prepare(Scope *scope, const ProgramDesc &program_desc,
 void NaiveExecutor::Run() {
 #ifndef PADDLE_ON_INFERENCE
-  LOG_FIRST_N(WARNING, 15) << "The NaiveExecutor can not work properly if the "
-                              "cmake flag ON_INFER is not set.";
-  LOG_FIRST_N(WARNING, 15) << "Unlike the training phase, all the scopes and "
-                              "variables will be reused to save the allocation "
-                              "overhead.";
-  LOG_FIRST_N(WARNING, 15) << "Please re-compile the inference library by "
-                              "setting the cmake flag ON_INFER=ON if you are "
-                              "running Paddle Inference";
+  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
+                             "cmake flag ON_INFER is not set.";
+  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
+                             "variables will be reused to save the allocation "
+                             "overhead.";
+  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
+                             "setting the cmake flag ON_INFER=ON if you are "
+                             "running Paddle Inference";
 #endif  // PADDLE_ON_INFERENCE
paddle/fluid/inference/api/analysis_config.cc (view file @ 875a07c3)

@@ -14,86 +14,101 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle_pass_builder.h"  // NOLINT
+#include "paddle/fluid/platform/gpu_info.h"

 namespace paddle {

 PassStrategy *contrib::AnalysisConfig::pass_builder() const {
-  PADDLE_ENFORCE(
-      pass_builder_.get(),
-      "Should call constructor first, that will init the pass_builder_.");
+  if (!pass_builder_.get()) {
+    if (use_gpu_) {
+      LOG(INFO) << "Create GPU IR passes";
+      pass_builder_.reset(new GpuPassStrategy);
+    } else {
+      LOG(INFO) << "Create CPU IR passes";
+      pass_builder_.reset(new CpuPassStrategy);
+    }
+  } else if (pass_builder_->use_gpu() ^ use_gpu()) {
+    LOG(WARNING) << "The use_gpu flag is not compatible between Config and "
+                    "PassBuilder, the flags are "
+                 << use_gpu() << " " << pass_builder_->use_gpu();
+    LOG(WARNING) << "Please make them compatible, still use the existing "
+                    "PassBuilder.";
+  }
+
   return pass_builder_.get();
 }

-contrib::AnalysisConfig::AnalysisConfig(bool use_gpu) {
-  this->use_gpu = use_gpu;
-  if (use_gpu) {
-    pass_builder_.reset(new GpuPassStrategy);
-  } else {
-    pass_builder_.reset(new CpuPassStrategy);
-  }
+contrib::AnalysisConfig::AnalysisConfig(const std::string &model_dir) {
+  model_dir_ = model_dir;
+}
+contrib::AnalysisConfig::AnalysisConfig(const std::string &prog_file,
+                                        const std::string &params_file) {
+  prog_file_ = prog_file;
+  params_file_ = params_file;
+}
+void contrib::AnalysisConfig::SetModel(const std::string &prog_file_path,
+                                       const std::string &params_file_path) {
+  prog_file_ = prog_file_path;
+  params_file_ = params_file_path;
+}
+void contrib::AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb,
+                                           int device_id) {
+#ifdef PADDLE_WITH_CUDA
+  use_gpu_ = true;
+  memory_pool_init_size_mb_ = memory_pool_init_size_mb;
+  device_id_ = device_id;
+#else
+  LOG(ERROR) << "Please compile with gpu to EnableGpu";
+  use_gpu_ = false;
+#endif
+}
+void contrib::AnalysisConfig::DisableGpu() { use_gpu_ = false; }

 contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
-  // fields from Config
-  model_dir = other.model_dir;
-  // fields from NativeConfig
-  use_gpu = other.use_gpu;
-  device = other.device;
-  fraction_of_gpu_memory = other.fraction_of_gpu_memory;
-  prog_file = other.prog_file;
-  param_file = other.param_file;
-  specify_input_name = other.specify_input_name;
-  cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_;
-  // fields from this.
-  enable_ir_optim = other.enable_ir_optim;
-  // For mkldnn
-  use_mkldnn_ = other.use_mkldnn_;
-  mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_;
-  use_feed_fetch_ops = other.use_feed_fetch_ops;
-  use_tensorrt_ = other.use_tensorrt_;
-  tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
-  tensorrt_workspace_size_ = other.tensorrt_workspace_size_;
-  tensorrt_min_subgraph_size_ = other.tensorrt_min_subgraph_size_;
-  model_from_memory_ = other.model_from_memory_;
-
-  if (use_gpu) {
+#define CP_MEMBER(member__) member__ = other.member__;
+
+  // Model related.
+  CP_MEMBER(model_dir_);
+  CP_MEMBER(prog_file_);
+  CP_MEMBER(params_file_);
+  CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
+                                  // params_file_ fields.
+  // Gpu releated.
+  CP_MEMBER(use_gpu_);
+  CP_MEMBER(device_id_);
+  CP_MEMBER(memory_pool_init_size_mb_);
+  // TensorRT releated.
+  CP_MEMBER(use_tensorrt_);
+  CP_MEMBER(tensorrt_workspace_size_);
+  CP_MEMBER(tensorrt_max_batchsize_);
+  CP_MEMBER(tensorrt_min_subgraph_size_);
+  // MKLDNN releated.
+  CP_MEMBER(use_mkldnn_);
+  CP_MEMBER(mkldnn_enabled_op_types_);
+
+  // Ir related.
+  CP_MEMBER(enable_ir_optim_);
+  CP_MEMBER(use_feed_fetch_ops_);
+  CP_MEMBER(ir_debug_);
+  CP_MEMBER(specify_input_name_);
+
+  CP_MEMBER(cpu_math_library_num_threads_);
+
+  CP_MEMBER(serialized_info_cache_);
+
+  if (use_gpu_) {
     pass_builder_.reset(new GpuPassStrategy(
         *static_cast<GpuPassStrategy *>(other.pass_builder())));
   } else {
     pass_builder_.reset(new CpuPassStrategy(
         *static_cast<CpuPassStrategy *>(other.pass_builder())));
   }
-}

-contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) {
-  // fields from Config
-  model_dir = other.model_dir;
-  // fields from NativeConfig
-  use_gpu = other.use_gpu;
-  device = other.device;
-  fraction_of_gpu_memory = other.fraction_of_gpu_memory;
-  prog_file = other.prog_file;
-  param_file = other.param_file;
-  specify_input_name = other.specify_input_name;
-  cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_;
-  // fields from this.
-  enable_ir_optim = other.enable_ir_optim;
-  // For mkldnn
-  use_mkldnn_ = other.use_mkldnn_;
-  mkldnn_enabled_op_types_ = other.mkldnn_enabled_op_types_;
-  use_feed_fetch_ops = other.use_feed_fetch_ops;
-  use_tensorrt_ = other.use_tensorrt_;
-  tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
-  tensorrt_workspace_size_ = other.tensorrt_workspace_size_;
-  tensorrt_min_subgraph_size_ = other.tensorrt_min_subgraph_size_;
-  model_from_memory_ = other.model_from_memory_;
-
-  pass_builder_ = std::move(other.pass_builder_);
+#undef CP_MEMBER
 }

 void contrib::AnalysisConfig::EnableMKLDNN() {

@@ -112,17 +127,90 @@ void contrib::AnalysisConfig::EnableTensorRtEngine(int workspace_size,
   use_tensorrt_ = true;
   tensorrt_workspace_size_ = workspace_size;
   tensorrt_max_batchsize_ = max_batch_size;
   tensorrt_min_subgraph_size_ = min_subgraph_size;
-  // Append after the conv+affine_channel fuse pass.
-  pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
 }

+void contrib::AnalysisConfig::Update() {
+  auto info = SerializeInfoCache();
+  if (info == serialized_info_cache_) return;
+
+  if (use_gpu_) {
+    pass_builder_.reset(new GpuPassStrategy);
+  } else {
+    pass_builder_.reset(new CpuPassStrategy);
+  }
+
+  if (use_tensorrt_) {
+    if (!use_gpu_) {
+      LOG(ERROR)
+          << "TensorRT engine is not available when EnableGpu() not actived.";
+    } else {
+      // Append after the infer_clean pass.
+      pass_builder()->InsertPass(1, "tensorrt_subgraph_pass");
+    }
+  }
+
+  if (use_mkldnn_) {
+    if (!enable_ir_optim_) {
+      LOG(ERROR)
+          << "EnableMKLDNN() only works when IR optimization is enabled.";
+    }
+#ifdef PADDLE_WITH_MKLDNN
+    pass_builder()->EnableMKLDNN();
+    use_mkldnn_ = true;
+#else
+    LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
+    use_mkldnn_ = false;
+#endif
+  }
+
+  if (ir_debug_) {
+    pass_builder()->TurnOnDebug();
+  }
+}
+
+std::string contrib::AnalysisConfig::SerializeInfoCache() {
+  std::stringstream ss;
+  ss << use_gpu_;
+  ss << memory_pool_init_size_mb_;
+  ss << use_tensorrt_;
+  ss << tensorrt_workspace_size_;
+  ss << tensorrt_max_batchsize_;
+  ss << use_mkldnn_;
+  ss << enable_ir_optim_;
+  ss << use_feed_fetch_ops_;
+  ss << ir_debug_;
+  return ss.str();
+}
+
+void contrib::AnalysisConfig::SetCpuMathLibraryNumThreads(
+    int cpu_math_library_num_threads) {
+  cpu_math_library_num_threads_ = cpu_math_library_num_threads;
+}
+
+float contrib::AnalysisConfig::fraction_of_gpu_memory_for_pool() const {
+#ifdef PADDLE_WITH_CUDA
+  // Get the GPU memory details and calculate the fraction of memory for the
+  // GPU memory pool.
+  size_t gpu_used, gpu_available;
+  platform::GpuMemoryUsage(&gpu_used, &gpu_available);
+  double total_gpu_memory = (gpu_used + gpu_available) / 1024. / 1024.;
+  float fraction_of_gpu_memory =
+      static_cast<double>(memory_pool_init_size_mb()) / total_gpu_memory;
+  return fraction_of_gpu_memory;
+#else
+  return 0.;
+#endif
+}
+
 void contrib::AnalysisConfig::SetModelBuffer(const char *prog_buffer,
                                              size_t prog_buffer_size,
                                              const char *param_buffer,
                                              size_t param_buffer_size) {
-  prog_file = std::string(prog_buffer, prog_buffer + prog_buffer_size);
-  param_file = std::string(param_buffer, param_buffer + param_buffer_size);
+  prog_file_ = std::string(prog_buffer, prog_buffer + prog_buffer_size);
+  params_file_ = std::string(param_buffer, param_buffer + param_buffer_size);
   model_from_memory_ = true;
 }
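The new Update()/SerializeInfoCache() pair above rebuilds the pass strategy only when the serialized option string actually changes. A self-contained sketch of that caching pattern, using invented names rather than the Paddle types:

#include <sstream>
#include <string>
#include <vector>

class TinyConfig {
 public:
  void EnableGpu(bool on) { use_gpu_ = on; Update(); }
  void EnableTensorRt(bool on) { use_tensorrt_ = on; Update(); }
  const std::vector<std::string> &passes() const { return passes_; }

 private:
  std::string Serialize() const {
    std::stringstream ss;
    ss << use_gpu_ << use_tensorrt_;
    return ss.str();
  }
  void Update() {
    std::string info = Serialize();
    if (info == cache_) return;  // nothing changed, keep the existing passes
    cache_ = info;
    passes_ = use_gpu_ ? std::vector<std::string>{"gpu_base_pass"}
                       : std::vector<std::string>{"cpu_base_pass"};
    if (use_tensorrt_ && use_gpu_) passes_.push_back("tensorrt_subgraph_pass");
  }

  bool use_gpu_{false};
  bool use_tensorrt_{false};
  std::string cache_;
  std::vector<std::string> passes_;
};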
paddle/fluid/inference/api/analysis_predictor.cc (view file @ 875a07c3)

@@ -33,6 +33,7 @@
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/cpu_helper.h"
+#include "paddle/fluid/platform/gpu_info.h"
 #include "paddle/fluid/platform/profiler.h"

 DECLARE_bool(profile);

@@ -59,7 +60,7 @@ bool AnalysisPredictor::Init(
   if (FLAGS_profile) {
     LOG(WARNING) << "Profiler is actived, might affect the performance";
     LOG(INFO) << "You can turn off by set gflags '-profile false'";
-    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
+    auto tracking_device = config_.use_gpu() ? platform::ProfilerState::kAll
                                            : platform::ProfilerState::kCPU;
     platform::EnableProfiler(tracking_device);
   }

@@ -112,7 +113,7 @@ bool AnalysisPredictor::PrepareProgram(
     // Optimize the program, and load parameters and modify them in the
     // scope_.
     // This will change the scope_ address.
-    if (config_.enable_ir_optim) {
+    if (config_.ir_optim()) {
       status_ir_optim_enabled_ = true;
       OptimizeInferenceProgram();
     } else {

@@ -140,9 +141,9 @@ bool AnalysisPredictor::PrepareProgram(
   return true;
 }
 bool AnalysisPredictor::CreateExecutor() {
-  if (config_.use_gpu) {
+  if (config_.use_gpu_) {
     status_use_gpu_ = true;
-    place_ = paddle::platform::CUDAPlace(config_.device);
+    place_ = paddle::platform::CUDAPlace(config_.device_id_);
   } else {
     place_ = paddle::platform::CPUPlace();
   }

@@ -151,7 +152,7 @@ bool AnalysisPredictor::CreateExecutor() {
 }
 bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(sub_scope_, *inference_program_, 0,
-                     config_.use_feed_fetch_ops);
+                     config_.use_feed_fetch_ops_);

   PADDLE_ENFORCE_NOT_NULL(sub_scope_);

@@ -250,7 +251,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     }
     input.set_lod(lod);
     int idx = -1;
-    if (config_.specify_input_name) {
+    if (config_.specify_input_name_) {
       auto name = inputs[i].name;
       if (feed_names_.find(name) == feed_names_.end()) {
         LOG(ERROR) << "feed names from program do not have name: [" << name

@@ -314,22 +315,22 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
 void AnalysisPredictor::OptimizeInferenceProgram() {
   status_program_optimized_ = true;

-  argument_.SetUseGPU(config_.use_gpu);
-  argument_.SetGPUDeviceId(config_.device);
+  argument_.SetUseGPU(config_.use_gpu());
+  argument_.SetGPUDeviceId(config_.gpu_device_id());
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
-  if (!config_.model_dir.empty()) {
-    argument_.SetModelDir(config_.model_dir);
+  if (!config_.model_dir().empty()) {
+    argument_.SetModelDir(config_.model_dir());
   } else {
-    PADDLE_ENFORCE(!config_.param_file.empty(),
+    PADDLE_ENFORCE(!config_.params_file().empty(),
                    "Either model_dir or (param_file, prog_file) should be set.");
-    PADDLE_ENFORCE(!config_.prog_file.empty());
-    argument_.SetModelProgramPath(config_.prog_file);
-    argument_.SetModelParamsPath(config_.param_file);
+    PADDLE_ENFORCE(!config_.prog_file().empty());
+    argument_.SetModelProgramPath(config_.prog_file());
+    argument_.SetModelParamsPath(config_.params_file());
   }

-  if (config_.use_gpu && config_.use_tensorrt_) {
+  if (config_.use_gpu() && config_.tensorrt_engine_enabled()) {
     argument_.SetUseTensorRT(true);
     argument_.SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_);
     argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);

@@ -341,7 +342,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   }

   auto passes = config_.pass_builder()->AllPasses();
-  if (!config_.enable_ir_optim) passes.clear();
+  if (!config_.ir_optim()) passes.clear();
   argument_.SetIrAnalysisPasses(passes);
   argument_.SetScopeNotOwned(const_cast<framework::Scope *>(scope_.get()));
   Analyzer().Run(&argument_);

@@ -358,18 +359,26 @@ template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
   VLOG(3) << "create AnalysisConfig";
-  if (config.use_gpu) {
+  if (config.use_gpu()) {
     // 1. GPU memeroy
-    PADDLE_ENFORCE_GT(
-        config.fraction_of_gpu_memory, 0.f,
-        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
-    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
+    PADDLE_ENFORCE_GT(config.memory_pool_init_size_mb(), 0.f);
+    PADDLE_ENFORCE_GE(config.gpu_device_id(), 0, "Invalid device id %d",
+                      config.gpu_device_id());
     std::vector<std::string> flags;
-    if (config.fraction_of_gpu_memory >= 0.0f ||
-        config.fraction_of_gpu_memory <= 0.95f) {
+
+    float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool();
+    if (fraction_of_gpu_memory > 0.95f) {
+      LOG(ERROR)
+          << "Allocate too much memory for the GPU memory pool, assigned "
+          << config.memory_pool_init_size_mb() << " MB";
+      LOG(ERROR)
+          << "Try to shink the value by setting AnalysisConfig::EnableGpu(...)";
+    }
+
+    if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) {
       flags.push_back("dummpy");
-      std::string flag = "--fraction_of_gpu_memory_to_use=" +
-                         std::to_string(config.fraction_of_gpu_memory);
+      std::string flag = "--fraction_of_gpu_memory_to_use=" +
+                         std::to_string(fraction_of_gpu_memory);
       flags.push_back(flag);
       VLOG(3) << "set flag: " << flag;
       framework::InitGflags(flags);

@@ -443,22 +452,22 @@ bool AnalysisPredictor::ZeroCopyRun() {
 bool AnalysisPredictor::LoadProgramDesc() {
   // Initialize the inference program
   std::string filename;
-  if (!config_.model_dir.empty()) {
-    filename = config_.model_dir + "/__model__";
-  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
+  if (!config_.model_dir().empty()) {
+    filename = config_.model_dir() + "/__model__";
+  } else if (!config_.prog_file().empty() && !config_.params_file().empty()) {
     // All parameters are saved in a single file.
     // The file names should be consistent with that used
    // in Python API `fluid.io.save_inference_model`.
-    filename = config_.prog_file;
+    filename = config_.prog_file();
   } else {
-    if (config_.model_dir.empty() && config_.prog_file.empty()) {
+    if (config_.model_dir().empty() && config_.prog_file().empty()) {
      LOG(ERROR) << "Either model_dir or (prog_file, param_file) should be set.";
      return false;
     }
     LOG(ERROR) << string::Sprintf(
-        "not valid model path '%s' or program path '%s'.", config_.model_dir,
-        config_.param_file);
+        "not valid model path '%s' or program path '%s'.", config_.model_dir(),
+        config_.params_file());
     return false;
   }

@@ -478,7 +487,7 @@ bool AnalysisPredictor::LoadProgramDesc() {
     proto.ParseFromString(pb_content);
   } else {
-    proto.ParseFromString(config_.prog_file);
+    proto.ParseFromString(config_.prog_file());
   }
   inference_program_.reset(new framework::ProgramDesc(proto));
   return true;

@@ -508,27 +517,27 @@ bool AnalysisPredictor::LoadParameters() {
       new_var->SetLoDLevel(var->GetLoDLevel());
       new_var->SetPersistable(true);

-      if (!config_.param_file.empty()) {
+      if (!config_.params_file().empty()) {
        params.push_back(new_var->Name());
       } else {
        // append_op
        framework::OpDesc *op = load_block->AppendOp();
        op->SetType("load");
        op->SetOutput("Out", {new_var->Name()});
-        op->SetAttr("file_path", {config_.model_dir + "/" + new_var->Name()});
+        op->SetAttr("file_path", {config_.model_dir() + "/" + new_var->Name()});
        op->CheckAttrs();
       }
     }
   }

-  if (!config_.param_file.empty()) {
+  if (!config_.params_file().empty()) {
     // sort paramlist to have consistent ordering
     std::sort(params.begin(), params.end());
     // append just the load_combine op
     framework::OpDesc *op = load_block->AppendOp();
     op->SetType("load_combine");
     op->SetOutput("Out", params);
-    op->SetAttr("file_path", {config_.param_file});
+    op->SetAttr("file_path", {config_.params_file()});
     op->CheckAttrs();
   }
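CreatePaddlePredictor now derives the allocator flag from the absolute pool size instead of taking a raw fraction: fraction_of_gpu_memory_for_pool() divides memory_pool_init_size_mb() by total device memory. A small arithmetic sketch (the 8 GB device is an assumed example, not from the commit):

#include <cstdio>

int main() {
  double pool_init_size_mb = 100.0;            // AnalysisConfig default
  double total_gpu_memory_mb = 8.0 * 1024.0;   // assume an 8 GB device
  double fraction = pool_init_size_mb / total_gpu_memory_mb;
  // This value is forwarded as --fraction_of_gpu_memory_to_use=<fraction>.
  std::printf("fraction_of_gpu_memory_to_use = %.4f\n", fraction);  // 0.0122
  return 0;
}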
paddle/fluid/inference/api/analysis_predictor_tester.cc (view file @ 875a07c3)

@@ -25,9 +25,9 @@ namespace paddle {
 using contrib::AnalysisConfig;

 TEST(AnalysisPredictor, analysis_off) {
-  AnalysisConfig config(false);
-  config.model_dir = FLAGS_dirname;
-  config.enable_ir_optim = false;
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(false);

   auto _predictor = CreatePaddlePredictor<AnalysisConfig>(config);
   auto* predictor = static_cast<AnalysisPredictor*>(_predictor.get());

@@ -55,14 +55,14 @@ TEST(AnalysisPredictor, analysis_off) {
 }

 TEST(AnalysisPredictor, analysis_on) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(true);
 #ifdef PADDLE_WITH_CUDA
-  AnalysisConfig config(true);
-  config.fraction_of_gpu_memory = 0.15;
+  config.EnableUseGpu(100, 0);
 #else
-  AnalysisConfig config;
+  config.DisableGpu();
 #endif
-  config.model_dir = FLAGS_dirname;
-  config.enable_ir_optim = true;

   auto _predictor = CreatePaddlePredictor<AnalysisConfig>(config);
   auto* predictor = static_cast<AnalysisPredictor*>(_predictor.get());

@@ -89,7 +89,8 @@ TEST(AnalysisPredictor, analysis_on) {
   }

   // compare with NativePredictor
-  auto naive_predictor = CreatePaddlePredictor<NativeConfig>(config);
+  auto naive_predictor =
+      CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());
   std::vector<PaddleTensor> naive_outputs;
   ASSERT_TRUE(naive_predictor->Run(inputs, &naive_outputs));
   ASSERT_EQ(naive_outputs.size(), 1UL);

@@ -98,9 +99,8 @@ TEST(AnalysisPredictor, analysis_on) {
 TEST(AnalysisPredictor, ZeroCopy) {
   AnalysisConfig config;
-  config.model_dir = FLAGS_dirname;
-  config.use_feed_fetch_ops = false;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchUseFeedFetchOps(false);
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

   auto w0 = predictor->GetInputTensor("firstw");

@@ -137,9 +137,9 @@ TEST(AnalysisPredictor, ZeroCopy) {
 TEST(AnalysisPredictor, Clone) {
   AnalysisConfig config;
-  config.model_dir = FLAGS_dirname;
-  config.use_feed_fetch_ops = true;
-  config.enable_ir_optim = true;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchUseFeedFetchOps(true);
+  config.SwitchIrOptim(true);

   std::vector<std::unique_ptr<PaddlePredictor>> predictors;
   predictors.emplace_back(CreatePaddlePredictor(config));
paddle/fluid/inference/api/api_anakin_engine.h (view file @ 875a07c3)

@@ -19,8 +19,6 @@ limitations under the License. */
 #pragma once

-#define WITH_ANAKIN
-
 #include <vector>

 #include "framework/core/net/net.h"
paddle/fluid/inference/api/api_impl.cc (view file @ 875a07c3)

@@ -288,7 +288,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   VLOG(3) << "create NativePaddlePredictor";
   if (config.use_gpu) {
     // 1. GPU memeroy
-    PADDLE_ENFORCE_GT(
+    PADDLE_ENFORCE_GE(
         config.fraction_of_gpu_memory, 0.f,
         "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
     PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
paddle/fluid/inference/api/api_impl_tester.cc (view file @ 875a07c3)

@@ -295,7 +295,8 @@ TEST(inference_api_native, image_classification_gpu) {
 #endif

 TEST(PassBuilder, Delete) {
-  contrib::AnalysisConfig config(false);
+  contrib::AnalysisConfig config;
+  config.DisableGpu();
   config.pass_builder()->DeletePass("attention_lstm_fuse_pass");
   const auto& passes = config.pass_builder()->AllPasses();
   auto it = std::find(passes.begin(), passes.end(), "attention_lstm_fuse_pass");
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc (view file @ 875a07c3)

@@ -36,12 +36,11 @@ namespace demo {
  */
 void Main() {
   std::unique_ptr<PaddlePredictor> predictor;
-  paddle::contrib::AnalysisConfig config(true);
-  config.param_file = FLAGS_modeldir + "/__params__";
-  config.prog_file = FLAGS_modeldir + "/__model__";
-  config.device = 0;
+  paddle::contrib::AnalysisConfig config;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(FLAGS_modeldir + "/__params__",
+                  FLAGS_modeldir + "/__model__");
   config.EnableTensorRtEngine();
-  config.fraction_of_gpu_memory = 0.1;  // set by yourself
   predictor = CreatePaddlePredictor(config);

   VLOG(3) << "begin to process data";
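For the TensorRT path the same pattern applies; a condensed sketch with placeholder paths (the EnableTensorRtEngine defaults, workspace_size = 1 << 20, max_batch_size = 1, min_subgraph_size = 3, come from paddle_analysis_config.h further down):

#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void BuildTrtPredictor(const std::string &model_dir) {
  paddle::contrib::AnalysisConfig config;
  config.EnableUseGpu(100, 0);  // 100 MB initial pool on device 0
  config.SetModel(model_dir + "/__model__", model_dir + "/__params__");
  config.EnableTensorRtEngine();  // defaults: 1 << 20, 1, 3
  auto predictor = paddle::CreatePaddlePredictor(config);
  // Feed PaddleTensor inputs and call predictor->Run(...) as in the demo.
}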
paddle/fluid/inference/api/demo_ci/vis_demo.cc (view file @ 875a07c3)

@@ -40,15 +40,14 @@ using contrib::AnalysisConfig;
  */
 void Main(bool use_gpu) {
   std::unique_ptr<PaddlePredictor> predictor, analysis_predictor;
-  AnalysisConfig config(use_gpu);
-  config.param_file = FLAGS_modeldir + "/__params__";
-  config.prog_file = FLAGS_modeldir + "/__model__";
-  config.device = 0;
-  if (FLAGS_use_gpu) {
-    config.fraction_of_gpu_memory = 0.1;  // set by yourself
+  AnalysisConfig config;
+  if (use_gpu) {
+    config.EnableUseGpu(100, 0);
   }
+  config.SetModel(FLAGS_modeldir + "/__model__",
+                  FLAGS_modeldir + "/__params__");

-  predictor = CreatePaddlePredictor<NativeConfig>(config);
+  predictor = CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());
   analysis_predictor = CreatePaddlePredictor(config);

   // Just a single batch of data.
paddle/fluid/inference/api/paddle_analysis_config.h (view file @ 875a07c3)

@@ -34,26 +34,67 @@ class AnalysisPredictor;
 namespace contrib {

 // NOTE WIP, not stable yet.
-struct AnalysisConfig : public NativeConfig {
-  explicit AnalysisConfig(bool use_gpu = false);
+struct AnalysisConfig {
+  AnalysisConfig() = default;
   explicit AnalysisConfig(const AnalysisConfig& other);
   explicit AnalysisConfig(AnalysisConfig&& other);
+  explicit AnalysisConfig(const std::string& model_dir);
+  explicit AnalysisConfig(const std::string& prog_file,
+                          const std::string& params_file);
+
+  // Model path related.
+  void SetModel(const std::string& model_dir) { model_dir_ = model_dir; }
+  void SetModel(const std::string& prog_file_path,
+                const std::string& params_file_path);
+  void SetProgFile(const std::string& x) { prog_file_ = x; }
+  void SetParamsFile(const std::string& x) { params_file_ = x; }
+  const std::string& model_dir() const { return model_dir_; }
+  const std::string& prog_file() const { return prog_file_; }
+  const std::string& params_file() const { return params_file_; }
+
+  // GPU related.
+  void EnableUseGpu(uint64_t memory_pool_init_size_mb, int device_id = 0);
+  void DisableGpu();
+  bool use_gpu() const { return use_gpu_; }
+  int gpu_device_id() const { return device_id_; }
+  int memory_pool_init_size_mb() const { return memory_pool_init_size_mb_; }
+  float fraction_of_gpu_memory_for_pool() const;

   // Determine whether to perform graph optimization.
-  bool enable_ir_optim = true;
+  void SwitchIrOptim(int x = true) { enable_ir_optim_ = x; }
+  bool ir_optim() const { return enable_ir_optim_; }

-  // Get a pass builder for customize the passes in IR analysis phase.
-  PassStrategy* pass_builder() const;
+  void SwitchUseFeedFetchOps(int x = true) { use_feed_fetch_ops_ = x; }
+  bool use_feed_fetch_ops_enabled() const { return use_feed_fetch_ops_; }

-  // NOT stable yet.
-  bool use_feed_fetch_ops{true};
+  void SwitchSpecifyInputNames(bool x = true) { specify_input_name_ = x; }
+  bool specify_input_name() const { return specify_input_name_; }

   void EnableTensorRtEngine(int workspace_size = 1 << 20,
                             int max_batch_size = 1, int min_subgraph_size = 3);
-  bool use_tensorrt() const { return use_tensorrt_; }
+  bool tensorrt_engine_enabled() const { return use_tensorrt_; }
+
+  void SwitchIrDebug(int x = true) { ir_debug_ = x; }

   void EnableMKLDNN();
-  bool use_mkldnn() const { return use_mkldnn_; }
+  bool mkldnn_enabled() const { return use_mkldnn_; }
+
+  // Set and get the number of cpu math library threads.
+  void SetCpuMathLibraryNumThreads(int cpu_math_library_num_threads);
+  int cpu_math_library_num_threads() const {
+    return cpu_math_library_num_threads_;
+  }
+
+  NativeConfig ToNativeConfig() const {
+    NativeConfig config;
+    config.model_dir = model_dir_;
+    config.prog_file = prog_file_;
+    config.param_file = params_file_;
+    config.use_gpu = use_gpu_;
+    config.device = device_id_;
+    config.fraction_of_gpu_memory = fraction_of_gpu_memory_for_pool();
+    config.specify_input_name = specify_input_name_;
+    return config;
+  }

   void SetMKLDNNOp(std::unordered_set<std::string> op_list) {
     mkldnn_enabled_op_types_ = op_list;
   }

@@ -65,10 +106,29 @@ struct AnalysisConfig : public NativeConfig {
   friend class ::paddle::AnalysisPredictor;

+  // NOTE just for developer, not an official API, easily to be broken.
+  // Get a pass builder for customize the passes in IR analysis phase.
+  PassStrategy* pass_builder() const;
+
+ protected:
+  // Update the config.
+  void Update();
+
+  std::string SerializeInfoCache();
+
  protected:
+  // Model pathes.
+  std::string model_dir_;
+  std::string prog_file_;
+  std::string params_file_;
+
+  // GPU releated.
+  bool use_gpu_{false};
+  int device_id_{0};
+  uint64_t memory_pool_init_size_mb_{100};  // initial size is 100MB.
+
   // TensorRT releated.
   bool use_tensorrt_{false};
-  bool use_mkldnn_{false};
-  std::unordered_set<std::string> mkldnn_enabled_op_types_;
   // For workspace_size, refer it from here:
   // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
   int tensorrt_workspace_size_;

@@ -82,17 +142,24 @@ struct AnalysisConfig : public NativeConfig {
   // We set this variable to control the minimum number of nodes in the
   // subgraph, 3 as default value.
   int tensorrt_min_subgraph_size_{3};
-  std::unique_ptr<PassStrategy> pass_builder_;
+
+  bool use_mkldnn_{false};
+  std::unordered_set<std::string> mkldnn_enabled_op_types_;
+
   bool model_from_memory_{false};
-};

-// Configurations for Anakin engine.
-struct AnakinConfig : public PaddlePredictor::Config {
-  enum TargetType { NVGPU = 0, X86 };
-  int device;
-  std::string model_file;
-  int max_batch_size{-1};
-  TargetType target_type;
+  bool enable_ir_optim_{true};
+  bool use_feed_fetch_ops_{true};
+  bool ir_debug_{false};
+
+  bool specify_input_name_{false};
+
+  int cpu_math_library_num_threads_{1};
+
+  // A runtime cache, shouldn't be transferred to others.
+  std::string serialized_info_cache_;
+
+  mutable std::unique_ptr<PassStrategy> pass_builder_;
 };

 }  // namespace contrib
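Since AnalysisConfig no longer is a NativeConfig, ToNativeConfig() is the bridge that lets one configuration drive both engines. A minimal sketch (paths are placeholders), mirroring what the updated tests do:

#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void CompareBothEngines(const std::string &model_dir) {
  paddle::contrib::AnalysisConfig config;
  config.SetModel(model_dir);
  config.DisableGpu();
  config.SwitchIrOptim(true);

  auto analysis_predictor =
      paddle::CreatePaddlePredictor<paddle::contrib::AnalysisConfig>(config);
  auto native_predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(
      config.ToNativeConfig());
  // Feed the same PaddleTensor inputs to both predictors and compare the
  // outputs, as CompareNativeAndAnalysis() in tester_helper.h does.
}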
paddle/fluid/inference/api/paddle_inference_api.h (view file @ 875a07c3)

@@ -26,9 +26,8 @@ limitations under the License. */
 #include <string>
 #include <vector>

-#include "paddle_api.h"              // NOLINT
-#ifndef WITH_ANAKIN
 #include "paddle_analysis_config.h"  // NOLINT
-#else
+#include "paddle_api.h"              // NOLINT
+#ifdef WITH_ANAKIN
 #include "paddle_anakin_config.h"    // NOLINT
 #endif
paddle/fluid/inference/api/paddle_pass_builder.h (view file @ 875a07c3)

@@ -62,7 +62,12 @@ class PassStrategy : public PaddlePassBuilder {
   // still some CPU kernels running in CPU mode.
   virtual void EnableMKLDNN() = 0;

+  bool use_gpu() const { return use_gpu_; }
+
   virtual ~PassStrategy() = default;
+
+ protected:
+  bool use_gpu_{false};
 };

 /*

@@ -88,6 +93,7 @@ class CpuPassStrategy : public PassStrategy {
         "conv_eltwiseadd_bn_fuse_pass",  //
         "is_test_pass",                  //
     });
+    use_gpu_ = false;
   }

   virtual ~CpuPassStrategy() = default;

@@ -126,10 +132,14 @@ class GpuPassStrategy : public PassStrategy {
         "conv_elementwise_add2_act_fuse_pass",  //
         "conv_elementwise_add_fuse_pass",       //
     });
+
+    use_gpu_ = true;
   }

   GpuPassStrategy(const GpuPassStrategy &other)
-      : PassStrategy(other.AllPasses()) {}
+      : PassStrategy(other.AllPasses()) {
+    use_gpu_ = true;
+  }

   void EnableMKLDNN() override;
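PassStrategy now remembers whether it was built for GPU, and the tests below keep tweaking the pass list through pass_builder(). A short sketch of that customization, using pass names that appear in this diff:

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void TuneConfig(paddle::contrib::AnalysisConfig *config) {
  // Drop a fusion pass that conflicts with the model under test...
  config->pass_builder()->DeletePass("fc_gru_fuse_pass");
  // ...and dump the graph after each remaining pass for debugging.
  config->pass_builder()->TurnOnDebug();
}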
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc (view file @ 875a07c3)

@@ -165,12 +165,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(contrib::AnalysisConfig *cfg) {
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->param_file = FLAGS_infer_model + "/param";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim(true);
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc (view file @ 875a07c3)

@@ -105,11 +105,10 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc (view file @ 875a07c3)

@@ -76,11 +76,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(contrib::AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc (view file @ 875a07c3)

@@ -84,13 +84,12 @@ void SetConfig(contrib::AnalysisConfig *cfg, bool memory_load = false) {
     cfg->SetModelBuffer(&buffer_prog[0], buffer_prog.size(), &buffer_param[0],
                         buffer_param.size());
   } else {
-    cfg->prog_file = FLAGS_infer_model + "/__model__";
-    cfg->param_file = FLAGS_infer_model + "/param";
+    cfg->SetModel(FLAGS_infer_model + "/__model__",
+                  FLAGS_infer_model + "/param");
   }
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc (view file @ 875a07c3)

@@ -21,12 +21,10 @@ namespace inference {
 namespace analysis {

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->param_file = FLAGS_infer_model + "/params";
-  cfg->prog_file = FLAGS_infer_model + "/model";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->enable_ir_optim = true;
-  cfg->specify_input_name = true;
+  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
+  cfg->DisableGpu();
+  cfg->SwitchIrOptim();
+  cfg->SwitchSpecifyInputNames();
   cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
 }
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc (view file @ 875a07c3)

@@ -204,12 +204,10 @@ void PrepareZeroCopyInputs(ZeroCopyTensor *lod_attention_tensor,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->param_file = FLAGS_infer_model + "/param";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {

@@ -225,10 +223,10 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
 // Easy for profiling independently.
 TEST(Analyzer_rnn1, profile) {
-  contrib::AnalysisConfig cfg(false);
+  contrib::AnalysisConfig cfg;
   SetConfig(&cfg);
-  cfg.fraction_of_gpu_memory = 0.1;
-  cfg.pass_builder()->TurnOnDebug();
+  cfg.DisableGpu();
+  cfg.SwitchIrDebug();
   std::vector<PaddleTensor> outputs;

   std::vector<std::vector<PaddleTensor>> input_slots_all;

@@ -293,16 +291,18 @@ TEST(Analyzer_rnn1, multi_thread) {
 TEST(Analyzer_rnn1, ZeroCopy) {
   AnalysisConfig config;
   SetConfig(&config);
-  config.use_feed_fetch_ops = false;
+  config.SwitchUseFeedFetchOps(false);

   PaddlePlace place;

   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

-  config.use_feed_fetch_ops = true;
-  auto native_predictor = CreatePaddlePredictor<NativeConfig>(config);
-  config.use_feed_fetch_ops = true;  // the analysis predictor needs feed/fetch.
+  config.SwitchUseFeedFetchOps(true);
+  auto native_predictor =
+      CreatePaddlePredictor<NativeConfig>(config.ToNativeConfig());
+  config.SwitchUseFeedFetchOps(true);  // the analysis predictor needs feed/fetch.
   auto analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);

 #define NEW_TENSOR(name__) \

@@ -362,7 +362,7 @@ TEST(Analyzer_rnn1, ZeroCopy) {
 TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
   AnalysisConfig config;
   SetConfig(&config);
-  config.use_feed_fetch_ops = false;
+  config.SwitchUseFeedFetchOps(false);

 #define NEW_TENSOR(name__) \
   auto name__##_tensor = predictor->GetInputTensor(#name__);
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc (view file @ 875a07c3)

@@ -105,12 +105,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->param_file = FLAGS_infer_model + "/param";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param");
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc (view file @ 875a07c3)

@@ -89,11 +89,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc (view file @ 875a07c3)

@@ -122,12 +122,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data) {
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->param_file = FLAGS_infer_model + "/params";
-  cfg->prog_file = FLAGS_infer_model + "/model";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->enable_ir_optim = true;
-  cfg->specify_input_name = true;
+  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
   cfg->pass_builder()->TurnOnDebug();
   cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
 }
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc (view file @ 875a07c3)

@@ -47,11 +47,10 @@ struct DataReader {
 };

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->model_dir = FLAGS_infer_model;
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->specify_input_name = true;
-  cfg->enable_ir_optim = true;
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
 }

 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc (view file @ 875a07c3)

@@ -51,12 +51,11 @@ Record ProcessALine(const std::string &line) {
 }

 void SetConfig(AnalysisConfig *cfg) {
-  cfg->param_file = FLAGS_infer_model + "/__params__";
-  cfg->prog_file = FLAGS_infer_model + "/__model__";
-  cfg->use_gpu = false;
-  cfg->device = 0;
-  cfg->enable_ir_optim = true;
-  cfg->specify_input_name = true;
+  cfg->SetModel(FLAGS_infer_model + "/__model__",
+                FLAGS_infer_model + "/__params__");
+  cfg->DisableGpu();
+  cfg->SwitchIrDebug();
+  cfg->SwitchSpecifyInputNames();
   // TODO(TJ): fix fusion gru
   cfg->pass_builder()->DeletePass("fc_gru_fuse_pass");
 }
paddle/fluid/inference/tests/api/config_printer.h (view file @ 875a07c3)

@@ -64,19 +64,23 @@ std::ostream &operator<<(std::ostream &os,
   num_spaces++;
   os << *reinterpret_cast<const NativeConfig *>(&config);
   if (!config.model_from_memory()) {
-    os << GenSpaces(num_spaces) << "prog_file: " << config.prog_file << "\n";
-    os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
+    os << GenSpaces(num_spaces) << "prog_file: " << config.prog_file() << "\n";
+    os << GenSpaces(num_spaces) << "param_file: " << config.params_file()
+       << "\n";
   } else {
     os << GenSpaces(num_spaces)
        << "prog_file and param_file: load from memory \n";
   }
-  os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.enable_ir_optim
+  os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.ir_optim()
+     << "\n";
+  os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.ir_optim()
      << "\n";
   os << GenSpaces(num_spaces)
-     << "use_feed_fetch_ops: " << config.use_feed_fetch_ops << "\n";
-  os << GenSpaces(num_spaces) << "use_tensorrt: " << config.use_tensorrt()
-     << "\n";
-  os << GenSpaces(num_spaces) << "use_mkldnn: " << config.use_mkldnn()
+     << "use_feed_fetch_ops: " << config.use_feed_fetch_ops_enabled() << "\n";
+  os << GenSpaces(num_spaces)
+     << "use_tensorrt: " << config.tensorrt_engine_enabled() << "\n";
+  os << GenSpaces(num_spaces) << "use_mkldnn: " << config.mkldnn_enabled()
      << "\n";
   num_spaces--;
   os << GenSpaces(num_spaces) << "}\n";
   return os;
paddle/fluid/inference/tests/api/tester_helper.h (view file @ 875a07c3)

@@ -328,7 +328,10 @@ void CompareNativeAndAnalysis(
     const std::vector<std::vector<PaddleTensor>> &inputs) {
   PrintConfig(config, true);
   std::vector<PaddleTensor> native_outputs, analysis_outputs;
-  TestOneThreadPrediction(config, inputs, &native_outputs, false);
+  const auto *analysis_config =
+      reinterpret_cast<const contrib::AnalysisConfig *>(config);
+  auto native_config = analysis_config->ToNativeConfig();
+  TestOneThreadPrediction(&native_config, inputs, &native_outputs, false);
   TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
   CompareResult(analysis_outputs, native_outputs);
 }
paddle/fluid/inference/tests/api/trt_models_tester.cc (view file @ 875a07c3)

@@ -46,22 +46,20 @@ void SetConfig<contrib::AnalysisConfig>(contrib::AnalysisConfig* config,
                                         std::string model_dir, bool use_gpu,
                                         bool use_tensorrt, int batch_size) {
   if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
-    config->prog_file = model_dir + "/" + FLAGS_prog_filename;
-    config->param_file = model_dir + "/" + FLAGS_param_filename;
+    config->SetModel(model_dir + "/" + FLAGS_prog_filename,
+                     model_dir + "/" + FLAGS_param_filename);
   } else {
-    config->model_dir = model_dir;
+    config->SetModel(model_dir);
   }
   if (use_gpu) {
-    config->use_gpu = true;
-    config->device = 0;
-    config->fraction_of_gpu_memory = 0.15;
+    config->EnableUseGpu(100, 0);
     if (use_tensorrt) {
       config->EnableTensorRtEngine(1 << 10, batch_size);
       config->pass_builder()->DeletePass("conv_bn_fuse_pass");
       config->pass_builder()->DeletePass("fc_fuse_pass");
       config->pass_builder()->TurnOnDebug();
     } else {
-      config->enable_ir_optim = true;
+      config->SwitchIrOptim();
     }
   }
 }

@@ -77,7 +75,8 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
   std::vector<PaddleTensor> outputs;
   if (use_analysis || use_tensorrt) {
-    contrib::AnalysisConfig config(true);
+    contrib::AnalysisConfig config;
+    config.EnableUseGpu(100, 0);
     config.pass_builder()->TurnOnDebug();
     SetConfig<contrib::AnalysisConfig>(&config, model_dir, true, use_tensorrt,
                                        FLAGS_batch_size);

@@ -109,7 +108,8 @@ void compare(std::string model_dir, bool use_tensorrt) {
                           &native_outputs, false);

   std::vector<PaddleTensor> analysis_outputs;
-  contrib::AnalysisConfig analysis_config(true);
+  contrib::AnalysisConfig analysis_config;
+  analysis_config.EnableUseGpu(50, 0);
   SetConfig<contrib::AnalysisConfig>(&analysis_config, model_dir, true,
                                      use_tensorrt, FLAGS_batch_size);
   TestOneThreadPrediction(

@@ -154,9 +154,9 @@ TEST(TensorRT_mobilenet, analysis) {
 TEST(AnalysisPredictor, use_gpu) {
   std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
-  AnalysisConfig config(true);
-  config.model_dir = model_dir;
-  config.fraction_of_gpu_memory = 0.15;
+  AnalysisConfig config;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(model_dir);
   config.pass_builder()->TurnOnDebug();

   std::vector<std::vector<PaddleTensor>> inputs_all;