Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
ddb12035
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 2 年 前同步成功
通知
708
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ddb12035
编写于
11月 14, 2018
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/paddlepaddle/paddle
into add_trt_plugin
merge develop and fix conflicts
上级
0b962680
9f335939
变更
123
显示空白变更内容
内联
并排
Showing
123 changed file
with
2957 addition
and
4045 deletion
+2957
-4045
AUTHORS.md
AUTHORS.md
+1
-0
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+3
-3
cmake/tensorrt.cmake
cmake/tensorrt.cmake
+1
-0
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
...uid/framework/details/fast_threaded_ssa_graph_executor.cc
+3
-3
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
...luid/framework/details/fast_threaded_ssa_graph_executor.h
+1
-0
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+1
-0
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+3
-1
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+4
-4
paddle/fluid/framework/ir/graph.cc
paddle/fluid/framework/ir/graph.cc
+0
-2
paddle/fluid/framework/ir/graph.h
paddle/fluid/framework/ir/graph.h
+15
-5
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+2
-2
paddle/fluid/framework/ir/graph_to_program_pass.cc
paddle/fluid/framework/ir/graph_to_program_pass.cc
+2
-1
paddle/fluid/framework/ir/graph_traits.cc
paddle/fluid/framework/ir/graph_traits.cc
+70
-0
paddle/fluid/framework/ir/graph_traits.h
paddle/fluid/framework/ir/graph_traits.h
+34
-0
paddle/fluid/framework/ir/node.cc
paddle/fluid/framework/ir/node.cc
+0
-1
paddle/fluid/framework/ir/node.h
paddle/fluid/framework/ir/node.h
+6
-13
paddle/fluid/framework/ir/pass.h
paddle/fluid/framework/ir/pass.h
+1
-0
paddle/fluid/framework/naive_executor.cc
paddle/fluid/framework/naive_executor.cc
+33
-35
paddle/fluid/framework/naive_executor.h
paddle/fluid/framework/naive_executor.h
+8
-4
paddle/fluid/framework/naive_executor_test.cc
paddle/fluid/framework/naive_executor_test.cc
+1
-1
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+68
-14
paddle/fluid/framework/scope.h
paddle/fluid/framework/scope.h
+8
-3
paddle/fluid/framework/selected_rows.cc
paddle/fluid/framework/selected_rows.cc
+45
-7
paddle/fluid/framework/selected_rows.h
paddle/fluid/framework/selected_rows.h
+2
-2
paddle/fluid/framework/selected_rows_test.cc
paddle/fluid/framework/selected_rows_test.cc
+8
-4
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+2
-6
paddle/fluid/inference/analysis/CMakeLists.txt
paddle/fluid/inference/analysis/CMakeLists.txt
+17
-26
paddle/fluid/inference/analysis/analysis_pass.h
paddle/fluid/inference/analysis/analysis_pass.h
+12
-18
paddle/fluid/inference/analysis/analyzer.cc
paddle/fluid/inference/analysis/analyzer.cc
+8
-123
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+3
-38
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+8
-8
paddle/fluid/inference/analysis/argument.h
paddle/fluid/inference/analysis/argument.h
+93
-66
paddle/fluid/inference/analysis/data_flow_graph.cc
paddle/fluid/inference/analysis/data_flow_graph.cc
+0
-496
paddle/fluid/inference/analysis/data_flow_graph.h
paddle/fluid/inference/analysis/data_flow_graph.h
+0
-209
paddle/fluid/inference/analysis/data_flow_graph_tester.cc
paddle/fluid/inference/analysis/data_flow_graph_tester.cc
+0
-168
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
.../fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
+0
-59
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc
...nference/analysis/data_flow_graph_to_fluid_pass_tester.cc
+0
-48
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc
+0
-59
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h
+0
-78
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc
...fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc
+0
-54
paddle/fluid/inference/analysis/dot_tester.cc
paddle/fluid/inference/analysis/dot_tester.cc
+0
-1
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc
...fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc
+0
-76
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h
.../fluid/inference/analysis/fluid_to_data_flow_graph_pass.h
+0
-57
paddle/fluid/inference/analysis/fluid_to_ir_pass.h
paddle/fluid/inference/analysis/fluid_to_ir_pass.h
+0
-128
paddle/fluid/inference/analysis/graph_traits.cc
paddle/fluid/inference/analysis/graph_traits.cc
+0
-15
paddle/fluid/inference/analysis/graph_traits.h
paddle/fluid/inference/analysis/graph_traits.h
+0
-63
paddle/fluid/inference/analysis/helper.h
paddle/fluid/inference/analysis/helper.h
+5
-5
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+60
-10
paddle/fluid/inference/analysis/ir_pass_manager.h
paddle/fluid/inference/analysis/ir_pass_manager.h
+15
-4
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
+7
-0
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
...e/fluid/inference/analysis/ir_passes/subgraph_detector.cc
+231
-48
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
...le/fluid/inference/analysis/ir_passes/subgraph_detector.h
+182
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+220
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
...uid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
+12
-19
paddle/fluid/inference/analysis/model_store_pass.cc
paddle/fluid/inference/analysis/model_store_pass.cc
+0
-67
paddle/fluid/inference/analysis/node.cc
paddle/fluid/inference/analysis/node.cc
+0
-70
paddle/fluid/inference/analysis/node.h
paddle/fluid/inference/analysis/node.h
+0
-244
paddle/fluid/inference/analysis/node_tester.cc
paddle/fluid/inference/analysis/node_tester.cc
+0
-55
paddle/fluid/inference/analysis/pass_manager.cc
paddle/fluid/inference/analysis/pass_manager.cc
+0
-47
paddle/fluid/inference/analysis/pass_manager.h
paddle/fluid/inference/analysis/pass_manager.h
+0
-94
paddle/fluid/inference/analysis/pass_manager_tester.cc
paddle/fluid/inference/analysis/pass_manager_tester.cc
+0
-54
paddle/fluid/inference/analysis/passes/CMakeLists.txt
paddle/fluid/inference/analysis/passes/CMakeLists.txt
+9
-0
paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc
...uid/inference/analysis/passes/ir_analysis_compose_pass.cc
+83
-0
paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h
...luid/inference/analysis/passes/ir_analysis_compose_pass.h
+16
-23
paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
+43
-0
paddle/fluid/inference/analysis/passes/ir_analysis_pass.h
paddle/fluid/inference/analysis/passes/ir_analysis_pass.h
+14
-9
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
...le/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+73
-0
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h
+46
-0
paddle/fluid/inference/analysis/passes/passes.cc
paddle/fluid/inference/analysis/passes/passes.cc
+34
-0
paddle/fluid/inference/analysis/passes/passes.h
paddle/fluid/inference/analysis/passes/passes.h
+18
-12
paddle/fluid/inference/analysis/subgraph_splitter.h
paddle/fluid/inference/analysis/subgraph_splitter.h
+0
-88
paddle/fluid/inference/analysis/subgraph_splitter_tester.cc
paddle/fluid/inference/analysis/subgraph_splitter_tester.cc
+0
-92
paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc
...id/inference/analysis/tensorrt_subgraph_node_mark_pass.cc
+0
-80
paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc
...rence/analysis/tensorrt_subgraph_node_mark_pass_tester.cc
+0
-50
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
+0
-36
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
+0
-57
paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc
...fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc
+0
-73
paddle/fluid/inference/analysis/ut_helper.h
paddle/fluid/inference/analysis/ut_helper.h
+0
-25
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+11
-16
paddle/fluid/inference/api/README.md
paddle/fluid/inference/api/README.md
+4
-14
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+103
-0
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+226
-61
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+27
-2
paddle/fluid/inference/api/analysis_predictor_tester.cc
paddle/fluid/inference/api/analysis_predictor_tester.cc
+125
-2
paddle/fluid/inference/api/api.cc
paddle/fluid/inference/api/api.cc
+1
-0
paddle/fluid/inference/api/api_anakin_engine.h
paddle/fluid/inference/api/api_anakin_engine.h
+3
-1
paddle/fluid/inference/api/api_impl_tester.cc
paddle/fluid/inference/api/api_impl_tester.cc
+8
-1
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
+0
-189
paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc
...luid/inference/api/api_tensorrt_subgraph_engine_tester.cc
+0
-92
paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
+1
-1
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
+3
-4
paddle/fluid/inference/api/demo_ci/vis_demo.cc
paddle/fluid/inference/api/demo_ci/vis_demo.cc
+3
-9
paddle/fluid/inference/api/details/zero_copy_tensor.cc
paddle/fluid/inference/api/details/zero_copy_tensor.cc
+6
-4
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
+6
-4
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+65
-0
paddle/fluid/inference/api/paddle_anakin_config.h
paddle/fluid/inference/api/paddle_anakin_config.h
+18
-16
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+77
-0
paddle/fluid/inference/api/paddle_api.h
paddle/fluid/inference/api/paddle_api.h
+220
-0
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+6
-262
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+68
-0
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+131
-0
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+1
-0
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+4
-3
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+7
-2
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+12
-101
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
...nference/tests/api/analyzer_text_classification_tester.cc
+1
-3
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+10
-3
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+27
-18
paddle/fluid/inference/tests/api/trt_models_tester.cc
paddle/fluid/inference/tests/api/trt_models_tester.cc
+73
-33
paddle/fluid/memory/malloc.cc
paddle/fluid/memory/malloc.cc
+13
-5
paddle/fluid/operators/auc_op.cc
paddle/fluid/operators/auc_op.cc
+1
-1
paddle/fluid/operators/load_op.cc
paddle/fluid/operators/load_op.cc
+3
-2
paddle/fluid/operators/lookup_sparse_table_op.cc
paddle/fluid/operators/lookup_sparse_table_op.cc
+6
-1
paddle/fluid/operators/lrn_op.cc
paddle/fluid/operators/lrn_op.cc
+39
-26
paddle/fluid/operators/lrn_op.h
paddle/fluid/operators/lrn_op.h
+0
-1
paddle/fluid/operators/math/blas.h
paddle/fluid/operators/math/blas.h
+16
-0
paddle/fluid/operators/math/blas_impl.h
paddle/fluid/operators/math/blas_impl.h
+48
-0
paddle/fluid/operators/mul_op.cc
paddle/fluid/operators/mul_op.cc
+2
-1
paddle/fluid/operators/nce_op.cc
paddle/fluid/operators/nce_op.cc
+2
-2
paddle/fluid/operators/sgd_op.h
paddle/fluid/operators/sgd_op.h
+0
-2
paddle/fluid/platform/dynload/mklml.h
paddle/fluid/platform/dynload/mklml.h
+4
-0
paddle/fluid/string/printf.h
paddle/fluid/string/printf.h
+18
-0
python/paddle/fluid/tests/unittests/test_lookup_sparse_table_op.py
...ddle/fluid/tests/unittests/test_lookup_sparse_table_op.py
+27
-0
未找到文件。
AUTHORS.md
浏览文件 @
ddb12035
...
@@ -43,6 +43,7 @@
...
@@ -43,6 +43,7 @@
| qingqing01 | Qing-Qing Dang |
| qingqing01 | Qing-Qing Dang |
| reyoung | Yang Yu |
| reyoung | Yang Yu |
| Superjom | Chun-Wei Yan |
| Superjom | Chun-Wei Yan |
| tensor-tang | Jian Tang |
| tianbingsz | Tian-Bing Xu |
| tianbingsz | Tian-Bing Xu |
| tpatejko | Tomasz Patejko |
| tpatejko | Tomasz Patejko |
| typhoonzero | Yi Wu |
| typhoonzero | Yi Wu |
...
...
cmake/inference_lib.cmake
浏览文件 @
ddb12035
...
@@ -164,7 +164,7 @@ endif()
...
@@ -164,7 +164,7 @@ endif()
set
(
module
"inference"
)
set
(
module
"inference"
)
copy
(
inference_lib DEPS
${
inference_deps
}
copy
(
inference_lib DEPS
${
inference_deps
}
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
${
src_dir
}
/
${
module
}
/api/paddle_
inference_api
.h
${
src_dir
}
/
${
module
}
/api/paddle_
*
.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
)
)
...
@@ -202,10 +202,10 @@ copy(third_party DEPS fluid_lib_dist
...
@@ -202,10 +202,10 @@ copy(third_party DEPS fluid_lib_dist
DSTS
${
FLUID_INFERENCE_INSTALL_DIR
}
${
FLUID_INFERENCE_INSTALL_DIR
}
DSTS
${
FLUID_INFERENCE_INSTALL_DIR
}
${
FLUID_INFERENCE_INSTALL_DIR
}
)
)
# only need libpaddle_fluid.so/a and paddle_
inference_api
.h for inference-only library
# only need libpaddle_fluid.so/a and paddle_
*
.h for inference-only library
copy
(
inference_api_lib DEPS fluid_lib_dist
copy
(
inference_api_lib DEPS fluid_lib_dist
SRCS
${
FLUID_INSTALL_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
SRCS
${
FLUID_INSTALL_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
${
FLUID_INSTALL_DIR
}
/paddle/fluid/inference/paddle_
inference_api
.h
${
FLUID_INSTALL_DIR
}
/paddle/fluid/inference/paddle_
*
.h
DSTS
${
FLUID_INFERENCE_INSTALL_DIR
}
/paddle/lib
${
FLUID_INFERENCE_INSTALL_DIR
}
/paddle/include
DSTS
${
FLUID_INFERENCE_INSTALL_DIR
}
/paddle/lib
${
FLUID_INFERENCE_INSTALL_DIR
}
/paddle/include
)
)
...
...
cmake/tensorrt.cmake
浏览文件 @
ddb12035
...
@@ -34,4 +34,5 @@ if(TENSORRT_FOUND)
...
@@ -34,4 +34,5 @@ if(TENSORRT_FOUND)
"Current TensorRT version is v
${
TENSORRT_MAJOR_VERSION
}
. "
)
"Current TensorRT version is v
${
TENSORRT_MAJOR_VERSION
}
. "
)
include_directories
(
${
TENSORRT_INCLUDE_DIR
}
)
include_directories
(
${
TENSORRT_INCLUDE_DIR
}
)
list
(
APPEND EXTERNAL_LIBS
${
TENSORRT_LIBRARY
}
)
list
(
APPEND EXTERNAL_LIBS
${
TENSORRT_LIBRARY
}
)
add_definitions
(
-DPADDLE_WITH_TENSORRT
)
endif
()
endif
()
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
浏览文件 @
ddb12035
...
@@ -30,8 +30,8 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
...
@@ -30,8 +30,8 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
local_scopes_
(
local_scopes
),
local_scopes_
(
local_scopes
),
places_
(
places
),
places_
(
places
),
graph_
(
std
::
move
(
graph
)),
graph_
(
std
::
move
(
graph
)),
pool_
(
strategy
.
num_threads_
+
pool_
(
strategy
.
num_threads_
),
1
),
// add one more thread for generate op_deps
prepare_pool_
(
1
),
// add one more thread for generate op_deps
fetch_ctxs_
(
places
)
{
fetch_ctxs_
(
places
)
{
for
(
auto
&
op
:
ir
::
FilterByNodeWrapper
<
OpHandleBase
>
(
*
graph_
))
{
for
(
auto
&
op
:
ir
::
FilterByNodeWrapper
<
OpHandleBase
>
(
*
graph_
))
{
int
dep
=
static_cast
<
int
>
(
op
->
NotReadyInputSize
());
int
dep
=
static_cast
<
int
>
(
op
->
NotReadyInputSize
());
...
@@ -160,7 +160,7 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
...
@@ -160,7 +160,7 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
});
});
}
}
void
FastThreadedSSAGraphExecutor
::
PrepareAtomicOpDeps
()
{
void
FastThreadedSSAGraphExecutor
::
PrepareAtomicOpDeps
()
{
atomic_op_deps_
=
pool_
.
enqueue
([
&
]
{
atomic_op_deps_
=
p
repare_p
ool_
.
enqueue
([
&
]
{
auto
*
op_deps
=
new
std
::
unordered_map
<
OpHandleBase
*
,
std
::
atomic
<
int
>>
;
auto
*
op_deps
=
new
std
::
unordered_map
<
OpHandleBase
*
,
std
::
atomic
<
int
>>
;
for
(
auto
&
pair
:
op_deps_
)
{
for
(
auto
&
pair
:
op_deps_
)
{
(
*
op_deps
)[
pair
.
first
]
=
pair
.
second
;
(
*
op_deps
)[
pair
.
first
]
=
pair
.
second
;
...
...
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
浏览文件 @
ddb12035
...
@@ -46,6 +46,7 @@ class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
...
@@ -46,6 +46,7 @@ class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
std
::
vector
<
OpHandleBase
*>
bootstrap_ops_
;
std
::
vector
<
OpHandleBase
*>
bootstrap_ops_
;
::
ThreadPool
pool_
;
::
ThreadPool
pool_
;
::
ThreadPool
prepare_pool_
;
platform
::
DeviceContextPool
fetch_ctxs_
;
platform
::
DeviceContextPool
fetch_ctxs_
;
std
::
atomic
<
int
>
remaining_
;
std
::
atomic
<
int
>
remaining_
;
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
ddb12035
...
@@ -359,6 +359,7 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
...
@@ -359,6 +359,7 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
bool
create_local_scope
,
bool
create_vars
,
bool
create_local_scope
,
bool
create_vars
,
bool
keep_kids
)
{
bool
keep_kids
)
{
PADDLE_ENFORCE_NOT_NULL
(
scope
);
Scope
*
local_scope
=
scope
;
Scope
*
local_scope
=
scope
;
if
(
create_vars
)
{
if
(
create_vars
)
{
if
(
create_local_scope
)
{
if
(
create_local_scope
)
{
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
ddb12035
...
@@ -5,6 +5,7 @@ file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n")
...
@@ -5,6 +5,7 @@ file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n")
# Usage: pass_library(target inference) will append to paddle_inference_pass.h
# Usage: pass_library(target inference) will append to paddle_inference_pass.h
unset
(
INFER_IR_PASSES CACHE
)
# clear the global variable
function
(
pass_library TARGET DEST
)
function
(
pass_library TARGET DEST
)
set
(
options
""
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
oneValueArgs
""
)
...
@@ -15,10 +16,11 @@ function(pass_library TARGET DEST)
...
@@ -15,10 +16,11 @@ function(pass_library TARGET DEST)
if
(
${
DEST
}
STREQUAL
"base"
OR
${
DEST
}
STREQUAL
"inference"
)
if
(
${
DEST
}
STREQUAL
"base"
OR
${
DEST
}
STREQUAL
"inference"
)
message
(
STATUS
"add pass
${
TARGET
}
${
DEST
}
"
)
message
(
STATUS
"add pass
${
TARGET
}
${
DEST
}
"
)
file
(
APPEND
${
pass_file
}
"USE_PASS(
${
TARGET
}
);
\n
"
)
file
(
APPEND
${
pass_file
}
"USE_PASS(
${
TARGET
}
);
\n
"
)
set
(
PASS_LIBRARY
${
TARGET
}
${
PASS_LIBRARY
}
PARENT_SCOPE
)
set
(
INFER_IR_PASSES
${
INFER_IR_PASSES
}
${
TARGET
}
CACHE INTERNAL
""
)
endif
()
endif
()
endfunction
()
endfunction
()
cc_library
(
node SRCS node.cc DEPS proto_desc
)
cc_library
(
node SRCS node.cc DEPS proto_desc
)
cc_library
(
graph SRCS graph.cc DEPS node pretty_log
)
cc_library
(
graph SRCS graph.cc DEPS node pretty_log
)
cc_library
(
graph_helper SRCS graph_helper.cc DEPS graph
)
cc_library
(
graph_helper SRCS graph_helper.cc DEPS graph
)
...
...
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
浏览文件 @
ddb12035
...
@@ -91,10 +91,10 @@ void FindWhileOp(Graph* graph) {
...
@@ -91,10 +91,10 @@ void FindWhileOp(Graph* graph) {
#undef OP_SET_IN
#undef OP_SET_IN
#undef OP_SET_OUT
#undef OP_SET_OUT
auto
*
X
=
graph
->
RetriveNode
(
34
);
auto
*
X
=
graph
->
Retri
e
veNode
(
34
);
auto
*
LSTMOUT
=
graph
->
RetriveNode
(
81
);
auto
*
LSTMOUT
=
graph
->
Retri
e
veNode
(
81
);
auto
*
cell_init
=
graph
->
RetriveNode
(
6
);
auto
*
cell_init
=
graph
->
Retri
e
veNode
(
6
);
auto
*
hidden_init
=
graph
->
RetriveNode
(
8
);
auto
*
hidden_init
=
graph
->
Retri
e
veNode
(
8
);
auto
*
lstm_op
=
graph
->
CreateOpNode
(
&
op_desc
);
auto
*
lstm_op
=
graph
->
CreateOpNode
(
&
op_desc
);
PrepareParameters
(
graph
,
param
);
PrepareParameters
(
graph
,
param
);
...
...
paddle/fluid/framework/ir/graph.cc
浏览文件 @
ddb12035
...
@@ -84,8 +84,6 @@ void CheckProgram(const ProgramDesc &program) {
...
@@ -84,8 +84,6 @@ void CheckProgram(const ProgramDesc &program) {
Graph
::
Graph
(
const
ProgramDesc
&
program
)
:
program_
(
program
)
{
Graph
::
Graph
(
const
ProgramDesc
&
program
)
:
program_
(
program
)
{
CheckProgram
(
program_
);
CheckProgram
(
program_
);
// Make the nodes id start from 0.
Node
::
ResetId
();
auto
var_nodes
=
InitFromProgram
(
program_
);
auto
var_nodes
=
InitFromProgram
(
program_
);
ResolveHazard
(
var_nodes
);
ResolveHazard
(
var_nodes
);
}
}
...
...
paddle/fluid/framework/ir/graph.h
浏览文件 @
ddb12035
...
@@ -116,13 +116,17 @@ class Graph {
...
@@ -116,13 +116,17 @@ class Graph {
// Create a normal variable with non-null VarDesc.
// Create a normal variable with non-null VarDesc.
ir
::
Node
*
CreateVarNode
(
VarDesc
*
var_desc
)
{
ir
::
Node
*
CreateVarNode
(
VarDesc
*
var_desc
)
{
PADDLE_ENFORCE
(
var_desc
);
PADDLE_ENFORCE
(
var_desc
);
return
AddNode
(
new
ir
::
Node
(
var_desc
));
auto
*
x
=
AddNode
(
new
ir
::
Node
(
var_desc
));
x
->
SetId
(
num_node_created_
++
);
return
x
;
}
}
// Create a normal runnable operator with OpDesc.
// Create a normal runnable operator with OpDesc.
ir
::
Node
*
CreateOpNode
(
OpDesc
*
op_desc
)
{
ir
::
Node
*
CreateOpNode
(
OpDesc
*
op_desc
)
{
PADDLE_ENFORCE
(
op_desc
);
PADDLE_ENFORCE
(
op_desc
);
return
AddNode
(
new
ir
::
Node
(
op_desc
));
auto
*
x
=
AddNode
(
new
ir
::
Node
(
op_desc
));
x
->
SetId
(
num_node_created_
++
);
return
x
;
}
}
// Create a control dependency var that connects 2 operations. The
// Create a control dependency var that connects 2 operations. The
...
@@ -132,13 +136,17 @@ class Graph {
...
@@ -132,13 +136,17 @@ class Graph {
// TODO(panyx0718): control var name should be really unique.
// TODO(panyx0718): control var name should be really unique.
const
std
::
string
name
=
string
::
Sprintf
(
const
std
::
string
name
=
string
::
Sprintf
(
"%s@%llu"
,
ir
::
Node
::
kControlDepVarName
,
node_set_
.
size
());
"%s@%llu"
,
ir
::
Node
::
kControlDepVarName
,
node_set_
.
size
());
return
AddNode
(
new
ir
::
Node
(
name
,
ir
::
Node
::
Type
::
kVariable
));
auto
*
x
=
AddNode
(
new
ir
::
Node
(
name
,
ir
::
Node
::
Type
::
kVariable
));
x
->
SetId
(
num_node_created_
++
);
return
x
;
}
}
// A more free style way of creating a graph node. Mostly use for test
// A more free style way of creating a graph node. Mostly use for test
// or "copy" from another node. Avoid using it if possible.
// or "copy" from another node. Avoid using it if possible.
ir
::
Node
*
CreateEmptyNode
(
const
std
::
string
&
name
,
ir
::
Node
::
Type
type
)
{
ir
::
Node
*
CreateEmptyNode
(
const
std
::
string
&
name
,
ir
::
Node
::
Type
type
)
{
return
AddNode
(
new
ir
::
Node
(
name
,
type
));
auto
*
x
=
AddNode
(
new
ir
::
Node
(
name
,
type
));
x
->
SetId
(
num_node_created_
++
);
return
x
;
}
}
// Clear all node information of the graph and return the ownership of the
// Clear all node information of the graph and return the ownership of the
...
@@ -160,7 +168,7 @@ class Graph {
...
@@ -160,7 +168,7 @@ class Graph {
}
}
// NOTE low performance, but simple and secure.
// NOTE low performance, but simple and secure.
Node
*
RetriveNode
(
int
id
)
{
Node
*
Retri
e
veNode
(
int
id
)
{
for
(
auto
&
node
:
nodes_
)
{
for
(
auto
&
node
:
nodes_
)
{
if
(
node
.
second
->
id
()
==
id
)
{
if
(
node
.
second
->
id
()
==
id
)
{
return
node
.
second
.
get
();
return
node
.
second
.
get
();
...
@@ -169,6 +177,7 @@ class Graph {
...
@@ -169,6 +177,7 @@ class Graph {
return
nullptr
;
return
nullptr
;
}
}
const
ProgramDesc
&
program
()
const
{
return
program_
;
}
std
::
map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
InitFromProgram
(
std
::
map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
InitFromProgram
(
const
ProgramDesc
&
program
);
const
ProgramDesc
&
program
);
...
@@ -190,6 +199,7 @@ class Graph {
...
@@ -190,6 +199,7 @@ class Graph {
std
::
map
<
std
::
string
,
std
::
function
<
void
(
void
)
>>
attr_dels_
;
std
::
map
<
std
::
string
,
std
::
function
<
void
(
void
)
>>
attr_dels_
;
std
::
map
<
ir
::
Node
*
,
std
::
unique_ptr
<
ir
::
Node
>>
nodes_
;
std
::
map
<
ir
::
Node
*
,
std
::
unique_ptr
<
ir
::
Node
>>
nodes_
;
std
::
unordered_set
<
ir
::
Node
*>
node_set_
;
std
::
unordered_set
<
ir
::
Node
*>
node_set_
;
size_t
num_node_created_
{
0
};
// help to generate a unique node id.
};
};
bool
IsControlDepVar
(
const
ir
::
Node
&
var
);
bool
IsControlDepVar
(
const
ir
::
Node
&
var
);
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
ddb12035
...
@@ -310,8 +310,8 @@ void GraphSafeRemoveNodes(Graph* graph,
...
@@ -310,8 +310,8 @@ void GraphSafeRemoveNodes(Graph* graph,
const
std
::
unordered_set
<
const
Node
*>&
nodes
);
const
std
::
unordered_set
<
const
Node
*>&
nodes
);
// Some pre-defined patterns those can be reused in multiple passes.
// Some pre-defined patterns those can be reused in multiple passes.
// The related Fluid Layer or Op should be one pattern here for better reusage
// The related Fluid Layer or Op should be one pattern here for better re
-
usage
// ac
c
ross different fusion.
// across different fusion.
namespace
patterns
{
namespace
patterns
{
struct
KeyCounter
{
struct
KeyCounter
{
...
...
paddle/fluid/framework/ir/graph_to_program_pass.cc
浏览文件 @
ddb12035
...
@@ -35,10 +35,11 @@ std::unique_ptr<Graph> GraphToProgramPass::ApplyImpl(
...
@@ -35,10 +35,11 @@ std::unique_ptr<Graph> GraphToProgramPass::ApplyImpl(
new
proto
::
ProgramDesc
(
*
program
.
Proto
()));
new
proto
::
ProgramDesc
(
*
program
.
Proto
()));
auto
block
=
program_pb
->
mutable_blocks
(
kRootBlockIndex
);
auto
block
=
program_pb
->
mutable_blocks
(
kRootBlockIndex
);
block
->
set_idx
(
kRootBlockIndex
);
block
->
clear_vars
();
block
->
clear_vars
();
std
::
unordered_set
<
std
::
string
>
visited_vars
;
std
::
unordered_set
<
std
::
string
>
visited_vars
;
for
(
ir
::
Node
*
n
:
graph
->
Nodes
())
{
for
(
ir
::
Node
*
n
:
graph
->
Nodes
())
{
if
(
n
->
NodeType
()
==
ir
::
Node
::
Type
::
kVariable
)
{
if
(
n
->
IsVar
()
)
{
if
(
n
->
Var
()
&&
visited_vars
.
count
(
n
->
Var
()
->
Name
())
==
0
)
{
if
(
n
->
Var
()
&&
visited_vars
.
count
(
n
->
Var
()
->
Name
())
==
0
)
{
visited_vars
.
insert
(
n
->
Var
()
->
Name
());
visited_vars
.
insert
(
n
->
Var
()
->
Name
());
block
->
add_vars
()
->
MergeFrom
(
*
n
->
Var
()
->
Proto
());
block
->
add_vars
()
->
MergeFrom
(
*
n
->
Var
()
->
Proto
());
...
...
paddle/fluid/framework/ir/graph_traits.cc
浏览文件 @
ddb12035
...
@@ -66,6 +66,76 @@ NodesDFSIterator &NodesDFSIterator::operator=(const NodesDFSIterator &other) {
...
@@ -66,6 +66,76 @@ NodesDFSIterator &NodesDFSIterator::operator=(const NodesDFSIterator &other) {
}
}
Node
*
NodesDFSIterator
::
operator
->
()
{
return
stack_
.
top
();
}
Node
*
NodesDFSIterator
::
operator
->
()
{
return
stack_
.
top
();
}
inline
bool
CheckNodeIndegreeEquals
(
const
Node
&
node
,
size_t
n
)
{
return
node
.
inputs
.
size
()
==
n
;
}
NodesTSIterator
::
NodesTSIterator
(
const
std
::
vector
<
Node
*>
&
source
)
{
PADDLE_ENFORCE
(
!
source
.
empty
(),
"Start points of topological sorting should not be empty!"
);
// CHECK all the inputs' in-degree is 0
for
(
auto
*
node
:
source
)
{
PADDLE_ENFORCE
(
CheckNodeIndegreeEquals
(
*
node
,
0
));
}
std
::
unordered_set
<
Node
*>
visited
;
std
::
unordered_set
<
Node
*>
to_visit
{
source
.
begin
(),
source
.
end
()};
std
::
vector
<
Node
*>
inlink_visited
;
while
(
!
to_visit
.
empty
())
{
std
::
vector
<
Node
*>
queue
(
to_visit
.
begin
(),
to_visit
.
end
());
for
(
auto
*
p
:
queue
)
{
inlink_visited
.
clear
();
std
::
copy_if
(
p
->
inputs
.
begin
(),
p
->
inputs
.
end
(),
std
::
back_inserter
(
inlink_visited
),
[
&
](
Node
*
x
)
->
bool
{
return
visited
.
count
(
x
)
!=
0
;
});
if
(
inlink_visited
.
size
()
==
p
->
inputs
.
size
())
{
sorted_
.
push_back
(
p
);
for
(
auto
*
_
:
p
->
outputs
)
{
if
(
!
visited
.
count
(
_
))
{
to_visit
.
insert
(
_
);
}
}
to_visit
.
erase
(
p
);
visited
.
insert
(
p
);
}
}
}
}
NodesTSIterator
::
NodesTSIterator
(
const
NodesTSIterator
&
other
)
:
sorted_
(
other
.
sorted_
),
cursor_
(
other
.
cursor_
)
{}
Node
&
NodesTSIterator
::
operator
*
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
*
sorted_
[
cursor_
];
}
NodesTSIterator
&
NodesTSIterator
::
operator
++
()
{
if
(
++
cursor_
>=
sorted_
.
size
())
{
sorted_
.
clear
();
cursor_
=
0
;
}
return
*
this
;
}
NodesTSIterator
&
NodesTSIterator
::
operator
=
(
const
NodesTSIterator
&
other
)
{
cursor_
=
other
.
cursor_
;
sorted_
=
other
.
sorted_
;
return
*
this
;
}
bool
NodesTSIterator
::
operator
==
(
const
NodesTSIterator
&
other
)
{
return
sorted_
==
other
.
sorted_
&&
cursor_
==
other
.
cursor_
;
}
Node
*
NodesTSIterator
::
operator
->
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
sorted_
[
cursor_
];
}
}
// namespace ir
}
// namespace ir
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/ir/graph_traits.h
浏览文件 @
ddb12035
...
@@ -62,6 +62,32 @@ struct NodesDFSIterator
...
@@ -62,6 +62,32 @@ struct NodesDFSIterator
std
::
unordered_set
<
Node
*>
visited_
;
std
::
unordered_set
<
Node
*>
visited_
;
};
};
// Topological sorting iterator on nodes.
struct
NodesTSIterator
:
public
std
::
iterator
<
std
::
forward_iterator_tag
,
Node
*>
{
NodesTSIterator
()
=
default
;
NodesTSIterator
(
const
std
::
vector
<
Node
*>
&
source
);
NodesTSIterator
(
NodesTSIterator
&&
other
)
:
sorted_
(
std
::
move
(
other
.
sorted_
)),
cursor_
(
other
.
cursor_
)
{
other
.
cursor_
=
0
;
}
NodesTSIterator
(
const
NodesTSIterator
&
other
);
Node
&
operator
*
();
NodesTSIterator
&
operator
++
();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesTSIterator
&
operator
=
(
const
NodesTSIterator
&
other
);
bool
operator
==
(
const
NodesTSIterator
&
other
);
bool
operator
!=
(
const
NodesTSIterator
&
other
)
{
return
!
(
*
this
==
other
);
}
Node
*
operator
->
();
private:
std
::
vector
<
Node
*>
sorted_
;
size_t
cursor_
{
0
};
};
/*
/*
* GraphTraits contains some graph traversal algorithms.
* GraphTraits contains some graph traversal algorithms.
*
*
...
@@ -76,6 +102,14 @@ struct GraphTraits {
...
@@ -76,6 +102,14 @@ struct GraphTraits {
NodesDFSIterator
());
NodesDFSIterator
());
}
}
// Returns an iterator range over the nodes of `g` for topological-sort
// traversal. The range begins at an iterator built from the graph's start
// points and ends at a default-constructed iterator.
//
// Enforces that the graph has at least one start point.
static iterator_range<NodesTSIterator> TS(const Graph &g) {
  auto start_points = ExtractStartPoints(g);
  PADDLE_ENFORCE(!start_points.empty());
  // Build the begin iterator exactly once; the previous version also created
  // an unused local iterator from the same start points.
  return iterator_range<NodesTSIterator>(NodesTSIterator(start_points),
                                         NodesTSIterator());
}
private:
private:
// The nodes those have no input will be treated as start points.
// The nodes those have no input will be treated as start points.
static
std
::
vector
<
Node
*>
ExtractStartPoints
(
const
Graph
&
g
)
{
static
std
::
vector
<
Node
*>
ExtractStartPoints
(
const
Graph
&
g
)
{
...
...
paddle/fluid/framework/ir/node.cc
浏览文件 @
ddb12035
...
@@ -18,7 +18,6 @@ namespace paddle {
...
@@ -18,7 +18,6 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
constexpr
char
Node
::
kControlDepVarName
[];
constexpr
char
Node
::
kControlDepVarName
[];
int
Node
::
count_
=
0
;
std
::
unique_ptr
<
Node
>
CreateNodeForTest
(
const
std
::
string
&
name
,
std
::
unique_ptr
<
Node
>
CreateNodeForTest
(
const
std
::
string
&
name
,
Node
::
Type
type
)
{
Node
::
Type
type
)
{
...
...
paddle/fluid/framework/ir/node.h
浏览文件 @
ddb12035
...
@@ -115,37 +115,30 @@ class Node {
...
@@ -115,37 +115,30 @@ class Node {
int
id_
;
int
id_
;
private:
private:
// ID can only set by a Graph.
void
SetId
(
int
id
)
{
id_
=
id
;
}
friend
class
Graph
;
friend
class
Graph
;
friend
std
::
unique_ptr
<
Node
>
CreateNodeForTest
(
const
std
::
string
&
name
,
friend
std
::
unique_ptr
<
Node
>
CreateNodeForTest
(
const
std
::
string
&
name
,
Node
::
Type
type
);
Node
::
Type
type
);
explicit
Node
(
const
std
::
string
&
name
,
Type
type
)
explicit
Node
(
const
std
::
string
&
name
,
Type
type
)
:
name_
(
name
),
:
name_
(
name
),
var_desc_
(
nullptr
),
op_desc_
(
nullptr
),
type_
(
type
)
{}
var_desc_
(
nullptr
),
op_desc_
(
nullptr
),
type_
(
type
),
id_
(
count_
++
)
{}
explicit
Node
(
VarDesc
*
var_desc
)
explicit
Node
(
VarDesc
*
var_desc
)
:
name_
(
var_desc
->
Name
()),
:
name_
(
var_desc
->
Name
()),
var_desc_
(
new
VarDesc
(
*
var_desc
)),
var_desc_
(
new
VarDesc
(
*
var_desc
)),
op_desc_
(
nullptr
),
op_desc_
(
nullptr
),
type_
(
Type
::
kVariable
),
type_
(
Type
::
kVariable
)
{}
id_
(
count_
++
)
{}
explicit
Node
(
OpDesc
*
op_desc
)
explicit
Node
(
OpDesc
*
op_desc
)
:
name_
(
op_desc
->
Type
()),
:
name_
(
op_desc
->
Type
()),
var_desc_
(
nullptr
),
var_desc_
(
nullptr
),
op_desc_
(
new
OpDesc
(
*
op_desc
,
op_desc
->
Block
())),
op_desc_
(
new
OpDesc
(
*
op_desc
,
op_desc
->
Block
())),
type_
(
Type
::
kOperation
),
type_
(
Type
::
kOperation
)
{}
id_
(
count_
++
)
{}
Node
()
=
delete
;
Node
()
=
delete
;
static
int
count_
;
// Please don't use this API or make this public.
static
void
ResetId
()
{
count_
=
0
;
}
boost
::
any
wrapper_
;
boost
::
any
wrapper_
;
std
::
function
<
void
(
void
)
>
wrapper_deleter_
;
std
::
function
<
void
(
void
)
>
wrapper_deleter_
;
std
::
type_index
wrapper_type_
=
std
::
type_index
(
typeid
(
void
));
std
::
type_index
wrapper_type_
=
std
::
type_index
(
typeid
(
void
));
...
...
paddle/fluid/framework/ir/pass.h
浏览文件 @
ddb12035
...
@@ -93,6 +93,7 @@ class Pass {
...
@@ -93,6 +93,7 @@ class Pass {
protected:
protected:
virtual
std
::
unique_ptr
<
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
Graph
>
graph
)
const
{
virtual
std
::
unique_ptr
<
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
Graph
>
graph
)
const
{
LOG
(
FATAL
)
<<
"Calling virtual Pass not implemented."
;
LOG
(
FATAL
)
<<
"Calling virtual Pass not implemented."
;
return
graph
;
}
}
private:
private:
...
...
paddle/fluid/framework/naive_executor.cc
浏览文件 @
ddb12035
...
@@ -57,59 +57,57 @@ static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
...
@@ -57,59 +57,57 @@ static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
}
}
}
}
void
NaiveExecutor
::
Prepare
(
Scope
*
parent_scope
,
void
NaiveExecutor
::
Prepare
(
Scope
*
scope
,
const
ProgramDesc
&
program_desc
,
const
ProgramDesc
&
program_desc
,
int
block_id
,
int
block_id
,
bool
with_feed_fetch_ops
)
{
bool
with_feed_fetch_ops
)
{
if
(
!
scope
)
{
if
(
!
parent_scope
)
{
scope_
=
new
framework
::
Scope
;
scope_
=
new
framework
::
Scope
;
}
else
{
}
else
{
scope_
=
&
parent_scope
->
NewScope
()
;
scope_
=
scope
;
}
}
CreateVariables
(
program_desc
,
scope_
,
block_id
);
VLOG
(
3
)
<<
"NaiveExecutor init with scope "
<<
scope
;
CreateOps
(
program_desc
,
block_id
,
with_feed_fetch_ops
);
CreateOps
(
program_desc
,
block_id
,
with_feed_fetch_ops
);
}
}
void
NaiveExecutor
::
Run
()
{
void
NaiveExecutor
::
Run
()
{
for
(
auto
&
op
:
ops_
)
{
for
(
auto
&
op
:
ops_
)
{
VLOG
(
40
)
<<
"run "
<<
op
->
Type
();
VLOG
(
3
)
<<
std
::
this_thread
::
get_id
()
<<
" run "
<<
op
->
Type
()
<<
" on scope "
<<
scope_
;
op
->
Run
(
*
scope_
,
place_
);
op
->
Run
(
*
scope_
,
place_
);
}
}
}
}
void
NaiveExecutor
::
CreateVariables
(
const
ProgramDesc
&
desc
,
Scope
*
scope
,
void
NaiveExecutor
::
CreateVariables
(
const
ProgramDesc
&
desc
,
int
block_id
,
int
block_id
)
{
bool
persistable
,
Scope
*
scope
)
{
PADDLE_ENFORCE
(
scope
);
PADDLE_ENFORCE_NOT_NULL
(
scope
);
auto
&
global_block
=
desc
.
Block
(
block_id
);
auto
&
global_block
=
desc
.
Block
(
block_id
);
const
Scope
*
ancestor_scope
=
scope
;
const
auto
*
anc
=
scope
;
while
(
ancestor_scope
->
parent
())
{
PADDLE_ENFORCE
(
anc
->
parent
()
!=
anc
);
ancestor_scope
=
ancestor_scope
->
parent
();
while
(
anc
->
parent
())
{
anc
=
anc
->
parent
();
}
}
if
(
ancestor_scope
!=
scope
)
{
for
(
auto
&
var
:
global_block
.
AllVars
())
{
for
(
auto
&
var
:
global_block
.
AllVars
())
{
if
(
var
->
Name
()
==
framework
::
kEmptyVarName
)
{
if
(
var
->
Name
()
==
framework
::
kEmptyVarName
)
{
continue
;
continue
;
}
}
// Create persistable vars in ancestor scope.
if
(
var
->
Persistable
())
{
if
(
persistable
==
var
->
Persistable
())
{
auto
*
ptr
=
const_cast
<
Scope
*>
(
ancestor_scope
)
->
Var
(
var
->
Name
());
if
(
persistable
)
{
InitializeVariable
(
ptr
,
var
->
GetType
());
if
(
!
anc
->
FindVar
(
var
->
Name
()))
{
VLOG
(
30
)
<<
"Create Variable "
<<
var
->
Name
()
auto
*
ptr
=
const_cast
<
Scope
*>
(
anc
)
->
Var
(
var
->
Name
());
<<
" global, which pointer is "
<<
ptr
;
VLOG
(
3
)
<<
scope
<<
" Create persistable variable "
<<
var
->
Name
()
}
else
{
// Create temporary variables in local scope.
<<
", which pointer is "
<<
ptr
;
auto
*
ptr
=
scope
->
Var
(
var
->
Name
());
InitializeVariable
(
ptr
,
var
->
GetType
());
InitializeVariable
(
ptr
,
var
->
GetType
());
VLOG
(
30
)
<<
"Create Variable "
<<
var
->
Name
()
<<
" locally, which pointer is "
<<
ptr
;
}
}
}
}
else
{
}
else
{
for
(
auto
&
var
:
global_block
.
AllVars
())
{
auto
*
ptr
=
const_cast
<
Scope
*>
(
scope
)
->
Var
(
var
->
Name
());
auto
*
ptr
=
scope
->
Var
(
var
->
Name
());
VLOG
(
3
)
<<
scope
<<
" Create variable "
<<
var
->
Name
()
<<
", which pointer is "
<<
ptr
;
InitializeVariable
(
ptr
,
var
->
GetType
());
InitializeVariable
(
ptr
,
var
->
GetType
());
VLOG
(
30
)
<<
"Create variable "
<<
var
->
Name
()
<<
", which pointer is "
}
<<
ptr
;
}
}
}
}
}
}
...
...
paddle/fluid/framework/naive_executor.h
浏览文件 @
ddb12035
...
@@ -35,8 +35,14 @@ class NaiveExecutor {
...
@@ -35,8 +35,14 @@ class NaiveExecutor {
// Create child scope.
// Create child scope.
// Create variables.
// Create variables.
// @with_feed_fetch_ops: whether to work with the feed and fetch operators.
// @with_feed_fetch_ops: whether to work with the feed and fetch operators.
void
Prepare
(
Scope
*
parent_scope
,
const
ProgramDesc
&
program_desc
,
void
Prepare
(
Scope
*
scope
,
const
ProgramDesc
&
program_desc
,
int
block_id
,
int
block_id
,
bool
with_feed_fetch_ops
);
bool
with_feed_fetch_ops
);
// Create variables before head.
// Create parameters if persistable is true, or create the temporary variables
// instead.
void
CreateVariables
(
const
ProgramDesc
&
desc
,
int
block_id
,
bool
persistable
,
Scope
*
scope
);
// Run all the operators.
// Run all the operators.
void
Run
();
void
Run
();
...
@@ -49,8 +55,6 @@ class NaiveExecutor {
...
@@ -49,8 +55,6 @@ class NaiveExecutor {
void
CleanFeedFetchOps
();
void
CleanFeedFetchOps
();
protected:
protected:
void
CreateVariables
(
const
ProgramDesc
&
desc
,
Scope
*
scope
,
int
block_id
);
void
CreateOps
(
const
ProgramDesc
&
desc
,
int
block_id
,
void
CreateOps
(
const
ProgramDesc
&
desc
,
int
block_id
,
bool
with_feed_fetch_ops
);
bool
with_feed_fetch_ops
);
...
...
paddle/fluid/framework/naive_executor_test.cc
浏览文件 @
ddb12035
...
@@ -39,7 +39,7 @@ TEST(NaiveExecutor, Basic) {
...
@@ -39,7 +39,7 @@ TEST(NaiveExecutor, Basic) {
auto
place
=
platform
::
CPUPlace
();
auto
place
=
platform
::
CPUPlace
();
NaiveExecutor
exe
(
place
);
NaiveExecutor
exe
(
place
);
exe
.
Prepare
(
nullptr
,
program
,
0
,
false
/*with feed fetch ops*/
);
exe
.
Prepare
(
nullptr
,
program
,
0
,
false
);
auto
*
a_tensor
=
exe
.
FindTensor
(
"a"
);
auto
*
a_tensor
=
exe
.
FindTensor
(
"a"
);
auto
*
b_tensor
=
exe
.
FindTensor
(
"b"
);
auto
*
b_tensor
=
exe
.
FindTensor
(
"b"
);
auto
*
c_tensor
=
exe
.
FindTensor
(
"c"
);
auto
*
c_tensor
=
exe
.
FindTensor
(
"c"
);
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
ddb12035
...
@@ -15,7 +15,9 @@ limitations under the License. */
...
@@ -15,7 +15,9 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include <memory> // for unique_ptr
#include <memory> // for unique_ptr
#include <queue>
#include <set>
#include <set>
#include <unordered_set>
#include "glog/logging.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/printf.h"
...
@@ -36,6 +38,16 @@ DEFINE_double(
...
@@ -36,6 +38,16 @@ DEFINE_double(
"Memory size threshold (GB) when the garbage collector clear tensors."
"Memory size threshold (GB) when the garbage collector clear tensors."
"Disabled when this value is less than 0"
);
"Disabled when this value is less than 0"
);
// In the inference scenario, a scope is never written by two threads at the
// same time, but it may be read by multiple threads concurrently, and the
// mutex would cause a serious performance issue.
// So the mutex is disabled when `ON_INFER` is defined.
#ifdef ON_INFER
#define SCOPE_LOCK_GUARD
#else
#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
#endif
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
...
@@ -49,18 +61,18 @@ int64_t GetEagerDeletionThreshold() {
...
@@ -49,18 +61,18 @@ int64_t GetEagerDeletionThreshold() {
Scope
::~
Scope
()
{
DropKids
();
}
Scope
::~
Scope
()
{
DropKids
();
}
Scope
&
Scope
::
NewScope
()
const
{
Scope
&
Scope
::
NewScope
()
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
kids_
.
push_back
(
new
Scope
(
this
));
kids_
.
push_back
(
new
Scope
(
this
));
return
*
kids_
.
back
();
return
*
kids_
.
back
();
}
}
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
return
VarInternal
(
name
);
return
VarInternal
(
name
);
}
}
Variable
*
Scope
::
Var
(
std
::
string
*
name
)
{
Variable
*
Scope
::
Var
(
std
::
string
*
name
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
if
(
name
!=
nullptr
)
{
if
(
name
!=
nullptr
)
{
*
name
=
new_name
;
*
name
=
new_name
;
...
@@ -69,34 +81,34 @@ Variable* Scope::Var(std::string* name) {
...
@@ -69,34 +81,34 @@ Variable* Scope::Var(std::string* name) {
}
}
Variable
*
Scope
::
FindVar
(
const
std
::
string
&
name
)
const
{
Variable
*
Scope
::
FindVar
(
const
std
::
string
&
name
)
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
return
FindVarInternal
(
name
);
return
FindVarInternal
(
name
);
}
}
Variable
*
Scope
::
FindLocalVar
(
const
std
::
string
&
name
)
const
{
Variable
*
Scope
::
FindLocalVar
(
const
std
::
string
&
name
)
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
return
FindVarLocally
(
name
);
return
FindVarLocally
(
name
);
}
}
const
Scope
*
Scope
::
FindScope
(
const
Variable
*
var
)
const
{
const
Scope
*
Scope
::
FindScope
(
const
Variable
*
var
)
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
return
FindScopeInternal
(
var
);
return
FindScopeInternal
(
var
);
}
}
void
Scope
::
DropKids
()
{
void
Scope
::
DropKids
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
for
(
Scope
*
s
:
kids_
)
delete
s
;
for
(
Scope
*
s
:
kids_
)
delete
s
;
kids_
.
clear
();
kids_
.
clear
();
}
}
bool
Scope
::
HasKid
(
const
Scope
*
scope
)
const
{
bool
Scope
::
HasKid
(
const
Scope
*
scope
)
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
return
it
!=
this
->
kids_
.
end
();
return
it
!=
this
->
kids_
.
end
();
}
}
std
::
vector
<
std
::
string
>
Scope
::
LocalVarNames
()
const
{
std
::
vector
<
std
::
string
>
Scope
::
LocalVarNames
()
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
std
::
vector
<
std
::
string
>
known_vars
;
std
::
vector
<
std
::
string
>
known_vars
;
known_vars
.
reserve
(
this
->
vars_
.
size
());
known_vars
.
reserve
(
this
->
vars_
.
size
());
for
(
auto
&
p
:
vars_
)
{
for
(
auto
&
p
:
vars_
)
{
...
@@ -106,9 +118,10 @@ std::vector<std::string> Scope::LocalVarNames() const {
...
@@ -106,9 +118,10 @@ std::vector<std::string> Scope::LocalVarNames() const {
}
}
void
Scope
::
DeleteScope
(
Scope
*
scope
)
const
{
void
Scope
::
DeleteScope
(
Scope
*
scope
)
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
PADDLE_ENFORCE
(
it
!=
this
->
kids_
.
end
(),
"Cannot find %p as kid scope"
,
scope
);
PADDLE_ENFORCE
(
it
!=
this
->
kids_
.
end
(),
"%p Cannot find %p as kid scope"
,
this
,
scope
);
this
->
kids_
.
erase
(
it
);
this
->
kids_
.
erase
(
it
);
// When making memory benchmark on Fluid, we have to delete scope sync.
// When making memory benchmark on Fluid, we have to delete scope sync.
if
(
FLAGS_benchmark
||
FLAGS_eager_delete_scope
)
{
if
(
FLAGS_benchmark
||
FLAGS_eager_delete_scope
)
{
...
@@ -119,7 +132,7 @@ void Scope::DeleteScope(Scope* scope) const {
...
@@ -119,7 +132,7 @@ void Scope::DeleteScope(Scope* scope) const {
}
}
void
Scope
::
EraseVars
(
const
std
::
vector
<
std
::
string
>&
var_names
)
{
void
Scope
::
EraseVars
(
const
std
::
vector
<
std
::
string
>&
var_names
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
std
::
set
<
std
::
string
>
var_set
(
var_names
.
begin
(),
var_names
.
end
());
std
::
set
<
std
::
string
>
var_set
(
var_names
.
begin
(),
var_names
.
end
());
for
(
auto
it
=
vars_
.
begin
();
it
!=
vars_
.
end
();)
{
for
(
auto
it
=
vars_
.
begin
();
it
!=
vars_
.
end
();)
{
if
(
var_set
.
find
(
it
->
first
)
!=
var_set
.
end
())
{
if
(
var_set
.
find
(
it
->
first
)
!=
var_set
.
end
())
{
...
@@ -132,12 +145,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
...
@@ -132,12 +145,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void
Scope
::
Rename
(
const
std
::
string
&
origin_name
,
void
Scope
::
Rename
(
const
std
::
string
&
origin_name
,
const
std
::
string
&
new_name
)
const
{
const
std
::
string
&
new_name
)
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
RenameInternal
(
origin_name
,
new_name
);
RenameInternal
(
origin_name
,
new_name
);
}
}
std
::
string
Scope
::
Rename
(
const
std
::
string
&
origin_name
)
const
{
std
::
string
Scope
::
Rename
(
const
std
::
string
&
origin_name
)
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
SCOPE_LOCK_GUARD
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
RenameInternal
(
origin_name
,
new_name
);
RenameInternal
(
origin_name
,
new_name
);
return
new_name
;
return
new_name
;
...
@@ -189,5 +202,46 @@ Variable* Scope::FindVarLocally(const std::string& name) const {
...
@@ -189,5 +202,46 @@ Variable* Scope::FindVarLocally(const std::string& name) const {
return
nullptr
;
return
nullptr
;
}
}
// Builds a human-readable dump of the scope tree rooted at `root`.
//
// The first part lists the scope addresses level by level (breadth-first),
// with a dashed separator line after each level. The second part, introduced
// by "Details:", lists every visited scope followed by its local variable
// names. Returns an empty string when `root` is null.
std::string GenScopeTreeDebugInfo(Scope* root) {
  if (root == nullptr) return "";

  std::stringstream report;
  std::vector<Scope*> visited;
  std::queue<Scope*> pending;
  pending.push(root);

  while (!pending.empty()) {
    // Everything queued right now belongs to the current level; the element at
    // the back marks where that level stops.
    Scope* const level_last = pending.back();
    for (Scope* current = nullptr; current != level_last;) {
      current = pending.front();
      pending.pop();
      report << current << " ";
      visited.push_back(current);
      for (auto* kid : current->kids()) {
        pending.push(kid);
      }
    }
    // end of a level
    report << "\n------------------------------------------\n";
  }

  report << "\nDetails:\n\n";
  for (Scope* scope : visited) {
    report << "====\n";
    report << scope << ":\n";
    for (auto& name : scope->LocalVarNames()) {
      report << " - " << name << "\n";
    }
  }
  return report.str();
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/scope.h
浏览文件 @
ddb12035
...
@@ -78,11 +78,11 @@ class Scope {
...
@@ -78,11 +78,11 @@ class Scope {
/// Drop all kids scopes belonged to this scope.
/// Drop all kids scopes belonged to this scope.
void
DropKids
();
void
DropKids
();
std
::
list
<
Scope
*>&
kids
()
const
{
return
kids_
;
}
/// Find if a scope exists in the kid scopes
/// Find if a scope exists in the kid scopes
bool
HasKid
(
const
Scope
*
scope
)
const
;
bool
HasKid
(
const
Scope
*
scope
)
const
;
const
std
::
list
<
Scope
*>&
kids
()
const
{
return
kids_
;
}
// enumerate all the variables current contains.
// enumerate all the variables current contains.
std
::
vector
<
std
::
string
>
LocalVarNames
()
const
;
std
::
vector
<
std
::
string
>
LocalVarNames
()
const
;
...
@@ -118,12 +118,17 @@ class Scope {
...
@@ -118,12 +118,17 @@ class Scope {
// Scope in `kids_` are owned by this class.
// Scope in `kids_` are owned by this class.
mutable
std
::
list
<
Scope
*>
kids_
;
mutable
std
::
list
<
Scope
*>
kids_
;
Scope
const
*
parent_
{
nullptr
};
const
Scope
*
parent_
{
nullptr
};
DISABLE_COPY_AND_ASSIGN
(
Scope
);
DISABLE_COPY_AND_ASSIGN
(
Scope
);
private:
private:
mutable
std
::
mutex
mutex_
;
mutable
std
::
mutex
mutex_
;
};
};
// Generate a debug string describing the inheritance structure of the scope
// tree; quite naive.
std
::
string
GenScopeTreeDebugInfo
(
Scope
*
);
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/selected_rows.cc
浏览文件 @
ddb12035
...
@@ -63,6 +63,26 @@ struct TensorCopyVisitor {
...
@@ -63,6 +63,26 @@ struct TensorCopyVisitor {
int64_t
size_
;
int64_t
size_
;
};
};
// Visitor that fills a contiguous run of elements of a tensor with a constant.
//
// dst:        tensor to write into.
// dst_offset: element offset of the first element to overwrite.
// size:       number of elements to overwrite.
// value:      fill value; it is converted to the visited element type T.
//
// The previous version accepted `value` but silently ignored it and always
// wrote 0; the value is now stored and used.
struct TensorFillVisitor {
  TensorFillVisitor(framework::Tensor* dst, int64_t dst_offset, int64_t size,
                    float value)
      : dst_(dst), dst_offset_(dst_offset), size_(size), value_(value) {}

  // Writes `value_` into elements [dst_offset_, dst_offset_ + size_) of the
  // destination tensor, viewed as an array of T.
  template <typename T>
  void apply() const {
    // TODO(qiao): support other place
    platform::CPUPlace cpu;
    auto* tensor_data = dst_->mutable_data<T>(cpu);
    auto* start = tensor_data + dst_offset_;
    auto* end = start + size_;
    std::fill(start, end, static_cast<T>(value_));
  }

  framework::Tensor* dst_;
  int64_t dst_offset_;
  int64_t size_;
  float value_;
};
void
SerializeToStream
(
std
::
ostream
&
os
,
const
SelectedRows
&
selected_rows
,
void
SerializeToStream
(
std
::
ostream
&
os
,
const
SelectedRows
&
selected_rows
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
const
platform
::
DeviceContext
&
dev_ctx
)
{
{
// the 1st field, uint32_t version
{
// the 1st field, uint32_t version
...
@@ -120,7 +140,17 @@ bool SelectedRows::HasKey(int64_t key) const {
...
@@ -120,7 +140,17 @@ bool SelectedRows::HasKey(int64_t key) const {
:
true
;
:
true
;
}
}
int64_t
SelectedRows
::
AutoGrownIndex
(
int64_t
key
,
bool
auto_grown
)
{
int64_t
SelectedRows
::
AutoGrownIndex
(
int64_t
key
,
bool
auto_grown
,
bool
is_test
)
{
if
(
is_test
)
{
auto
iter
=
id_to_index_
.
find
(
key
);
if
(
iter
==
id_to_index_
.
end
())
{
return
-
1
;
}
else
{
return
iter
->
second
;
}
}
rwlock_
->
RDLock
();
rwlock_
->
RDLock
();
auto
iter
=
id_to_index_
.
find
(
key
);
auto
iter
=
id_to_index_
.
find
(
key
);
if
(
iter
==
id_to_index_
.
end
())
{
if
(
iter
==
id_to_index_
.
end
())
{
...
@@ -172,7 +202,7 @@ void SelectedRows::SyncIndex() {
...
@@ -172,7 +202,7 @@ void SelectedRows::SyncIndex() {
}
}
void
SelectedRows
::
Get
(
const
framework
::
Tensor
&
ids
,
framework
::
Tensor
*
value
,
void
SelectedRows
::
Get
(
const
framework
::
Tensor
&
ids
,
framework
::
Tensor
*
value
,
bool
auto_grown
)
{
bool
auto_grown
,
bool
is_test
)
{
PADDLE_ENFORCE
(
value
->
IsInitialized
(),
PADDLE_ENFORCE
(
value
->
IsInitialized
(),
"The value tensor should be initialized."
);
"The value tensor should be initialized."
);
if
(
ids
.
numel
()
==
0
)
{
if
(
ids
.
numel
()
==
0
)
{
...
@@ -183,13 +213,21 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
...
@@ -183,13 +213,21 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
"output tensor should have the same shape with table "
"output tensor should have the same shape with table "
"except the dims[0]."
);
"except the dims[0]."
);
for
(
int
i
=
0
;
i
<
ids
.
numel
();
++
i
)
{
for
(
int
i
=
0
;
i
<
ids
.
numel
();
++
i
)
{
int64_t
index
=
AutoGrownIndex
(
ids
.
data
<
int64_t
>
()[
i
],
auto_grown
);
auto
id
=
ids
.
data
<
int64_t
>
()[
i
];
int64_t
index
=
AutoGrownIndex
(
id
,
auto_grown
,
is_test
);
if
(
index
<
0
)
{
VLOG
(
5
)
<<
"id "
<<
id
<<
" not in the table, return 0"
;
framework
::
VisitDataType
(
framework
::
ToDataType
(
value_
->
type
()),
TensorFillVisitor
(
value
,
i
*
value_width
,
value_width
,
0.0
));
}
else
{
framework
::
VisitDataType
(
framework
::
VisitDataType
(
framework
::
ToDataType
(
value_
->
type
()),
framework
::
ToDataType
(
value_
->
type
()),
TensorCopyVisitor
(
value
,
i
*
value_width
,
*
value_
.
get
(),
TensorCopyVisitor
(
value
,
i
*
value_width
,
*
value_
.
get
(),
index
*
value_width
,
value_width
));
index
*
value_width
,
value_width
));
}
}
}
}
}
}
}
}
// namespace framework
}
// namespace framework
...
...
paddle/fluid/framework/selected_rows.h
浏览文件 @
ddb12035
...
@@ -105,7 +105,7 @@ class SelectedRows {
...
@@ -105,7 +105,7 @@ class SelectedRows {
* the value
* the value
*/
*/
void
Get
(
const
framework
::
Tensor
&
ids
,
framework
::
Tensor
*
value
,
void
Get
(
const
framework
::
Tensor
&
ids
,
framework
::
Tensor
*
value
,
bool
auto_grown
=
false
);
bool
auto_grown
=
false
,
bool
is_test
=
false
);
/*
/*
* @brief Get the index of the key from id_to_index_ map. If the key not
* @brief Get the index of the key from id_to_index_ map. If the key not
...
@@ -118,7 +118,7 @@ class SelectedRows {
...
@@ -118,7 +118,7 @@ class SelectedRows {
*
*
* @return index of the key.
* @return index of the key.
*/
*/
int64_t
AutoGrownIndex
(
int64_t
key
,
bool
auto_grown
);
int64_t
AutoGrownIndex
(
int64_t
key
,
bool
auto_grown
,
bool
is_test
=
false
);
void
SyncIndex
();
void
SyncIndex
();
...
...
paddle/fluid/framework/selected_rows_test.cc
浏览文件 @
ddb12035
...
@@ -84,10 +84,14 @@ TEST(SelectedRows, SparseTable) {
...
@@ -84,10 +84,14 @@ TEST(SelectedRows, SparseTable) {
data
[
i
*
embedding_width
+
j
]
=
static_cast
<
float
>
(
i
);
data
[
i
*
embedding_width
+
j
]
=
static_cast
<
float
>
(
i
);
}
}
}
}
ASSERT_EQ
(
table
.
AutoGrownIndex
(
10
,
true
),
0
);
ASSERT_EQ
(
table
.
AutoGrownIndex
(
10
,
true
,
false
),
0
);
ASSERT_EQ
(
table
.
AutoGrownIndex
(
8
,
true
),
1
);
ASSERT_EQ
(
table
.
AutoGrownIndex
(
8
,
true
,
false
),
1
);
ASSERT_EQ
(
table
.
AutoGrownIndex
(
8
,
true
),
1
);
ASSERT_EQ
(
table
.
AutoGrownIndex
(
8
,
true
,
false
),
1
);
ASSERT_EQ
(
table
.
AutoGrownIndex
(
6
,
true
),
2
);
ASSERT_EQ
(
table
.
AutoGrownIndex
(
6
,
true
,
false
),
2
);
for
(
int64_t
i
=
11
;
i
<
20
;
i
++
)
{
ASSERT_EQ
(
table
.
AutoGrownIndex
(
i
,
true
,
true
),
-
1
);
ASSERT_TRUE
(
!
table
.
HasKey
(
i
));
}
ASSERT_TRUE
(
table
.
HasKey
(
10
));
ASSERT_TRUE
(
table
.
HasKey
(
10
));
ASSERT_TRUE
(
table
.
HasKey
(
8
));
ASSERT_TRUE
(
table
.
HasKey
(
8
));
ASSERT_TRUE
(
table
.
HasKey
(
6
));
ASSERT_TRUE
(
table
.
HasKey
(
6
));
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
ddb12035
...
@@ -27,13 +27,9 @@ set(SHARED_INFERENCE_SRCS
...
@@ -27,13 +27,9 @@ set(SHARED_INFERENCE_SRCS
io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
io.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_impl.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/analysis_predictor.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/analysis_predictor.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/details/zero_copy_tensor.cc
)
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/details/zero_copy_tensor.cc
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
STATIC_INFERENCE_APIS
${
STATIC_INFERENCE_APIS
}
paddle_inference_tensorrt_subgraph_engine
)
set
(
SHARED_INFERENCE_SRCS
${
SHARED_INFERENCE_SRCS
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/api/api_tensorrt_subgraph_engine.cc
)
endif
()
# Create static library
# Create static library
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
STATIC_INFERENCE_APIS
}
zero_copy_tensor reset_tensor_array
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
STATIC_INFERENCE_APIS
}
zero_copy_tensor reset_tensor_array
analysis_config paddle_pass_builder
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
# TODO(liuyiqu): Temporarily disable the link flag because it is not supported on Mac.
# TODO(liuyiqu): Temporarily disable the link flag because it is not supported on Mac.
...
@@ -43,7 +39,7 @@ endif()
...
@@ -43,7 +39,7 @@ endif()
# Create shared library
# Create shared library
cc_library
(
paddle_fluid_shared SHARED SRCS
${
SHARED_INFERENCE_SRCS
}
cc_library
(
paddle_fluid_shared SHARED SRCS
${
SHARED_INFERENCE_SRCS
}
DEPS
${
fluid_modules
}
paddle_fluid_api reset_tensor_array
)
DEPS
${
fluid_modules
}
paddle_fluid_api reset_tensor_array
analysis_config paddle_pass_builder
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
...
...
paddle/fluid/inference/analysis/CMakeLists.txt
浏览文件 @
ddb12035
cc_library
(
ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass
)
unset
(
analysis_deps CACHE
)
set
(
analysis_deps
set
(
analysis_deps
# analysis_deps can be extended accross the project
framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log
)
framework_proto proto_desc graph pass paddle_fluid_api executor pretty_log
ir_pass_manager
CACHE INTERNAL
""
)
cc_library
(
analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
add_subdirectory
(
ir_passes
)
add_subdirectory
(
passes
)
cc_library
(
ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass
${
INFER_IR_PASSES
}
)
cc_library
(
argument SRCS argument.cc DEPS scope proto_desc
)
cc_library
(
analysis_pass SRCS analysis_pass.cc DEPS proto_desc
)
cc_library
(
analysis SRCS
analyzer.cc
analyzer.cc
helper.cc
helper.cc
# passes
analysis_pass
analysis_pass.cc
DEPS
${
analysis_deps
}
fluid_to_data_flow_graph_pass.cc
)
data_flow_graph_to_fluid_pass.cc
dfg_graphviz_draw_pass.cc
tensorrt_subgraph_pass.cc
tensorrt_subgraph_node_mark_pass.cc
fluid_to_ir_pass.cc
model_store_pass.cc
DEPS
${
analysis_deps
}
)
cc_test
(
test_node SRCS node_tester.cc DEPS analysis
)
cc_test
(
test_dot SRCS dot_tester.cc DEPS analysis
)
cc_test
(
test_dot SRCS dot_tester.cc DEPS analysis
)
cc_binary
(
inference_analyzer SRCS analyzer_main.cc DEPS analysis paddle_fluid
)
function
(
inference_analysis_test TARGET
)
function
(
inference_analysis_test TARGET
)
if
(
WITH_TESTING
)
if
(
WITH_TESTING
)
...
@@ -34,13 +35,3 @@ function(inference_analysis_test TARGET)
...
@@ -34,13 +35,3 @@ function(inference_analysis_test TARGET)
endfunction
(
inference_analysis_test
)
endfunction
(
inference_analysis_test
)
inference_analysis_test
(
test_analyzer SRCS analyzer_tester.cc EXTRA_DEPS paddle_inference_api
)
inference_analysis_test
(
test_analyzer SRCS analyzer_tester.cc EXTRA_DEPS paddle_inference_api
)
inference_analysis_test
(
test_data_flow_graph SRCS data_flow_graph_tester.cc
)
inference_analysis_test
(
test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc
)
inference_analysis_test
(
test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc
)
inference_analysis_test
(
test_subgraph_splitter SRCS subgraph_splitter_tester.cc
)
inference_analysis_test
(
test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc
)
inference_analysis_test
(
test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc
)
inference_analysis_test
(
test_pass_manager SRCS pass_manager_tester.cc
)
inference_analysis_test
(
test_tensorrt_subgraph_node_mark_pass SRCS tensorrt_subgraph_node_mark_pass_tester.cc
)
inference_analysis_test
(
test_model_store_pass SRCS model_store_pass_tester.cc
)
paddle/fluid/inference/analysis/analysis_pass.h
浏览文件 @
ddb12035
...
@@ -19,42 +19,36 @@ limitations under the License. */
...
@@ -19,42 +19,36 @@ limitations under the License. */
#include <string>
#include <string>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
/*
* AnalysisPass is a pass used to control the IR passes.
*/
class
AnalysisPass
{
class
AnalysisPass
{
public:
public:
AnalysisPass
()
=
default
;
AnalysisPass
()
=
default
;
virtual
~
AnalysisPass
()
=
default
;
virtual
~
AnalysisPass
()
=
default
;
// Mutable Pass.
virtual
bool
Initialize
(
Argument
*
argument
)
{
return
false
;
}
// Readonly Pass.
virtual
bool
Initialize
(
const
Argument
&
argument
)
{
return
false
;
}
// Virtual method overriden by subclasses to do any necessary clean up after
// Run on a single Graph.
// all passes have run.
void
Run
(
Argument
*
argument
)
{
RunImpl
(
argument
);
}
virtual
bool
Finalize
()
{
return
false
;
}
// Create a debugger Pass that draw the DFG by graphviz toolkit.
virtual
AnalysisPass
*
CreateGraphvizDebugerPass
()
const
{
return
nullptr
;
}
// Run on a single DataFlowGraph.
virtual
void
Run
(
DataFlowGraph
*
x
)
=
0
;
// Human-readable short representation.
// Human-readable short representation.
virtual
std
::
string
repr
()
const
=
0
;
virtual
std
::
string
repr
()
const
=
0
;
// Human-readable long description.
// Human-readable long description.
virtual
std
::
string
description
()
const
{
return
"No DOC"
;
}
virtual
std
::
string
description
()
const
{
return
"No DOC"
;
}
};
// GraphPass processes on any GraphType.
protected:
class
DataFlowGraphPass
:
public
AnalysisPass
{};
// User should implement these.
virtual
void
RunImpl
(
Argument
*
argument
)
=
0
;
Argument
*
argument_
{
nullptr
};
};
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/analysis/analyzer.cc
浏览文件 @
ddb12035
...
@@ -15,138 +15,23 @@
...
@@ -15,138 +15,23 @@
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h"
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/passes/passes.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_ir_pass.h"
#include "paddle/fluid/inference/analysis/model_store_pass.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
DEFINE_bool
(
IA_enable_tensorrt_subgraph_engine
,
false
,
"Enable subgraph to TensorRT engine for acceleration"
);
DEFINE_bool
(
IA_enable_ir
,
false
,
"Turn on IR support"
);
DEFINE_string
(
IA_graphviz_log_root
,
"./"
,
"Graphviz debuger for data flow graphs."
);
DEFINE_string
(
IA_output_storage_path
,
""
,
"optimized model output path"
);
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
class
DfgPassManagerImpl
final
:
public
DfgPassManager
{
Analyzer
::
Analyzer
()
{}
public:
DfgPassManagerImpl
()
{
// TODO(Superjomn) set the key with pass reprs.
if
(
!
FLAGS_IA_enable_ir
)
{
AddPass
(
"fluid-to-data-flow-graph"
,
new
FluidToDataFlowGraphPass
);
}
else
{
AddPass
(
"fluid-to-ir-pass"
,
new
FluidToIrPass
);
}
TryAddTensorRtPass
();
AddPass
(
"data-flow-graph-to-fluid"
,
new
DataFlowGraphToFluidPass
);
if
(
!
FLAGS_IA_output_storage_path
.
empty
())
{
AddPass
(
"model-store-pass"
,
new
ModelStorePass
);
}
}
std
::
string
repr
()
const
override
{
return
"dfg-pass-manager"
;
}
std
::
string
description
()
const
override
{
return
"DFG pass manager."
;
}
private:
void
AddPass
(
const
std
::
string
&
name
,
AnalysisPass
*
pass
)
{
VLOG
(
30
)
<<
"Adding pass "
<<
name
;
Register
(
name
,
pass
);
AddGraphvizDebugerPass
(
pass
);
}
void
TryAddTensorRtPass
()
{
if
(
FLAGS_IA_enable_tensorrt_subgraph_engine
)
{
auto
trt_teller
=
[
&
](
const
Node
*
node
)
{
std
::
unordered_set
<
std
::
string
>
teller_set
(
{
"mul"
,
"conv2d"
,
"pool2d"
,
"relu"
,
"softmax"
,
"sigmoid"
,
"depthwise_conv2d"
,
"batch_norm"
,
"concat"
,
"tanh"
,
"pad"
,
"elementwise_add"
,
"dropout"
,
"split"
});
if
(
!
node
->
IsFunction
())
return
false
;
const
auto
*
func
=
static_cast
<
const
Function
*>
(
node
);
void
Analyzer
::
Run
(
Argument
*
argument
)
{
RunIrAnalysis
(
argument
);
}
if
(
teller_set
.
count
(
func
->
func_type
()))
{
return
true
;
}
else
{
return
false
;
}
};
AddPass
(
"tensorrt-subgraph-marker"
,
void
Analyzer
::
RunIrAnalysis
(
Argument
*
argument
)
{
new
TensorRTSubgraphNodeMarkPass
(
trt_teller
));
std
::
vector
<
std
::
string
>
passes
({
"ir_analysis_compose_pass"
});
AddPass
(
"tensorrt-subgraph"
,
new
TensorRTSubGraphPass
(
trt_teller
));
}
}
// Add the graphviz debuger pass if the parent pass has one.
for
(
auto
&
pass
:
passes
)
{
void
AddGraphvizDebugerPass
(
AnalysisPass
*
pass
)
{
PassRegistry
::
Global
().
Retreive
(
pass
)
->
Run
(
argument
);
auto
*
debuger_pass
=
pass
->
CreateGraphvizDebugerPass
();
if
(
debuger_pass
)
{
Register
(
debuger_pass
->
repr
(),
debuger_pass
);
}
}
}
};
Analyzer
::
Analyzer
()
{
Register
(
"manager1"
,
new
DfgPassManagerImpl
);
}
void
Analyzer
::
Run
(
Argument
*
argument
)
{
std
::
vector
<
std
::
string
>
passes
;
passes
.
push_back
(
"graph_viz_pass"
);
// add graphviz for debug.
#ifdef PADDLE_WITH_MKLDNN
if
(
use_mkldnn_
)
{
VLOG
(
30
)
<<
"Adding MKL-DNN placement pass"
;
passes
.
push_back
(
"mkldnn_placement_pass"
);
}
#endif
// infer_clean_graph_pass should be the first default pass
// after mkldnn_placement_pass.
passes
.
push_back
(
"infer_clean_graph_pass"
);
passes
.
push_back
(
"graph_viz_pass"
);
// add graphviz for debug.
for
(
auto
&
pass
:
ir_passes_
)
{
// skip mkldnn pass when use_mkldnn_ = false;
bool
skip_pass
=
(
!
use_mkldnn_
)
&&
pass
.
find
(
"mkldnn"
)
!=
std
::
string
::
npos
;
if
(
!
disabled_ir_passes_
.
count
(
pass
)
&&
!
skip_pass
)
{
passes
.
push_back
(
pass
);
passes
.
push_back
(
"graph_viz_pass"
);
// add graphviz for debug.
}
}
argument
->
Set
(
kFluidToIrPassesAttr
,
new
std
::
vector
<
std
::
string
>
(
passes
));
for
(
auto
&
x
:
data_
)
{
PADDLE_ENFORCE
(
x
->
Initialize
(
argument
));
x
->
RunAll
();
PADDLE_ENFORCE
(
x
->
Finalize
());
}
}
Analyzer
&
Analyzer
::
IncludeAllIrPasses
()
{
ir_passes_
=
all_ir_passes_
;
return
*
this
;
}
Analyzer
&
Analyzer
::
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
)
{
disabled_ir_passes_
.
insert
(
passes
.
begin
(),
passes
.
end
());
return
*
this
;
}
Analyzer
&
Analyzer
::
IncludeIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
)
{
ir_passes_
=
passes
;
return
*
this
;
}
Analyzer
&
Analyzer
::
SetUseMkldnn
(
bool
use_mkldnn
)
{
use_mkldnn_
=
use_mkldnn
;
return
*
this
;
}
}
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
ddb12035
...
@@ -40,56 +40,21 @@ limitations under the License. */
...
@@ -40,56 +40,21 @@ limitations under the License. */
#include <vector>
#include <vector>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
class
Analyzer
:
public
OrderedRegistry
<
PassManager
>
{
class
Analyzer
final
{
public:
public:
// Register all the pass-managers.
Analyzer
();
Analyzer
();
void
Run
(
Argument
*
argument
);
void
Run
(
Argument
*
argument
);
Analyzer
&
DisableIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
);
Analyzer
&
IncludeIrPasses
(
const
std
::
vector
<
std
::
string
>&
passes
);
Analyzer
&
IncludeAllIrPasses
();
Analyzer
&
SetUseMkldnn
(
bool
use_mkldnn
);
DISABLE_COPY_AND_ASSIGN
(
Analyzer
);
DISABLE_COPY_AND_ASSIGN
(
Analyzer
);
private:
protected:
// All available IR passes.
void
RunIrAnalysis
(
Argument
*
argument
);
// The bigger fuse comes first, so that the small operators prefer to be
// merged in a larger fuse op. The small fusion will not break the pattern of
// larger fusion.
const
std
::
vector
<
std
::
string
>
all_ir_passes_
{{
// Manual update the passes here.
"attention_lstm_fuse_pass"
,
//
"seqconv_eltadd_relu_fuse_pass"
,
//
"embedding_fc_lstm_fuse_pass"
,
//
"fc_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"mul_gru_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_eltwiseadd_bn_fuse_pass"
,
//
#ifdef PADDLE_WITH_MKLDNN
"depthwise_conv_mkldnn_pass"
,
//
"conv_bias_mkldnn_fuse_pass"
,
//
"conv_relu_mkldnn_fuse_pass"
,
//
"conv_elementwise_add_mkldnn_fuse_pass"
,
//
#endif
}};
std
::
unordered_set
<
std
::
string
>
disabled_ir_passes_
;
// Ir passes to run
std
::
vector
<
std
::
string
>
ir_passes_
;
bool
use_mkldnn_
;
};
};
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
ddb12035
...
@@ -27,21 +27,21 @@ namespace analysis {
...
@@ -27,21 +27,21 @@ namespace analysis {
using
namespace
framework
;
// NOLINT
using
namespace
framework
;
// NOLINT
TEST
(
Analyzer
,
analysis_without_tensorrt
)
{
TEST
(
Analyzer
,
analysis_without_tensorrt
)
{
FLAGS_IA_enable_tensorrt_subgraph_engine
=
false
;
Argument
argument
;
Argument
argument
;
argument
.
fluid_model_dir
.
reset
(
new
std
::
string
(
FLAGS_inference_model_dir
));
argument
.
SetModelDir
(
FLAGS_inference_model_dir
);
argument
.
SetIrAnalysisPasses
({
"infer_clean_graph_pass"
});
Analyzer
analyser
;
Analyzer
analyser
;
analyser
.
Run
(
&
argument
);
analyser
.
Run
(
&
argument
);
}
}
TEST
(
Analyzer
,
analysis_with_tensorrt
)
{
TEST
(
Analyzer
,
analysis_with_tensorrt
)
{
FLAGS_IA_enable_tensorrt_subgraph_engine
=
true
;
Argument
argument
;
Argument
argument
;
argument
.
Set
<
int
>
(
"minimum_subgraph_size"
,
new
int
(
0
)
);
argument
.
Set
TensorRtMaxBatchSize
(
3
);
argument
.
Set
<
int
>
(
"max_batch_size"
,
new
int
(
3
)
);
argument
.
Set
TensorRtWorkspaceSize
(
1
<<
20
);
argument
.
Set
<
int
>
(
"workspace_size"
,
new
int
(
1
<<
20
)
);
argument
.
Set
ModelDir
(
FLAGS_inference_model_dir
);
argument
.
Set
<
std
::
string
>
(
"precision_mode"
,
new
std
::
string
(
"FP32"
)
);
argument
.
Set
IrAnalysisPasses
({
"infer_clean_graph_pass"
}
);
argument
.
fluid_model_dir
.
reset
(
new
std
::
string
(
FLAGS_inference_model_dir
));
Analyzer
analyser
;
Analyzer
analyser
;
analyser
.
Run
(
&
argument
);
analyser
.
Run
(
&
argument
);
}
}
...
...
paddle/fluid/inference/analysis/argument.h
浏览文件 @
ddb12035
...
@@ -24,13 +24,16 @@
...
@@ -24,13 +24,16 @@
#pragma once
#pragma once
#include <string>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/
inference/analysis/data_flow_graph
.h"
#include "paddle/fluid/
framework/scope
.h"
#include "paddle/fluid/platform/variant.h"
#include "paddle/fluid/platform/variant.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
using
framework
::
ir
::
Graph
;
/*
/*
* The argument definition of both Pass and PassManagers.
* The argument definition of both Pass and PassManagers.
...
@@ -39,75 +42,99 @@ namespace analysis {
...
@@ -39,75 +42,99 @@ namespace analysis {
*/
*/
struct
Argument
{
struct
Argument
{
Argument
()
=
default
;
Argument
()
=
default
;
explicit
Argument
(
const
std
::
string
&
fluid_model_dir
)
explicit
Argument
(
const
std
::
string
&
model_dir
)
{
SetModelDir
(
model_dir
);
}
:
fluid_model_dir
(
new
std
::
string
(
fluid_model_dir
))
{}
// The directory of the trained model.
using
unique_ptr_t
=
std
::
unique_ptr
<
void
,
std
::
function
<
void
(
void
*
)
>>
;
std
::
unique_ptr
<
std
::
string
>
fluid_model_dir
;
using
fusion_statis_t
=
std
::
unordered_map
<
std
::
string
,
int
>
;
// The path of `__model__` and `param`, this is used when the file name of
// model and param is changed.
bool
Has
(
const
std
::
string
&
key
)
const
{
return
valid_fields_
.
count
(
key
);
}
std
::
unique_ptr
<
std
::
string
>
fluid_model_program_path
;
std
::
unique_ptr
<
std
::
string
>
fluid_model_param_path
;
#define DECL_ARGUMENT_FIELD(field__, Field, type__) \
public: \
// The graph that process by the Passes or PassManagers.
type__& field__() { \
std
::
unique_ptr
<
DataFlowGraph
>
main_dfg
;
PADDLE_ENFORCE(Has(#field__)); \
return field__##_; \
// The original program desc.
} \
std
::
unique_ptr
<
framework
::
proto
::
ProgramDesc
>
origin_program_desc
;
void Set##Field(const type__& x) { \
field__##_ = x; \
// The processed program desc.
valid_fields_.insert(#field__); \
std
::
unique_ptr
<
framework
::
proto
::
ProgramDesc
>
transformed_program_desc
;
} \
DECL_ARGUMENT_FIELD_VALID(field__); \
// The output storage path of ModelStorePass.
type__* field__##_ptr() { return &field__##_; } \
std
::
unique_ptr
<
std
::
string
>
model_output_store_path
;
\
private: \
// Support for any other attributes.
type__ field__##_;
template
<
typename
T
>
void
Set
(
const
std
::
string
&
key
,
T
*
data
)
{
#define DECL_ARGUMENT_FIELD_VALID(field__) \
PADDLE_ENFORCE_NOT_NULL
(
data
);
bool field__##_valid() { return Has(#field__); }
PADDLE_ENFORCE
(
!
attrs_
.
count
(
key
),
"Duplicate set Argument's attr [%s]"
,
key
);
#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \
attrs_
[
key
]
=
data
;
public: \
attr_deleters_
[
key
]
=
[
data
,
key
]()
{
type__& field__() { \
VLOG
(
30
)
<<
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
;
PADDLE_ENFORCE_NOT_NULL(field__##_); \
VLOG
(
30
)
<<
"argument delete attr: "
<<
key
;
PADDLE_ENFORCE(Has(#field__)); \
delete
data
;
return *static_cast<type__*>(field__##_.get()); \
};
} \
}
void Set##Field(type__* x) { \
field__##_ = \
bool
Has
(
const
std
::
string
&
name
)
const
{
return
attrs_
.
count
(
name
);
}
unique_ptr_t(x, [](void* x) { delete static_cast<type__*>(x); }); \
valid_fields_.insert(#field__); \
template
<
typename
T
>
} \
T
*
Release
(
const
std
::
string
&
key
)
{
void Set##Field##NotOwned(type__* x) { \
PADDLE_ENFORCE
(
attrs_
.
count
(
key
));
valid_fields_.insert(#field__); \
auto
*
res
=
boost
::
any_cast
<
T
*>
(
attrs_
.
at
(
key
));
field__##_ = unique_ptr_t(x, [](void* x) {}); \
attrs_
.
erase
(
key
);
} \
attr_deleters_
.
erase
(
key
);
DECL_ARGUMENT_FIELD_VALID(field__); \
return
res
;
type__* field__##_ptr() { \
}
PADDLE_ENFORCE(Has(#field__)); \
return static_cast<type__*>(field__##_.get()); \
template
<
typename
T
>
} \
T
&
Get
(
const
std
::
string
&
key
)
{
type__* Release##Field() { \
PADDLE_ENFORCE
(
Has
(
key
));
PADDLE_ENFORCE(Has(#field__)); \
return
*
boost
::
any_cast
<
T
*>
(
attrs_
.
at
(
key
));
valid_fields_.erase(#field__); \
}
return static_cast<type__*>(field__##_.release()); \
} \
~
Argument
()
{
\
for
(
auto
&
item
:
attr_deleters_
)
{
private: \
item
.
second
();
unique_ptr_t field__##_;
}
}
// Model path
DECL_ARGUMENT_FIELD
(
model_dir
,
ModelDir
,
std
::
string
);
// Model specified with program and parameters files.
DECL_ARGUMENT_FIELD
(
model_program_path
,
ModelProgramPath
,
std
::
string
);
DECL_ARGUMENT_FIELD
(
model_params_path
,
ModelParamsPath
,
std
::
string
);
// The overall graph to work on.
DECL_ARGUMENT_UNIQUE_FIELD
(
main_graph
,
MainGraph
,
framework
::
ir
::
Graph
);
// The overall Scope to work on.
DECL_ARGUMENT_UNIQUE_FIELD
(
scope
,
Scope
,
framework
::
Scope
);
DECL_ARGUMENT_UNIQUE_FIELD
(
main_program
,
MainProgram
,
framework
::
ProgramDesc
);
// The ir passes to perform in analysis phase.
DECL_ARGUMENT_FIELD
(
ir_analysis_passes
,
IrAnalysisPasses
,
std
::
vector
<
std
::
string
>
);
DECL_ARGUMENT_FIELD
(
use_gpu
,
UseGPU
,
bool
);
DECL_ARGUMENT_FIELD
(
use_tensorrt
,
UseTensorRT
,
bool
);
DECL_ARGUMENT_FIELD
(
tensorrt_node_teller
,
TensorRtNodeTeller
,
std
::
function
<
bool
(
const
framework
::
ir
::
Node
*
)
>
);
DECL_ARGUMENT_FIELD
(
tensorrt_max_batch_size
,
TensorRtMaxBatchSize
,
int
);
DECL_ARGUMENT_FIELD
(
tensorrt_workspace_size
,
TensorRtWorkspaceSize
,
int
);
// The program transformed by IR analysis phase.
DECL_ARGUMENT_UNIQUE_FIELD
(
ir_analyzed_program
,
IrAnalyzedProgram
,
framework
::
proto
::
ProgramDesc
);
DECL_ARGUMENT_FIELD
(
fusion_statis
,
FusionStatis
,
fusion_statis_t
);
private:
private:
std
::
unordered_map
<
std
::
string
,
boost
::
any
>
attrs_
;
std
::
unordered_set
<
std
::
string
>
valid_fields_
;
std
::
unordered_map
<
std
::
string
,
std
::
function
<
void
()
>>
attr_deleters_
;
};
};
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \
#define ANALYSIS_ARGUMENT_CHECK_FIELD(field__) \
PADDLE_ENFORCE(argument__->Has(#fieldname__), \
if (UNLIKELY(!(field__))) { \
"the argument field [%s] should be set", #fieldname__);
LOG(ERROR) << "field " << #field__ << " should be set."; \
return false; \
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/analysis/data_flow_graph.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
using
ir_node_t
=
framework
::
ir
::
Node
;
using
ir_graph_t
=
framework
::
ir
::
Graph
;
// Ideally the inputs and outputs of this graph would be set manually
// beforehand, but there must be a Pass that prunes the unnecessary ops that
// do not contribute to the given targets anyway, so analyzing the links here
// to deduce the inputs and outputs is OK.
void
DataFlowGraph
::
Build
()
{
inputs_
.
clear
();
outputs_
.
clear
();
std
::
unordered_set
<
Node
*>
ins
;
std
::
unordered_set
<
Node
*>
outs
;
for
(
auto
&
node
:
nodes
.
nodes
())
{
for
(
auto
*
in
:
node
->
inlinks
)
{
ins
.
insert
(
in
);
}
for
(
auto
*
out
:
node
->
outlinks
)
{
outs
.
insert
(
out
);
}
}
// The nodes that in ins but not in outs is the graph's inputs
// similarly, the nodes that in outs but not in ins is the graphs' outputs
for
(
auto
*
in
:
ins
)
{
if
(
!
outs
.
count
(
in
))
{
inputs_
.
push_back
(
in
);
}
}
for
(
auto
*
out
:
outs
)
{
if
(
!
ins
.
count
(
out
))
{
outputs_
.
push_back
(
out
);
}
}
Clean
();
}
// Builds the data flow graph from the root block of a fluid ProgramDesc.
//
// One kValue node is created per declared variable and one kFunction node per
// op; ops are then linked to their argument variables. Finally Build() is
// called to deduce the graph inputs/outputs.
void DataFlowGraph::Build(const framework::proto::ProgramDesc &prog) {
  // Maps a variable name to the id of the Node currently standing for it; the
  // entry is redirected whenever an alias node is created below.
  std::unordered_map<std::string, size_t> name_to_id;

  auto &root_block = prog.blocks(framework::kRootBlockIndex);

  // Step 1: one value node per declared variable.
  const int var_count = root_block.vars_size();
  for (int vi = 0; vi < var_count; ++vi) {
    const auto &var_desc = root_block.vars(vi);
    auto *value_node = nodes.Create(Node::Type::kValue);
    value_node->SetName(var_desc.name());
    value_node->SetPbDesc(
        const_cast<void *>(static_cast<const void *>(&var_desc)));
    value_node->SetPbMsg(var_desc.SerializeAsString());
    name_to_id[var_desc.name()] = value_node->id();
  }

  // Nodes that have been consumed as an op input so far. When an op writes to
  // one of them, a fresh alias node is created so that `a = op(a)` becomes
  // `a1 = op(a)` and the graph stays loop-free.
  // NOTE(review): despite its original name (unique_written_vars) this set is
  // filled from op *inputs*, not outputs -- confirm that is intended.
  std::unordered_set<Node *> consumed_vars;

  // Step 2: one function node per op, linked to its arguments.
  const int op_count = root_block.ops_size();
  for (int oi = 0; oi < op_count; ++oi) {
    const auto &op_desc = root_block.ops(oi);
    auto *func_node = nodes.Create(Node::Type::kFunction);
    func_node->SetName(op_desc.type());
    static_cast<Function *>(func_node)->SetFuncType(op_desc.type());
    // Keep a pointer to the original protobuf message so that a ProgramDesc
    // can later be regenerated from the graph.
    func_node->SetPbDesc(
        const_cast<void *>(static_cast<const void *>(&op_desc)));
    func_node->SetPbMsg(op_desc.SerializeAsString());

    // Inputs: variable -> op.
    for (int slot = 0; slot < op_desc.inputs_size(); ++slot) {
      auto &input_slot = op_desc.inputs(slot);
      for (int arg = 0; arg < input_slot.arguments_size(); ++arg) {
        auto *src = nodes.GetMutable(name_to_id.at(input_slot.arguments(arg)));
        src->outlinks.push_back(func_node);
        func_node->inlinks.push_back(src);
        consumed_vars.insert(src);
      }
    }

    // Outputs: op -> variable (or a new alias of it).
    for (int slot = 0; slot < op_desc.outputs_size(); ++slot) {
      auto &output_slot = op_desc.outputs(slot);
      for (int arg = 0; arg < output_slot.arguments_size(); ++arg) {
        // NOTE(review): operator[] default-inserts id 0 for a name that was
        // never declared, whereas the input path uses at() -- verify whether
        // undeclared outputs can occur.
        auto *dst = nodes.GetMutable(name_to_id[output_slot.arguments(arg)]);
        if (consumed_vars.count(dst)) {
          // Loop found, e.g. a = op(a); write to a new alias node instead.
          auto *alias = nodes.Create(Node::Type::kValue);
          alias->SetName(dst->name());
          alias->SetPbDesc(dst->pb_desc());
          alias->SetPbMsg(dst->pb_msg());
          // From now on the name resolves to the alias.
          name_to_id[alias->name()] = alias->id();
          VLOG(40) << "loop found in graph, create SSA alias node ["
                   << alias->repr() << "] for [" << dst->repr() << "]";
          dst = alias;
        }
        dst->inlinks.push_back(func_node);
        func_node->outlinks.push_back(dst);
      }
    }
  }

  // Step 3: deduce the graph-level inputs and outputs.
  Build();
}
// Builds the data flow graph from a framework IR graph.
//
// Each IR op node becomes a kFunction node and each IR var node a kValue
// node; the IR node pointer is stored in the "ir_node" attribute. Links are
// mirrored from the IR graph, Build() deduces inputs/outputs, and the IR graph
// pointer is remembered in ir_graph.
void DataFlowGraph::Build(const framework::ir::Graph &graph) {
  // IR node -> the Node created for it.
  std::unordered_map<ir_node_t *, Node *> mirror;

  // Pass 1: create the nodes.
  for (auto *ir_node : graph.Nodes()) {
    Node *created{nullptr};
    if (ir_node->IsOp()) {
      PADDLE_ENFORCE(ir_node->Op());
      VLOG(40) << "get op " << ir_node << " " << ir_node->Name();
      created = nodes.Create(Node::Type::kFunction);
      created->attr("ir_node").Pointer() = ir_node;
      PADDLE_ENFORCE(ir_node->Op()->Proto());
      created->SetName(ir_node->Op()->Proto()->type());
      created->SetPbMsg(ir_node->Op()->Proto()->SerializeAsString());
    } else if (ir_node->IsVar()) {
      // NOTE(review): the original comment here said no Node is created for
      // IR control-dependency vars, yet every IsVar() node gets one --
      // confirm which is intended.
      VLOG(40) << "get var " << ir_node->Name();
      created = nodes.Create(Node::Type::kValue);
      created->attr("ir_node").Pointer() = ir_node;
      created->SetName(ir_node->Name());
      // The var's protobuf message is intentionally not copied here.
    } else {
      PADDLE_THROW("Failed to create an Node from IR, unknown type");
    }
    mirror.emplace(ir_node, created);
  }
  VLOG(40) << "finish creating Nodes";

  VLOG(40) << "to create edge";
  // Pass 2: mirror the links, ignoring IR nodes that have no counterpart.
  for (auto *ir_node : graph.Nodes()) {
    auto self = mirror.find(ir_node);
    if (self == mirror.end()) continue;
    auto *dfg_node = self->second;

    for (auto *src : ir_node->inputs) {
      auto linked = mirror.find(src);
      if (linked == mirror.end()) continue;
      dfg_node->inlinks.push_back(linked->second);
    }
    for (auto *dst : ir_node->outputs) {
      auto linked = mirror.find(dst);
      if (linked == mirror.end()) continue;
      dfg_node->outlinks.push_back(linked->second);
    }
  }

  Build();
  PADDLE_ENFORCE(!inputs_.empty(),
                 "Can't deduce any inputs from the graph, Is the graph empty?");

  ir_graph = &graph;
  VLOG(30) << "finished build from IR";
}
void
DataFlowGraph
::
Clean
()
{
for
(
auto
&
node
:
nodes
.
nodes
())
{
std
::
unordered_set
<
Node
*>
inlinks_set
(
node
->
inlinks
.
begin
(),
node
->
inlinks
.
end
());
std
::
unordered_set
<
Node
*>
outlinks_set
(
node
->
outlinks
.
begin
(),
node
->
outlinks
.
end
());
if
(
inlinks_set
.
size
()
<
node
->
inlinks
.
size
())
{
node
->
inlinks
.
assign
(
inlinks_set
.
begin
(),
inlinks_set
.
end
());
}
if
(
outlinks_set
.
size
()
<
node
->
outlinks
.
size
())
{
node
->
outlinks
.
assign
(
outlinks_set
.
begin
(),
outlinks_set
.
end
());
}
}
}
std
::
string
DataFlowGraph
::
DotString
()
const
{
Dot
dot
;
// Add nodes
for
(
size_t
i
=
0
;
i
<
nodes
.
size
();
i
++
)
{
const
Node
&
node
=
nodes
.
Get
(
i
);
dot
.
AddNode
(
node
.
repr
(),
node
.
dot_attrs
());
}
// Add edges
for
(
size_t
i
=
0
;
i
<
nodes
.
size
();
i
++
)
{
const
Node
&
node
=
nodes
.
Get
(
i
);
for
(
auto
&
in
:
node
.
inlinks
)
{
dot
.
AddEdge
(
in
->
repr
(),
node
.
repr
(),
{});
}
}
return
dot
.
Build
();
}
// Returns a plain-text listing of the graph: a "Values:" section followed by
// a "Functions:" section, one node repr() per line. Either section is left
// empty when the matching flag is false.
std::string DataFlowGraph::HumanReadableInfo(bool show_values,
                                             bool show_functions) const {
  std::stringstream value_lines;
  std::stringstream function_lines;

  for (auto &item : nodes.nodes()) {
    if (show_values && item->IsValue()) {
      value_lines << item->repr() << "\n";
    }
    if (show_functions && item->IsFunction()) {
      function_lines << item->repr() << "\n";
    }
  }

  std::string report = "Values:\n";
  report += value_lines.str();
  report += "\n\n";
  report += "Functions:\n";
  report += function_lines.str();
  return report;
}
//
// NodesBFSIterator
//
// Starts a breadth-first traversal whose queue initially holds `source`.
GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
    const std::vector<Node *> &source)
    : queue_(source.begin(), source.end()) {}

// Move constructor: takes over the pending queue and the visited set.
GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
    NodesBFSIterator &&other) noexcept
    : queue_(std::move(other.queue_)),
      visited_(std::move(other.visited_)) {}

// Copy constructor: duplicates the pending queue and the visited set.
GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
    const NodesBFSIterator &other)
    : queue_(other.queue_), visited_(other.visited_) {}
// Returns the node at the front of the queue; enforces the queue is not empty.
Node &GraphTraits<DataFlowGraph>::NodesBFSIterator::operator*() {
  PADDLE_ENFORCE(!queue_.empty());
  Node *head = queue_.front();
  return *head;
}
// Member access to the node at the front of the queue; enforces the queue is
// not empty.
Node *GraphTraits<DataFlowGraph>::NodesBFSIterator::operator->() {
  PADDLE_ENFORCE(!queue_.empty());
  Node *head = queue_.front();
  return head;
}
GraphTraits
<
DataFlowGraph
>::
NodesBFSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesBFSIterator
::
operator
=
(
const
GraphTraits
<
DataFlowGraph
>::
NodesBFSIterator
&
other
)
{
queue_
=
other
.
queue_
;
visited_
=
other
.
visited_
;
return
*
this
;
}
GraphTraits
<
DataFlowGraph
>::
NodesBFSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesBFSIterator
::
operator
++
()
{
PADDLE_ENFORCE
(
!
queue_
.
empty
());
auto
*
cur
=
queue_
.
front
();
visited_
.
insert
(
cur
);
queue_
.
pop_front
();
for
(
auto
*
output
:
cur
->
outlinks
)
{
if
(
!
visited_
.
count
(
output
))
{
queue_
.
push_back
(
output
);
visited_
.
insert
(
output
);
}
}
return
*
this
;
}
// Lightweight (and weak) equality: two exhausted iterators are equal; two
// live iterators are equal when they point at the same front node and have
// visited the same number of nodes. The full queue/visited contents are not
// compared.
bool GraphTraits<DataFlowGraph>::NodesBFSIterator::operator==(
    const NodesBFSIterator &other) {
  const bool self_done = queue_.empty();
  const bool other_done = other.queue_.empty();
  if (self_done || other_done) {
    return self_done && other_done;
  }
  if (queue_.front() != other.queue_.front()) return false;
  return visited_.size() == other.visited_.size();
}
//
// NodesDFSIterator
//
// Starts a depth-first traversal; the nodes of `source` are pushed in order,
// so the last one is visited first.
GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
    const std::vector<Node *> &source) {
  for (auto *start : source) {
    stack_.push(start);
  }
}

// Move constructor: takes over the pending stack and the visited set.
GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
    NodesDFSIterator &&other) noexcept
    : stack_(std::move(other.stack_)),
      visited_(std::move(other.visited_)) {}

// Copy constructor: duplicates the pending stack and the visited set.
GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
    const NodesDFSIterator &other)
    : stack_(other.stack_), visited_(other.visited_) {}
// Returns the node on top of the stack; enforces the stack is not empty.
Node &GraphTraits<DataFlowGraph>::NodesDFSIterator::operator*() {
  PADDLE_ENFORCE(!stack_.empty());
  auto *current = stack_.top();
  return *current;
}
GraphTraits
<
DataFlowGraph
>::
NodesDFSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesDFSIterator
::
operator
++
()
{
if
(
stack_
.
empty
())
return
*
this
;
visited_
.
insert
(
stack_
.
top
());
auto
*
cur
=
stack_
.
top
();
stack_
.
pop
();
for
(
auto
*
x
:
cur
->
outlinks
)
{
if
(
!
visited_
.
count
(
x
))
{
stack_
.
push
(
x
);
visited_
.
insert
(
x
);
}
}
return
*
this
;
}
// Two exhausted iterators are equal; two live iterators are equal when the
// same node is on top of both stacks. Anything else is unequal.
bool GraphTraits<DataFlowGraph>::NodesDFSIterator::operator==(
    const NodesDFSIterator &other) {
  const bool self_done = stack_.empty();
  const bool other_done = other.stack_.empty();
  if (self_done || other_done) {
    return self_done && other_done;
  }
  return stack_.top() == other.stack_.top();
}
GraphTraits
<
DataFlowGraph
>::
NodesDFSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesDFSIterator
::
operator
=
(
const
GraphTraits
<
DataFlowGraph
>::
NodesDFSIterator
&
other
)
{
stack_
=
other
.
stack_
;
visited_
=
other
.
visited_
;
return
*
this
;
}
// Member access to the node on top of the stack.
//
// Enforces that the stack is not empty: calling top() on an empty std::stack
// is undefined behaviour, and operator*() of this iterator already performs
// the same check.
Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
  PADDLE_ENFORCE(!stack_.empty());
  return stack_.top();
}
// Returns true when `node` has exactly `n` inlinks.
inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
  const size_t indegree = node.inlinks.size();
  return indegree == n;
}
// Computes a topological order of the nodes reachable from `source` and
// stores it in sorted_.
//
// `source` must be non-empty and every start node must have no inlinks. A
// node is emitted once all of its inlinks have been visited; nodes flagged
// deleted() are marked visited but neither emitted nor expanded.
//
// The sort fails with an enforce error instead of spinning forever when a
// whole pass over the frontier makes no progress (e.g. the graph contains a
// cycle, or a frontier node depends on a node that is never reached).
GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
    const std::vector<Node *> &source) {
  PADDLE_ENFORCE(!source.empty(),
                 "Start points of topological sorting should not be empty!");
  // CHECK all the inputs' in-degree is 0
  for (auto *node : source) {
    PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0));
  }

  std::unordered_set<Node *> visited;
  std::unordered_set<Node *> to_visit{source.begin(), source.end()};

  while (!to_visit.empty()) {
    // Iterate over a snapshot because to_visit is modified inside the loop.
    std::vector<Node *> frontier(to_visit.begin(), to_visit.end());
    bool progressed = false;

    for (auto *p : frontier) {
      if (p->deleted()) {
        visited.insert(p);
        to_visit.erase(p);
        progressed = true;
        continue;
      }

      // A node is ready only when every predecessor has been visited.
      const bool ready =
          std::all_of(p->inlinks.begin(), p->inlinks.end(),
                      [&visited](Node *x) { return visited.count(x) != 0; });
      if (!ready) continue;

      sorted_.push_back(p);
      for (auto *next : p->outlinks) {
        if (!visited.count(next)) {
          to_visit.insert(next);
        }
      }
      to_visit.erase(p);
      visited.insert(p);
      progressed = true;
    }

    // Without progress the next pass would see the exact same state, so the
    // loop could never terminate.
    PADDLE_ENFORCE(progressed,
                   "Topological sorting is stuck, the graph may contain a "
                   "cycle or an unreachable dependency");
  }
}
// Copy constructor: duplicates the sorted node list and the cursor position.
GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
    const NodesTSIterator &other)
    : sorted_(other.sorted_), cursor_(other.cursor_) {}
// Returns the node at the cursor; enforces the cursor is within sorted_.
Node &GraphTraits<DataFlowGraph>::NodesTSIterator::operator*() {
  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
  Node *current = sorted_[cursor_];
  return *current;
}
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
++
()
{
if
(
++
cursor_
>=
sorted_
.
size
())
{
sorted_
.
clear
();
cursor_
=
0
;
}
return
*
this
;
}
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
=
(
const
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
other
)
{
cursor_
=
other
.
cursor_
;
sorted_
=
other
.
sorted_
;
return
*
this
;
}
// Two iterators are equal when they hold the same sorted node list and point
// at the same position in it.
bool GraphTraits<DataFlowGraph>::NodesTSIterator::operator==(
    const NodesTSIterator &other) {
  if (!(sorted_ == other.sorted_)) return false;
  return cursor_ == other.cursor_;
}
// Member access to the node at the cursor; enforces the cursor is within
// sorted_.
Node *GraphTraits<DataFlowGraph>::NodesTSIterator::operator->() {
  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
  Node *current = sorted_[cursor_];
  return current;
}
// Computes the boundary values of the sub-graph made of the nodes in `graph`.
//
// Returns {inputs, outputs}:
//   - inputs:  kValue nodes outside the sub-graph that are read by a member
//              and are not themselves written by any member;
//   - outputs: kValue nodes outside the sub-graph that are written by a
//              member.
// Each node appears at most once in either list; the order is unspecified.
std::pair<std::vector<Node *>, std::vector<Node *>>
ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) {  // NOLINT
  std::unordered_set<Node *> members(graph.begin(), graph.end());
  std::unordered_set<Node *> boundary_inputs;
  std::unordered_set<Node *> boundary_outputs;

  auto is_member = [&members](Node *n) { return members.count(n) != 0; };

  // True when some producer of `value` lives inside the sub-graph.
  auto written_by_member = [&](Node *value) {
    return std::any_of(value->inlinks.begin(), value->inlinks.end(), is_member);
  };

  for (auto &member : graph) {
    for (auto *src : member->inlinks) {
      if (is_member(src)) continue;
      if (src->type() != Node::Type::kValue) continue;
      // A value written by nodes inside the sub-graph is not an input of it.
      if (written_by_member(src)) continue;
      boundary_inputs.insert(src);
    }
    for (auto *dst : member->outlinks) {
      if (is_member(dst)) continue;
      if (dst->type() != Node::Type::kValue) continue;
      boundary_outputs.insert(dst);
    }
  }

  return std::make_pair(
      std::vector<Node *>(boundary_inputs.begin(), boundary_inputs.end()),
      std::vector<Node *>(boundary_outputs.begin(), boundary_outputs.end()));
}
// Filter the intermediate results of the sub-graph nodes.
//
// Walks the non-Value, non-deleted nodes in topological order. For every such
// node that is not a plain kFunction, an outlink is kept only if its name is
// consumed as an input by some later node in that order; every other outlink
// is marked deleted and dropped from the node's outlink list (which may end
// up empty).
//
// The nodes are swept from last to first while accumulating the input names
// seen so far, so the "consumed later" set is built once instead of being
// recomputed for every node.
void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph) {
  std::vector<Node *> op_nodes;
  for (auto &node : GraphTraits<DataFlowGraph>(*graph).nodes_in_TS()) {
    if (node.type() == Node::Type::kValue || node.deleted()) {
      continue;
    }
    op_nodes.push_back(&node);
  }

  // Names of all inputs of the nodes that come after the current one.
  std::unordered_set<std::string> follow_up_input_names;
  for (size_t i = op_nodes.size(); i-- > 0;) {
    Node *op = op_nodes[i];
    if (op->type() != Node::Type::kFunction) {
      std::vector<Node *> filtered_subgraph_outlinks;
      for (auto *out : op->outlinks) {
        if (follow_up_input_names.count(out->name())) {
          filtered_subgraph_outlinks.push_back(out);
        } else {
          out->SetDeleted();
        }
      }
      // The filtered_subgraph_outlinks may be empty.
      op->outlinks = filtered_subgraph_outlinks;
    }
    // This node's inputs become "follow-up" inputs for every earlier node,
    // regardless of the node's own type.
    for (auto *in : op->inlinks) {
      follow_up_input_names.insert(in->name());
    }
  }
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/data_flow_graph.h
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* The data flow graph is the basic graph built by the analysis passes. This
* file contains the graph and the iterators that enable iteration over it.
*/
#pragma once
#include <deque>
#include <stack>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/inference/analysis/graph_traits.h"
#include "paddle/fluid/inference/analysis/node.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
* DataFlowGraph - A container of Value and Function Nodes.
*
* This is the base graph for any other type of graphs, such as SSA or CFG.
*/
struct DataFlowGraph {
  // Storage of all nodes in the graph.
  NodeMap nodes;
  // Used to interact with IR. Stored as a raw pointer to a const graph;
  // null by default.
  const framework::ir::Graph *ir_graph{nullptr};

  // Extract inputs and outputs of the graph.
  void Build();
  // Build a graph from a ProgramDesc.
  void Build(const framework::proto::ProgramDesc &prog);
  // Build a graph from ir::Graph.
  void Build(const framework::ir::Graph &graph);

  // Get an attribute. Uses unordered_map::operator[], so looking up a key
  // that is not present inserts a default-constructed AnyAttr.
  AnyAttr &Attr(const std::string &key) { return attrs_[key]; }

  // Output a DOT graph description for debug, returned as a string.
  std::string DotString() const;

  // Returns a textual summary of the graph; the flags select whether Value
  // and Function nodes are listed (presumably -- implementation not shown).
  std::string HumanReadableInfo(bool show_values = true,
                                bool show_functions = true) const;

  // The deduced input nodes. Enforces that the list is non-empty, i.e. that
  // Build() has been called.
  const std::vector<Node *> &inputs() const {
    PADDLE_ENFORCE(!inputs_.empty(),
                   "No inputs are deduced, need to Build() first.");
    return inputs_;
  }
  // The deduced output nodes. Enforces that the list is non-empty, i.e. that
  // Build() has been called.
  const std::vector<Node *> &outputs() const {
    PADDLE_ENFORCE(!outputs_.empty(),
                   "No outputs are deduced, need to Build() first.");
    return outputs_;
  }

 private:
  // Inputs and outputs are deduced from the graph. Declared mutable so they
  // can be modified through a const DataFlowGraph.
  mutable std::vector<Node *> inputs_;
  mutable std::vector<Node *> outputs_;
  // Attribute storage backing Attr().
  std::unordered_map<std::string, AnyAttr> attrs_;

  // Remove duplicate edges and so on.
  void Clean();
};
/*
* A graph trait that helps to traverse the graph, using BFS by default.
* The traversal starts from the graph's inputs; the graph should be fully
* connected so that the iterator can reach the end.
*/
// GraphTraits specialization for DataFlowGraph. Provides BFS, DFS and
// topological-sort (TS) iterators plus iterator_range accessors for each.
// Every traversal starts from graph.inputs(), and a default-constructed
// iterator serves as the end sentinel.
template <>
struct GraphTraits<DataFlowGraph> {
  // BFS iterator on nodes.
  struct NodesBFSIterator
      : public std::iterator<std::forward_iterator_tag, Node *> {
    NodesBFSIterator() = default;
    explicit NodesBFSIterator(const std::vector<Node *> &source);
    NodesBFSIterator(NodesBFSIterator &&other) noexcept;
    // NOTE Heavy to use.
    NodesBFSIterator(const NodesBFSIterator &other);

    Node &operator*();
    NodesBFSIterator &operator++();
    Node *operator->();
    NodesBFSIterator &operator=(const NodesBFSIterator &other);
    // TODO(Superjomn) current implementation just compare the first
    // element, need to compare the graph and all the elements in the queue and
    // set.
    bool operator==(const NodesBFSIterator &other);
    bool operator!=(const NodesBFSIterator &other) { return !(*this == other); }

   private:
    std::deque<Node *> queue_;
    std::unordered_set<Node *> visited_;
  };

  // DFS iterator on nodes.
  struct NodesDFSIterator
      : public std::iterator<std::forward_iterator_tag, Node *> {
    NodesDFSIterator() = default;
    NodesDFSIterator(const std::vector<Node *> &source);
    NodesDFSIterator(NodesDFSIterator &&other) noexcept;
    NodesDFSIterator(const NodesDFSIterator &other);

    Node &operator*();
    NodesDFSIterator &operator++();
    NodesDFSIterator &operator=(const NodesDFSIterator &other);
    // TODO(Superjomn) current implementation just compare the first
    // element, need to compare the graph and all the elements in the queue and
    // set.
    bool operator==(const NodesDFSIterator &other);
    bool operator!=(const NodesDFSIterator &other) { return !(*this == other); }
    Node *operator->();

   private:
    std::stack<Node *> stack_;
    std::unordered_set<Node *> visited_;
  };

  // Topological sorting iterator on nodes. Holds the whole sorted sequence
  // and a cursor into it.
  struct NodesTSIterator
      : public std::iterator<std::forward_iterator_tag, Node *> {
    NodesTSIterator() = default;
    NodesTSIterator(const std::vector<Node *> &source);
    // Moving steals the sorted sequence and resets the source's cursor.
    // Marked noexcept (vector move and size_t copy cannot throw), matching
    // the BFS/DFS iterators, so containers can move instead of copy.
    NodesTSIterator(NodesTSIterator &&other) noexcept
        : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) {
      other.cursor_ = 0;
    }
    NodesTSIterator(const NodesTSIterator &other);

    Node &operator*();
    NodesTSIterator &operator++();
    NodesTSIterator &operator=(const NodesTSIterator &other);
    // Equal when both the sorted sequence and the cursor match.
    bool operator==(const NodesTSIterator &other);
    bool operator!=(const NodesTSIterator &other) { return !(*this == other); }
    Node *operator->();

   private:
    std::vector<Node *> sorted_;
    size_t cursor_{0};
  };

  // Keeps a reference to `graph`; the graph must outlive this object.
  explicit GraphTraits(const DataFlowGraph &graph) : graph_(graph) {}

  // default use BFS to visit the nodes.
  iterator_range<NodesBFSIterator> nodes() {
    return iterator_range<NodesBFSIterator>(nodes_bfs_begin(), nodes_bfs_end());
  }
  iterator_range<NodesBFSIterator> nodes_in_BFS() {
    return iterator_range<NodesBFSIterator>(nodes_bfs_begin(), nodes_bfs_end());
  }
  iterator_range<NodesDFSIterator> nodes_in_DFS() {
    return iterator_range<NodesDFSIterator>(nodes_dfs_begin(), nodes_dfs_end());
  }
  iterator_range<NodesTSIterator> nodes_in_TS() {
    return iterator_range<NodesTSIterator>(nodes_ts_begin(), nodes_ts_end());
  }

 private:
  // begin(): iterator seeded with the graph inputs; end(): default iterator.
  NodesBFSIterator nodes_bfs_begin() {
    return NodesBFSIterator(graph_.inputs());
  }
  NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); }
  NodesDFSIterator nodes_dfs_begin() {
    return NodesDFSIterator(graph_.inputs());
  }
  NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); }
  NodesTSIterator nodes_ts_begin() { return NodesTSIterator(graph_.inputs()); }
  NodesTSIterator nodes_ts_end() { return NodesTSIterator(); }

 private:
  const DataFlowGraph &graph_;
};
// Extract the inputs and outputs of a sub-graph. The inputs and outputs of a
// sub-graph are the input nodes and output nodes that are linked to it but
// are not inside the sub-graph themselves.
// Returns the pair {inputs, outputs}.
std::pair<std::vector<Node *>, std::vector<Node *>>
ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);  // NOLINT

// Filter the intermediate results of the sub-graph nodes in `graph`.
void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph);
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/data_flow_graph_tester.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Builds a DFG from the test model, logs its deduced inputs/outputs and
// checks that the default (BFS) traversal visits every node exactly once.
TEST(DataFlowGraph, BFS) {
  auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__");
  auto dfg = ProgramDescToDFG(desc);
  dfg.Build();

  // Logs name and numeric type of every node in `group` behind `label`.
  auto log_group = [](const char *label, const std::vector<Node *> &group) {
    for (auto *member : group) {
      LOG(INFO) << label << member->name() << " "
                << static_cast<int>(member->type());
    }
  };
  log_group("inputs: ", dfg.inputs());
  log_group("outputs: ", dfg.outputs());

  size_t visited = 0;
  for (auto &node : GraphTraits<DataFlowGraph>(dfg).nodes()) {
    LOG(INFO) << "visiting " << node.name();
    visited += 1;
  }
  ASSERT_EQ(visited, dfg.nodes.size());
}
// Builds a DFG directly from the test model's ProgramDesc and checks that
// the DFS traversal visits every node exactly once.
TEST(DataFlowGraph, DFS) {
  auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__");
  DataFlowGraph dfg;
  dfg.Build(desc);

  size_t visited = 0;
  auto range = GraphTraits<DataFlowGraph>(dfg).nodes_in_DFS();
  for (auto it = range.begin(); it != range.end(); ++it) {
    LOG(INFO) << "visiting " << (*it).name();
    visited += 1;
  }
  ASSERT_EQ(visited, dfg.nodes.size());
}
// Topological sorting.
/*
* Graph topology
* inputs: 0, 1, 2
* 0 -> 4
* 0 -> 5
* 1 -> 6
* 2 -> 7
* 4 -> 5
* 4 -> 7
* 4 -> 3
* 7 -> 3
*/
// Checks the topological-sort iterator on a small hand-built graph of eight
// Value nodes: for every edge a -> b, a must be visited before b.
TEST(DataFlowGraph, TS) {
  DataFlowGraph graph;

  for (int i = 0; i < 8; i++) {
    auto *node = graph.nodes.Create(Node::Type::kValue);
    node->SetName("node-" + std::to_string(i));
  }

  // Adds the directed edge i -> j, updating both endpoints.
  auto add_link = [&](int i, int j) {
    Node *source = graph.nodes.GetMutable(i);
    Node *target = graph.nodes.GetMutable(j);
    target->inlinks.push_back(source);
    source->outlinks.push_back(target);
  };

  add_link(0, 4);
  add_link(0, 5);
  add_link(1, 6);
  add_link(2, 7);
  add_link(4, 5);
  add_link(4, 7);
  add_link(4, 3);
  add_link(7, 3);
  graph.Build();

  auto its = GraphTraits<DataFlowGraph>(graph).nodes_in_TS();
  std::vector<int> sorted_ids;
  for (auto it = its.begin(); it != its.end(); ++it) {
    LOG(INFO) << it->name();
    sorted_ids.push_back(it->id());
  }

  // Assert a occurs prior to b in the sorted_ids.
  auto assert_positive_sequence_pair = [&](int a, int b) {
    auto a_offset = std::find(sorted_ids.begin(), sorted_ids.end(), a);
    auto b_offset = std::find(sorted_ids.begin(), sorted_ids.end(), b);
    // Both nodes must actually have been visited. Without this, a missing
    // `b` yields end(), and "a before end()" would pass vacuously.
    ASSERT_TRUE(a_offset != sorted_ids.end());
    ASSERT_TRUE(b_offset != sorted_ids.end());
    ASSERT_LT(a_offset, b_offset);
  };

  assert_positive_sequence_pair(2, 7);
  assert_positive_sequence_pair(7, 3);
  assert_positive_sequence_pair(4, 3);
  assert_positive_sequence_pair(0, 4);
  assert_positive_sequence_pair(0, 5);
  assert_positive_sequence_pair(1, 6);
  assert_positive_sequence_pair(4, 5);
  assert_positive_sequence_pair(4, 7);
}
// Building from the test model's ProgramDesc must yield the node count the
// model is known to produce.
TEST(DataFlowGraph, Build_ProgramDesc) {
  const auto model_file = FLAGS_inference_model_dir + "/__model__";
  auto desc = LoadProgramDesc(model_file);

  DataFlowGraph graph;
  graph.Build(desc);

  const auto built_node_count = graph.nodes.size();
  ASSERT_EQ(built_node_count, 38UL);
}
// Test helper: appends an op of the given type to block 0 of `prog`, wiring
// `inputs` and `outputs` under the "Xs" slot and tagging it as a forward op.
void SetOp(framework::ProgramDesc *prog, const std::string &type,
           const std::vector<std::string> &inputs,
           const std::vector<std::string> &outputs) {
  auto *block = prog->MutableBlock(0);
  auto *new_op = block->AppendOp();

  new_op->SetType(type);
  new_op->SetInput("Xs", inputs);
  new_op->SetOutput("Xs", outputs);

  new_op->SetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName(),
                  static_cast<int>(framework::OpRole::kForward));
}
// Builds a DFG from an ir::Graph made of six variables and four ops and
// checks that both graphs contain the same number of nodes.
TEST(DataFlowGraph, Build_IR_Graph) {
  using Names = std::vector<std::string>;

  framework::ProgramDesc prog;
  const Names var_names{"a", "b", "c", "d", "e", "f"};
  for (const auto &var_name : var_names) {
    auto *var = prog.MutableBlock(0)->Var(var_name);
    var->SetType(framework::proto::VarType::SELECTED_ROWS);
    // Only "c" is marked persistable.
    if (var_name == "c") {
      var->SetPersistable(true);
    }
  }

  SetOp(&prog, "OP0", Names{"a"}, Names{"b"});
  SetOp(&prog, "OP1", Names{"a"}, Names{"c"});
  SetOp(&prog, "mul", Names{"b", "c"}, Names{"d"});
  SetOp(&prog, "elementwise_add", Names{"d", "e"}, Names{"f"});

  DataFlowGraph graph;
  framework::ir::Graph ir_graph(prog);
  graph.Build(ir_graph);

  ASSERT_EQ(graph.nodes.size(), ir_graph.Nodes().size());
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file implements the transformation from data flow graph to fluid
* ProgramDesc.
*/
#pragma once
#include <string>
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Pass that transforms a data flow graph (DFG) into a Fluid ProgramDesc.
class DataFlowGraphToFluidPass final : public DataFlowGraphPass {
 public:
  DataFlowGraphToFluidPass() = default;

  bool Initialize(Argument *argument) override;
  bool Finalize() override;

  void Run(DataFlowGraph *graph) override;

  std::string repr() const override { return "DFG to fluid"; }
  std::string description() const override {
    return "Transform a DFG to a Fluid ProgramDesc";
  }

  // Returns a newly created debug pass as a raw pointer.
  // NOTE(review): ownership is presumably transferred to the caller --
  // confirm against the implementation and call sites.
  AnalysisPass *CreateGraphvizDebugerPass() const override;

 protected:
  // Add a Fluid Op into the ProgramDesc.
  void AddFluidOp(Node *node);
  // Add a EngineOp into the ProgramDesc.
  void AddEngineOp(Node *node);

 private:
  // Both pointers are null until set; initialized explicitly so that a pass
  // used without Initialize() holds null rather than indeterminate pointers.
  framework::proto::ProgramDesc *desc_{nullptr};
  Argument *argument_{nullptr};
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc
已删除
100644 → 0
浏览文件 @
0b962680
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include <glog/logging.h>
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/io.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Smoke test: initializes the fluid->DFG and DFG->fluid passes on the same
// Argument, runs them in that order on argument.main_dfg, and logs the
// resulting node count. Only the Initialize() results are asserted.
TEST(DataFlowGraph, Test) {
  Argument argument(FLAGS_inference_model_dir);

  FluidToDataFlowGraphPass pass0;
  DataFlowGraphToFluidPass pass1;
  ASSERT_TRUE(pass0.Initialize(&argument));
  ASSERT_TRUE(pass1.Initialize(&argument));

  pass0.Run(argument.main_dfg.get());
  pass1.Run(argument.main_dfg.get());

  // NOTE(review): the boolean results of Finalize() are not checked.
  pass0.Finalize();
  pass1.Finalize();

  LOG(INFO) << argument.main_dfg->nodes.size();
}
};
// namespace analysis
};
// namespace inference
};
// namespace paddle
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Definition of the static counter shared by all DFG_GraphvizDrawPass
// objects; starts at zero.
int DFG_GraphvizDrawPass::counter_{0};
// Renders `graph` to DOT text, writes it to the generated .dot path and then
// shells out to `dot` to produce a .png next to it.
void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) {
  const std::string dot_text = Draw(graph);
  const std::string dot_file = GenDotPath();

  {
    // The stream is closed when it leaves this scope.
    std::ofstream out(dot_file);
    out.write(dot_text.data(), dot_text.size());
  }

  // Swap the trailing 4-character extension (".dot") for ".png".
  std::string png_file(dot_file, 0, dot_file.size() - 4);
  png_file += ".png";

  std::string message;
  VLOG(30) << "draw to " << png_file;
  const std::string command = "dot -Tpng " + dot_file + " -o " + png_file;
  ExecShellCommand(command, &message);
}
// Produces the DOT description of `graph`: first every visible node, then
// every edge whose two endpoints are visible. Deleted nodes are visible only
// when config_.display_deleted_node is set.
std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) {
  Dot dot;

  auto is_visible = [this](const Node &candidate) {
    return config_.display_deleted_node || !candidate.deleted();
  };

  const size_t node_count = graph->nodes.size();

  // Add nodes
  for (size_t idx = 0; idx < node_count; ++idx) {
    const Node &current = graph->nodes.Get(idx);
    if (!is_visible(current)) continue;
    dot.AddNode(current.repr(), current.dot_attrs());
  }

  // Add edges
  for (size_t idx = 0; idx < node_count; ++idx) {
    const Node &current = graph->nodes.Get(idx);
    if (!is_visible(current)) continue;
    for (auto &target : current.outlinks) {
      if (is_visible(*target)) {
        dot.AddEdge(current.repr(), target->repr(), {});
      }
    }
  }

  return dot.Build();
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file creates the DFG_GraphvizDrawPass, which helps to draw a data flow
* graph's structure using graphviz.
*/
#pragma once
#include <fstream>
#include <string>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/dot.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
* Output a dot file and write to some place.
*/
// Pass that draws a DataFlowGraph with graphviz. Subclasses may override
// Draw() to customize the generated DOT text.
class DFG_GraphvizDrawPass : public DataFlowGraphPass {
 public:
  // Immutable settings of a drawing pass.
  struct Config {
    Config(const std::string &dir, const std::string &id,
           bool display_deleted_node = false)
        : dir(dir), id(id), display_deleted_node(display_deleted_node) {}

    // The directory to store the .dot or .png files.
    const std::string dir;
    // The identifier for this dot file.
    const std::string id;
    // Whether to display deleted nodes, default false.
    const bool display_deleted_node;
  };

  explicit DFG_GraphvizDrawPass(const Config &config) : config_(config) {}

  // Nothing to set up; `argument` is unused and the call always succeeds.
  bool Initialize(Argument *argument) override { return true; }
  void Run(DataFlowGraph *graph) override;
  // Nothing to tear down; always succeeds.
  bool Finalize() override { return true; }

  std::string repr() const override { return "DFG graphviz drawer"; }
  std::string description() const override {
    return "Debug a DFG by draw with graphviz";
  }

 protected:
  // A counter to add a number prefix to the debugger image output so that they
  // will sort in the triggered order.
  static int counter_;

  // Path of the dot file to output:
  //   <dir>/<counter>-graph_<id>.dot
  // Every call post-increments the shared static counter, so two calls never
  // return the same path even though the method is const.
  std::string GenDotPath() const {
    return config_.dir + "/" + std::to_string(counter_++) + "-graph_" +
           config_.id + ".dot";
  }

  // Builds the DOT text for `graph`.
  virtual std::string Draw(DataFlowGraph *graph);

  Config config_;
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include <gtest/gtest.h>
#include <fstream>
#include <string>
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Builds the DFG of the test model, draws it into the current directory and
// checks the line count of the generated .dot file. The expected file name
// relies on this being the first drawing pass run in the process.
TEST(DFG_GraphvizDrawPass, dfg_graphviz_draw_pass_tester) {
  Argument argument(FLAGS_inference_model_dir);

  FluidToDataFlowGraphPass to_dfg;
  ASSERT_TRUE(to_dfg.Initialize(&argument));
  to_dfg.Run(argument.main_dfg.get());

  // auto dfg = ProgramDescToDFG(*argument.origin_program_desc);
  DFG_GraphvizDrawPass drawer(DFG_GraphvizDrawPass::Config("./", "test"));
  drawer.Initialize(&argument);
  drawer.Run(argument.main_dfg.get());

  // test content
  std::ifstream dot_file("./0-graph_test.dot");
  ASSERT_TRUE(dot_file.is_open());

  int line_count = 0;
  for (std::string line; std::getline(dot_file, line);) {
    ++line_count;
  }
  // DFG is sensitive to ProgramDesc, be careful to change the existing models.
  ASSERT_EQ(line_count, 83);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/dot_tester.cc
浏览文件 @
ddb12035
...
@@ -16,7 +16,6 @@
...
@@ -16,7 +16,6 @@
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <memory>
#include <memory>
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
...
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Prepares `argument` for the fluid -> DFG transformation:
//   1. defaults fluid_model_program_path to "<fluid_model_dir>/__model__";
//   2. loads the ProgramDesc from that path into origin_program_desc;
//   3. creates main_dfg when it does not exist yet;
//   4. remembers the loaded desc for Run().
// Always returns true; failed checks are reported by the check macros.
bool FluidToDataFlowGraphPass::Initialize(Argument *argument) {
  ANALYSIS_ARGUMENT_CHECK_FIELD(argument);
  if (argument->origin_program_desc) {
    LOG(WARNING)
        << "argument's origin_program_desc is already set, might duplicate called";
  }

  if (!argument->fluid_model_program_path) {
    ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_dir);
    const auto default_model_file = *argument->fluid_model_dir + "/__model__";
    argument->fluid_model_program_path.reset(
        new std::string(default_model_file));
  }
  ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_program_path);

  auto loaded_program = LoadProgramDesc(*argument->fluid_model_program_path);
  argument->origin_program_desc.reset(
      new framework::proto::ProgramDesc(loaded_program));

  if (!argument->main_dfg) {
    argument->main_dfg.reset(new DataFlowGraph);
  }

  desc_ = argument->origin_program_desc.get();
  return true;
}
// Nothing to clean up; always reports success.
bool FluidToDataFlowGraphPass::Finalize() { return true; }
// Builds `graph` from the ProgramDesc captured by Initialize(). Enforces that
// both the target graph and the stored desc are non-null.
void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
  PADDLE_ENFORCE(graph);
  PADDLE_ENFORCE(desc_);
  graph->Build(*desc_);
}
namespace {
// File-local graphviz drawing pass used to debug the fluid -> DFG pass. It
// only changes the reported name and provides a trivially succeeding
// Finalize(); drawing is inherited from DFG_GraphvizDrawPass.
class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
 public:
  using Config = DFG_GraphvizDrawPass::Config;
  explicit DFG_DebuggerPass(const Config &config)
      : DFG_GraphvizDrawPass(config) {}
  std::string repr() const override { return "fluid-to-dfg-debuger-pass"; }
  bool Finalize() override { return true; }
};
}
// Creates the graphviz debugger pass for this pass. Output goes under
// FLAGS_IA_graphviz_log_root. The pass is heap-allocated and returned as a
// raw pointer.
AnalysisPass *FluidToDataFlowGraphPass::CreateGraphvizDebugerPass() const {
  const DFG_GraphvizDrawPass::Config debugger_config(
      FLAGS_IA_graphviz_log_root, "fluid-to-dfg-debuger");
  return new DFG_DebuggerPass(debugger_config);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file implements the transformation from fluid ProgramDesc to data flow
* graph.
*/
#pragma once
#include <string>
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
* Transform a FluidDesc to a SSA.
*/
// Pass that transforms a fluid ProgramDesc into a data flow graph.
class FluidToDataFlowGraphPass final : public DataFlowGraphPass {
 public:
  FluidToDataFlowGraphPass() = default;

  bool Initialize(Argument *argument) override;
  bool Finalize() override;

  void Run(DataFlowGraph *graph) override;

  std::string repr() const override { return "fluid-to-data-flow-graph"; }
  std::string description() const override {
    return "transform a fluid ProgramDesc to a data flow graph.";
  }

  // Returns a newly created debug pass as a raw pointer.
  AnalysisPass *CreateGraphvizDebugerPass() const override;

 private:
  // The program to transform; not owned. Starts as null so that using the
  // pass before Initialize() is detectable instead of reading an
  // indeterminate pointer.
  framework::proto::ProgramDesc const *desc_{nullptr};
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/fluid_to_ir_pass.h
已删除
100644 → 0
浏览文件 @
0b962680
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
static
const
char
kFluidToIrPassesAttr
[]
=
"__fluid_to_ir_passes__"
;
// Pass that loads a fluid program, runs the configured IR passes over it and
// builds the main data flow graph from the resulting ir::Graph.
class FluidToIrPass final : public DataFlowGraphPass {
 public:
  FluidToIrPass() = default;

  // Validates and prepares `argument`:
  //   - requires the kFluidToIrPassesAttr attribute to be present;
  //   - defaults fluid_model_program_path to "<fluid_model_dir>/__model__";
  //   - loads the program into origin_program_desc and stores a copy under
  //     the "ir_program_desc" attribute;
  //   - creates main_dfg when missing;
  //   - calls EnableParamModify when a model directory, or both a program
  //     path and a parameter path, are available.
  // Always returns true; failed checks are reported by the check macros.
  bool Initialize(Argument *argument) override {
    ANALYSIS_ARGUMENT_CHECK_FIELD(argument);
    PADDLE_ENFORCE(argument->Has(kFluidToIrPassesAttr),
                   "argument need the attr %s", kFluidToIrPassesAttr);
    argument_ = argument;
    if (argument->origin_program_desc) {
      LOG(WARNING) << "argument's origin_program_desc is already set, might "
                      "duplicate called";
    }
    // set fluid model program path
    if (!argument->fluid_model_program_path) {
      ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_dir);
      argument->fluid_model_program_path.reset(
          new std::string(*argument->fluid_model_dir + "/__model__"));
    }
    ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_program_path);
    // Load program.
    auto program = LoadProgramDesc(*argument->fluid_model_program_path);
    argument->origin_program_desc.reset(
        new framework::proto::ProgramDesc(program));
    // Create main data flow graph.
    if (!argument->main_dfg) {
      argument->main_dfg.reset(new DataFlowGraph);
    }
    // NOTE(review): passes a raw `new`-ed ProgramDesc; presumably the
    // Argument takes ownership -- confirm against Argument::Set.
    argument->Set("ir_program_desc", new ProgramDesc(program));

    LOG(INFO) << "Loading parameters";
    // Load parameters to argument if needed.
    if (argument->fluid_model_dir || (argument->fluid_model_program_path &&
                                      argument->fluid_model_param_path)) {
// Declares a local std::string named ATTR holding *argument->ATTR, or "" when
// that field is unset.
#define SAFE_GET(ATTR) std::string ATTR = argument->ATTR ? *argument->ATTR : "";
      SAFE_GET(fluid_model_dir);
      SAFE_GET(fluid_model_program_path);
      SAFE_GET(fluid_model_param_path);
#undef SAFE_GET
      EnableParamModify(fluid_model_dir, fluid_model_program_path,
                        fluid_model_param_path);
    }

    return true;
  }

  // Nothing to clean up; always reports success.
  bool Finalize() override { return true; }

  // Runs the IR passes and builds argument_->main_dfg from the result.
  // NOTE(review): the `graph` parameter is not used; the graph that gets
  // built is argument_->main_dfg. argument_ is dereferenced without a null
  // check, so Initialize() must have been called first.
  void Run(DataFlowGraph *graph) override {
    // Call all the IR Passes
    IRPassManager ir_passes(argument_->Get<ProgramDesc>("ir_program_desc"),
                            nullptr);
    // Pass the scope from analysis to IR if needed.
    if (argument_->Has(framework::ir::kParamScopeAttr)) {
      // Here the address is passed, attention that IR doesn't own the scope, so
      // the real scope in analysis should live during the IR phase.
      ir_passes.graph().Set(
          framework::ir::kParamScopeAttr,
          new framework::Scope *(&argument_->Get<framework::Scope>(
              framework::ir::kParamScopeAttr)));
    }

    // The configured passes are applied only when IR is enabled by flag.
    if (FLAGS_IA_enable_ir) {
      const auto &ir_passes_to_apply =
          argument_->Get<std::vector<std::string>>(kFluidToIrPassesAttr);
      ir_passes.Apply(ir_passes_to_apply);
    }

    PADDLE_ENFORCE(argument_->main_dfg.get());
    argument_->main_dfg->Build(ir_passes.graph());
    // inherit the arguments from ir.
    // A copy of the fuse statistics map is stored on the argument.
    if (ir_passes.graph().Has(framework::ir::kFuseStatisAttr)) {
      argument_->Set(
          framework::ir::kFuseStatisAttr,
          new std::unordered_map<std::string, int>(
              ir_passes.graph().Get<std::unordered_map<std::string, int>>(
                  framework::ir::kFuseStatisAttr)));
    }
  }

  // Defined elsewhere; called by Initialize() with the model directory,
  // program file and parameter file (empty strings for unset fields).
  void EnableParamModify(const std::string &model_dir,
                         const std::string &prog_file,
                         const std::string &param_file);

  std::string repr() const override { return "fluid-to-ir-pass"; }

 private:
  // Load parameters from a single file or from a directory.
  bool LoadParams(framework::Scope *scope, const std::string &dir,
                  const std::string &prog_file, const std::string &param_file);

 private:
  // The argument given to Initialize(); not owned.
  Argument *argument_{nullptr};
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/graph_traits.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/graph_traits.h"
paddle/fluid/inference/analysis/graph_traits.h
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the GraphTraits<X> template class that should be specialized
* by classes that want to be iterable by generic graph iterators.
*
* This file also defines the marker class Inverse that is used to iterate over
* graphs in a graph defined, inverse ordering...
*/
#pragma once
#include "paddle/fluid/inference/analysis/helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
* Primary GraphTraits template. It is intentionally empty: each graph type is
* expected to provide its own specialization. The commented-out members below
* illustrate the kind of interface a specialization might offer.
*/
template
<
typename
GraphType
>
struct
GraphTraits
{
// using NodesBFSIterator = xxx
// NodesBFSIterator nodes_begin();
// NodesBFSIterator nodes_end();
};
/*
 * Inverse - marker wrapper that tells a graph iterator to walk the wrapped
 * graph in a graph-defined inverse order. Only a reference is stored, so the
 * wrapped graph must outlive the marker.
 */
template <class GraphType>
struct Inverse {
  explicit Inverse(const GraphType &g) : graph(g) {}

  // The wrapped graph (not owned).
  const GraphType &graph;
};
/*
* Partial specialization of GraphTraits for a doubly inverted graph. It
* inherits from GraphTraits<GraphType>, so the inverse of an inverse turns
* back into the original graph.
*/
template
<
typename
GraphType
>
struct
GraphTraits
<
Inverse
<
Inverse
<
GraphType
>>>
:
GraphTraits
<
GraphType
>
{};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/helper.h
浏览文件 @
ddb12035
...
@@ -101,20 +101,20 @@ class OrderedRegistry {
...
@@ -101,20 +101,20 @@ class OrderedRegistry {
public:
public:
T
*
Register
(
const
std
::
string
&
name
,
T
*
x
)
{
T
*
Register
(
const
std
::
string
&
name
,
T
*
x
)
{
PADDLE_ENFORCE
(
!
dic_
.
count
(
name
),
"duplicate key [%s]"
,
name
);
PADDLE_ENFORCE
(
!
dic_
.
count
(
name
),
"duplicate key [%s]"
,
name
);
dic_
[
name
]
=
data
_
.
size
();
dic_
[
name
]
=
elements
_
.
size
();
data
_
.
emplace_back
(
std
::
unique_ptr
<
T
>
(
x
));
elements
_
.
emplace_back
(
std
::
unique_ptr
<
T
>
(
x
));
return
data
_
.
back
().
get
();
return
elements
_
.
back
().
get
();
}
}
T
*
Lookup
(
const
std
::
string
&
name
)
{
T
*
Lookup
(
const
std
::
string
&
name
)
{
auto
it
=
dic_
.
find
(
name
);
auto
it
=
dic_
.
find
(
name
);
if
(
it
==
dic_
.
end
())
return
nullptr
;
if
(
it
==
dic_
.
end
())
return
nullptr
;
return
data
_
[
it
->
second
].
get
();
return
elements
_
[
it
->
second
].
get
();
}
}
protected:
protected:
std
::
unordered_map
<
std
::
string
,
int
>
dic_
;
std
::
unordered_map
<
std
::
string
,
int
>
dic_
;
std
::
vector
<
std
::
unique_ptr
<
T
>>
data
_
;
std
::
vector
<
std
::
unique_ptr
<
T
>>
elements
_
;
};
};
template
<
typename
T
>
template
<
typename
T
>
...
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
ddb12035
...
@@ -18,6 +18,8 @@
...
@@ -18,6 +18,8 @@
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/fluid/string/pretty_log.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -27,21 +29,33 @@ using string::PrettyLogEndl;
...
@@ -27,21 +29,33 @@ using string::PrettyLogEndl;
using
string
::
PrettyLog
;
using
string
::
PrettyLog
;
using
string
::
Style
;
using
string
::
Style
;
IRPassManager
::
IRPassManager
(
const
ProgramDesc
&
program
,
IRPassManager
::
IRPassManager
(
Argument
*
argument
)
{
framework
::
Scope
*
scope
)
ARGUMENT_CHECK_FIELD
(
argument
,
main_program
);
:
program_
(
program
)
{
graph_
=
std
::
unique_ptr
<
Graph
>
(
new
Graph
(
argument
->
main_program
()));
graph_
.
reset
(
new
framework
::
ir
::
Graph
(
program
));
if
(
argument
->
Has
(
"scope"
))
{
if
(
scope
)
graph_
->
Set
(
framework
::
ir
::
kParamScopeAttr
,
graph_
->
Set
(
framework
::
ir
::
kParamScopeAttr
,
new
framework
::
Scope
*
(
scope
));
new
framework
::
Scope
*
(
const_cast
<
framework
::
Scope
*>
(
&
argument
->
scope
())));
}
ARGUMENT_CHECK_FIELD
(
argument
,
ir_analysis_passes
);
CreatePasses
(
argument
,
argument
->
ir_analysis_passes
());
}
}
void
IRPassManager
::
Apply
(
const
std
::
vector
<
std
::
string
>
&
passes
)
{
void
IRPassManager
::
CreatePasses
(
Argument
*
argument
,
// Apply all the passes
const
std
::
vector
<
std
::
string
>
&
passes
)
{
std
::
string
pre_pass
;
std
::
string
pre_pass
;
int
pass_num
=
0
;
int
pass_num
=
0
;
for
(
const
std
::
string
&
pass_name
:
passes
)
{
for
(
const
std
::
string
&
pass_name
:
passes
)
{
PrettyLogEndl
(
Style
::
H2
(),
"--- Running IR pass [%s]"
,
pass_name
);
auto
pass
=
framework
::
ir
::
PassRegistry
::
Instance
().
Get
(
pass_name
);
auto
pass
=
framework
::
ir
::
PassRegistry
::
Instance
().
Get
(
pass_name
);
// Set some pass attributes.
if
(
pass_name
==
"ir_analysis_pass"
)
{
pass
->
Set
(
"tensorrt_node_teller"
,
new
SubgraphDetector
::
NodeInsideSubgraphTeller
(
argument
->
tensorrt_node_teller
()));
}
if
(
pass_name
==
"graph_viz_pass"
)
{
if
(
pass_name
==
"graph_viz_pass"
)
{
std
::
string
dot_file_path
=
std
::
to_string
(
pass_num
)
+
"_ir_"
+
std
::
string
dot_file_path
=
std
::
to_string
(
pass_num
)
+
"_ir_"
+
(
pre_pass
.
empty
()
?
"origin"
:
pre_pass
)
+
(
pre_pass
.
empty
()
?
"origin"
:
pre_pass
)
+
...
@@ -49,11 +63,47 @@ void IRPassManager::Apply(const std::vector<std::string> &passes) {
...
@@ -49,11 +63,47 @@ void IRPassManager::Apply(const std::vector<std::string> &passes) {
pass
->
Set
(
"graph_viz_path"
,
new
std
::
string
(
std
::
move
(
dot_file_path
)));
pass
->
Set
(
"graph_viz_path"
,
new
std
::
string
(
std
::
move
(
dot_file_path
)));
pass_num
++
;
pass_num
++
;
}
}
graph_
=
pass
->
Apply
(
std
::
move
(
graph_
));
if
(
pass_name
==
"tensorrt_subgraph_pass"
)
{
PADDLE_ENFORCE
(
argument
->
tensorrt_node_teller_valid
());
pass
->
SetNotOwned
(
"tensorrt_node_teller"
,
argument
->
tensorrt_node_teller_ptr
());
pass
->
Set
(
"workspace_size"
,
new
int
(
argument
->
tensorrt_workspace_size
()));
pass
->
Set
(
"max_batch_size"
,
new
int
(
argument
->
tensorrt_max_batch_size
()));
}
// graph_ = pass->Apply(std::move(graph_));
pre_pass
=
pass_name
;
pre_pass
=
pass_name
;
passes_
.
emplace_back
(
std
::
move
(
pass
));
}
}
}
}
// Runs every pass held by this manager over `graph`, in the order the passes
// were stored, and returns the transformed graph.
//
// graph: the graph to transform; ownership is taken and handed from pass to
//        pass.
// Returns the graph produced by the last pass. When the manager holds no
// passes the input is returned as-is, without the non-null check.
std::unique_ptr<Graph> IRPassManager::Apply(std::unique_ptr<Graph> graph) {
  if (passes_.empty()) {
    return graph;
  }
  PADDLE_ENFORCE(graph.get());
  // Apply all the passes
  for (const auto &pass : passes_) {
    PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass->Type());
    graph = pass->Apply(std::move(graph));
  }
  // A by-value parameter is moved implicitly on return, so no explicit
  // std::move is needed (this also matches the early return above).
  return graph;
}
// Runs the "graph_to_program_pass" over `*graph` and returns the resulting
// program protobuf.
//
// graph:   in/out; the pointed-to graph is handed to the pass and replaced by
//          whatever the pass returns.
// program: copied into a local ProgramDesc that is registered with the pass
//          (not owned by it) under the "program" attribute.
// Returns a copy of the protobuf held by that local ProgramDesc.
framework::proto::ProgramDesc IRPassManager::AcquireProgram(
    std::unique_ptr<Graph> *graph, const ProgramDesc &program) const {
  auto to_program =
      framework::ir::PassRegistry::Instance().Get("graph_to_program_pass");

  ProgramDesc program_copy(program);
  to_program->SetNotOwned("program", &program_copy);

  // Hand the graph to the pass and store the pass output back in place.
  std::unique_ptr<Graph> input(graph->release());
  *graph = to_program->Apply(std::move(input));

  return *program_copy.Proto();
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/analysis/ir_pass_manager.h
浏览文件 @
ddb12035
...
@@ -20,27 +20,38 @@
...
@@ -20,27 +20,38 @@
* for inference.
* for inference.
*/
*/
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/argument.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
using
framework
::
ProgramDesc
;
using
framework
::
ProgramDesc
;
using
framework
::
ir
::
Graph
;
class
IRPassManager
final
{
class
IRPassManager
final
{
public:
public:
IRPassManager
(
const
ProgramDesc
&
program
,
framework
::
Scope
*
scope
);
explicit
IRPassManager
(
Argument
*
argument
);
std
::
unique_ptr
<
Graph
>
Apply
(
std
::
unique_ptr
<
Graph
>
graph
);
void
Apply
(
const
std
::
vector
<
std
::
string
>
&
passes
);
framework
::
proto
::
ProgramDesc
AcquireProgram
(
std
::
unique_ptr
<
Graph
>
*
graph
,
const
ProgramDesc
&
program
)
const
;
framework
::
ir
::
Graph
&
graph
()
const
{
return
*
graph_
;
}
framework
::
ir
::
Graph
&
graph
()
const
{
return
*
graph_
;
}
private:
private:
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
graph_
;
void
CreatePasses
(
Argument
*
argument
,
const
std
::
vector
<
std
::
string
>
&
passes
);
ProgramDesc
program_
;
std
::
unique_ptr
<
Graph
>
graph_
;
std
::
vector
<
std
::
unique_ptr
<
framework
::
ir
::
Pass
>>
passes_
;
};
};
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
0 → 100644
浏览文件 @
ddb12035
# Sub-graph detector library; depends on proto_desc.
cc_library(subgraph_detector SRCS subgraph_detector.cc DEPS proto_desc)
# TensorRT sub-graph pass; depends on the detector above.
cc_library(tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass.cc DEPS subgraph_detector)

# Append both targets to the cached analysis dependency list.
set(analysis_deps ${analysis_deps}
    subgraph_detector tensorrt_subgraph_pass
    CACHE INTERNAL "")

# Append the pass to the cached INFER_IR_PASSES list.
set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "")
paddle/fluid/inference/analysis/
subgraph_splitte
r.cc
→
paddle/fluid/inference/analysis/
ir_passes/subgraph_detecto
r.cc
浏览文件 @
ddb12035
...
@@ -12,46 +12,110 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,46 +12,110 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include <string>
#include <utility>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/node.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
const
char
*
SubGraphSplitter
::
kMarkerAttrName
=
using
framework
::
ir
::
Node
;
"_sub_graph_splitter_inside_sub_graph"
;
std
::
vector
<
std
::
vector
<
Node
*>>
SubGraphSplitter
::
operator
()()
{
// Collects the variable nodes that sit on the boundary of a sub-graph.
//
// graph: the nodes that form the sub-graph.
// Returns {inputs, outputs}:
//   inputs  - var nodes outside the sub-graph that appear in a member's input
//             list and have none of their own inputs inside the sub-graph;
//   outputs - var nodes outside the sub-graph that appear in a member's
//             output list.
// Both lists are de-duplicated; their element order is unspecified.
std::pair<std::vector<Node *>, std::vector<Node *>>
ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) {  // NOLINT
  const std::unordered_set<Node *> members(graph.begin(), graph.end());
  std::unordered_set<Node *> boundary_inputs;
  std::unordered_set<Node *> boundary_outputs;

  // True when at least one input link of `var` comes from inside the
  // sub-graph.
  auto produced_inside = [&members](Node *var) {
    for (auto *producer : var->inputs) {
      if (members.count(producer)) return true;
    }
    return false;
  };

  for (auto *member : graph) {
    for (auto *in : member->inputs) {
      if (members.count(in)) continue;
      if (!in->IsVar()) continue;
      // A value written by a node inside the sub-graph is not an input of
      // the sub-graph.
      if (produced_inside(in)) continue;
      boundary_inputs.insert(in);
    }
    for (auto *out : member->outputs) {
      if (members.count(out) || !out->IsVar()) continue;
      boundary_outputs.insert(out);
    }
  }

  std::vector<Node *> inputs(boundary_inputs.begin(), boundary_inputs.end());
  std::vector<Node *> outputs(boundary_outputs.begin(),
                              boundary_outputs.end());
  return std::make_pair(std::move(inputs), std::move(outputs));
}
// Filters the intermediate results of sub-graph nodes. For each processed
// node, an output is kept only when a node later in topological order has an
// input with the same name; every other output is marked as deleted.
//
// NOTE(review): the candidate list drops IsVar() nodes and the main loop then
// skips IsOp() nodes, so only nodes that are neither get processed -- confirm
// that this is intended.
void FilterRedundantOutputOfSubGraph(Graph *graph) {
  // Non-variable, non-deleted nodes in topological order.
  std::vector<Node *> ordered;
  for (auto &node : TopologicalSort(*graph)) {
    const bool skip = node.IsVar() || Agent(&node).deleted();
    if (!skip) ordered.push_back(&node);
  }

  const size_t total = ordered.size();
  for (size_t cur = 0; cur < total; cur++) {
    Node *current = ordered[cur];
    if (current->IsOp()) continue;

    // Names of all inputs of the nodes that come after `current`.
    std::unordered_set<std::string> later_input_names;
    for (size_t next = cur + 1; next < total; next++) {
      for (auto *in : ordered[next]->inputs) {
        later_input_names.insert(in->Name());
      }
    }

    std::vector<Node *> kept_outputs;
    for (auto *out : current->outputs) {
      if (later_input_names.count(out->Name()) == 0) {
        Agent(out).set_deleted(true);
        continue;
      }
      kept_outputs.push_back(out);
    }
    // kept_outputs may be empty.
    current->outputs = kept_outputs;
  }
}
std
::
vector
<
std
::
vector
<
Node
*>>
SubgraphDetector
::
operator
()()
{
MarkNodesInsideSubGraph
();
MarkNodesInsideSubGraph
();
return
ExtractSubGraphs
();
return
ExtractSubGraphs
();
}
}
// Mark the output variables inside a subgraph with the func.
// Mark the output variables inside a subgraph with the func.
inline
void
MarkOutLinksInSubGraph
(
const
Function
*
func
)
{
inline
void
MarkOutLinksInSubGraph
(
const
Node
*
func
)
{
for
(
auto
*
var
:
func
->
out
link
s
)
{
for
(
auto
*
var
:
func
->
out
put
s
)
{
var
->
attr
(
SubGraphSplitter
::
kMarkerAttrName
).
Bool
()
=
true
;
Agent
(
var
).
set_marked
(
true
)
;
}
}
}
}
void
Sub
GraphSplitte
r
::
MarkNodesInsideSubGraph
()
{
void
Sub
graphDetecto
r
::
MarkNodesInsideSubGraph
()
{
for
(
auto
&
node
:
GraphTraits
<
DataFlowGraph
>
(
*
graph_
).
nodes
(
))
{
for
(
auto
&
node
:
framework
::
ir
::
GraphTraits
::
DFS
(
*
graph_
))
{
if
(
node_inside_subgraph_teller_
(
&
node
))
{
if
(
node_inside_subgraph_teller_
(
&
node
))
{
node
.
attr
(
kMarkerAttrName
).
Bool
()
=
true
;
Agent
(
&
node
).
set_marked
(
true
)
;
if
(
node
.
type
()
==
Node
::
Type
::
kFunction
)
{
if
(
node
.
IsOp
()
)
{
// If a function is inside the sub-graph, mark all the output variables
// If a function is inside the sub-graph, mark all the output variables
// to be inside too, so that two marked functions will be inside a same
// to be inside too, so that two marked functions will be inside a same
// sub-graph, lets take a example: A_function->var->B_function, if
// sub-graph, lets take a example: A_function->var->B_function, if
// A_function is marked, var should also be marked, so that B_function
// A_function is marked, var should also be marked, so that B_function
// will be in the same sub-graph with A_function if B_function is
// will be in the same sub-graph with A_function if B_function is
// marked.
// marked.
MarkOutLinksInSubGraph
(
static_cast
<
const
Function
*>
(
&
node
)
);
MarkOutLinksInSubGraph
(
&
node
);
}
}
}
}
}
}
}
}
const
char
*
kUnionFindParent
=
"_sub_graph_splitter_union_find_parent_"
;
// Use the Union Find(UF) algorithm to find fully connected sub-graphs, if node
// Use the Union Find(UF) algorithm to find fully connected sub-graphs, if node
// a's output is node b, that is a and b is in the same sub-graph. The UF
// a's output is node b, that is a and b is in the same sub-graph. The UF
// algorithm will group them to the same cluster.
// algorithm will group them to the same cluster.
...
@@ -60,8 +124,8 @@ using node_map_t = std::unordered_map<int, Node *>;
...
@@ -60,8 +124,8 @@ using node_map_t = std::unordered_map<int, Node *>;
int
UnionFindGetAncestor
(
const
node_map_t
&
node_map
,
size_t
id
)
{
int
UnionFindGetAncestor
(
const
node_map_t
&
node_map
,
size_t
id
)
{
int
tmp
=
id
;
int
tmp
=
id
;
do
{
do
{
tmp
=
node_map
.
at
(
tmp
)
->
attr
(
kUnionFindParent
).
Int32
();
tmp
=
Agent
(
node_map
.
at
(
tmp
)).
union_find_parent
();
}
while
(
node_map
.
at
(
tmp
)
->
attr
(
kUnionFindParent
).
Int32
()
!=
tmp
);
}
while
(
Agent
(
node_map
.
at
(
tmp
)).
union_find_parent
()
!=
tmp
);
return
tmp
;
return
tmp
;
}
}
// Make this two node share the same ancestor.
// Make this two node share the same ancestor.
...
@@ -69,9 +133,9 @@ int UnionFindGetAncestor(const node_map_t &node_map, size_t id) {
...
@@ -69,9 +133,9 @@ int UnionFindGetAncestor(const node_map_t &node_map, size_t id) {
void
UnionFindCombine
(
const
node_map_t
&
node_map
,
size_t
a
,
size_t
b
)
{
void
UnionFindCombine
(
const
node_map_t
&
node_map
,
size_t
a
,
size_t
b
)
{
int
a_ancestor
=
UnionFindGetAncestor
(
node_map
,
a
);
int
a_ancestor
=
UnionFindGetAncestor
(
node_map
,
a
);
int
b_ancestor
=
UnionFindGetAncestor
(
node_map
,
b
);
int
b_ancestor
=
UnionFindGetAncestor
(
node_map
,
b
);
node_map
.
at
(
b_ancestor
)
->
attr
(
kUnionFindParent
).
Int32
()
=
a_ancestor
;
Agent
(
node_map
.
at
(
b_ancestor
)).
set_union_find_parent
(
a_ancestor
)
;
node_map
.
at
(
a
)
->
attr
(
kUnionFindParent
).
Int32
()
=
a_ancestor
;
Agent
(
node_map
.
at
(
a
)).
set_union_find_parent
(
a_ancestor
)
;
node_map
.
at
(
b
)
->
attr
(
kUnionFindParent
).
Int32
()
=
a_ancestor
;
Agent
(
node_map
.
at
(
b
)).
set_union_find_parent
(
a_ancestor
)
;
}
}
// This is a simple representation of a graph.
// This is a simple representation of a graph.
...
@@ -195,16 +259,21 @@ void FlexibleDFS(const std::vector<BriefNode *> &source, bool reverse,
...
@@ -195,16 +259,21 @@ void FlexibleDFS(const std::vector<BriefNode *> &source, bool reverse,
}
}
}
}
std
::
vector
<
std
::
vector
<
Node
*>>
Sub
GraphSplitte
r
::
ExtractSubGraphs
()
{
std
::
vector
<
std
::
vector
<
Node
*>>
Sub
graphDetecto
r
::
ExtractSubGraphs
()
{
// Run the Extract algorithm to find all subgraphs.
// Run the Extract algorithm to find all subgraphs.
std
::
vector
<
Node
*>
marked_nodes
;
std
::
vector
<
Node
*>
marked_nodes
;
// We use brief_node_map to represent the original graph in order to avoid
// We use brief_node_map to represent the original graph in order to avoid
// changing the original graph.
// changing the original graph.
std
::
unordered_map
<
int
,
BriefNode
*>
brief_node_map
;
std
::
unordered_map
<
int
,
BriefNode
*>
brief_node_map
;
for
(
auto
&
node
:
GraphTraits
<
DataFlowGraph
>
(
*
graph_
).
nodes_in_TS
())
{
std
::
unordered_set
<
int32_t
>
valid_node_ids
;
for
(
auto
*
node
:
graph_
->
Nodes
())
{
valid_node_ids
.
insert
(
node
->
id
());
}
for
(
auto
&
node
:
framework
::
ir
::
GraphTraits
::
TS
(
*
graph_
))
{
brief_node_map
[
node
.
id
()]
=
new
BriefNode
(
&
node
);
brief_node_map
[
node
.
id
()]
=
new
BriefNode
(
&
node
);
if
(
node
.
attr
(
kMarkerAttrName
).
Bool
())
{
if
(
Agent
(
&
node
).
marked
())
{
marked_nodes
.
push_back
(
&
node
);
marked_nodes
.
push_back
(
&
node
);
}
}
}
}
...
@@ -213,26 +282,34 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
...
@@ -213,26 +282,34 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
node_map_t
node_map
;
// id to ptr
node_map_t
node_map
;
// id to ptr
for
(
auto
*
n
:
marked_nodes
)
{
for
(
auto
*
n
:
marked_nodes
)
{
// n's parent == n.id means it is the ancestor
// n's parent == n.id means it is the ancestor
n
->
attr
(
kUnionFindParent
).
Int32
()
=
n
->
id
(
);
Agent
(
n
).
set_union_find_parent
(
n
->
id
()
);
node_map
[
n
->
id
()]
=
n
;
node_map
[
n
->
id
()]
=
n
;
}
}
// create breif node map
// create breif node map
for
(
auto
&
itr
:
brief_node_map
)
{
for
(
auto
&
itr
:
brief_node_map
)
{
for
(
Node
*
node
:
itr
.
second
->
node
->
inlinks
)
{
for
(
Node
*
node
:
itr
.
second
->
node
->
inputs
)
{
itr
.
second
->
inlinks
.
push_back
(
brief_node_map
[
node
->
id
()]);
if
(
!
valid_node_ids
.
count
(
node
->
id
()))
{
LOG
(
INFO
)
<<
"invalid node id "
<<
node
->
id
();
continue
;
}
itr
.
second
->
inlinks
.
push_back
(
brief_node_map
.
at
(
node
->
id
()));
}
}
for
(
Node
*
node
:
itr
.
second
->
node
->
outlinks
)
{
for
(
Node
*
node
:
itr
.
second
->
node
->
outputs
)
{
itr
.
second
->
outlinks
.
push_back
(
brief_node_map
[
node
->
id
()]);
if
(
!
valid_node_ids
.
count
(
node
->
id
()))
{
LOG
(
INFO
)
<<
"invalid node id "
<<
node
->
id
();
continue
;
}
itr
.
second
->
outlinks
.
push_back
(
brief_node_map
.
at
(
node
->
id
()));
}
}
}
}
for
(
auto
&
itr
:
brief_node_map
)
{
for
(
auto
&
itr
:
brief_node_map
)
{
BriefNode
*
brief_node
=
itr
.
second
;
BriefNode
*
brief_node
=
itr
.
second
;
if
(
!
brief_node
->
node
->
attr
(
kMarkerAttrName
).
Bool
())
{
if
(
!
Agent
(
brief_node
->
node
).
marked
())
{
VLOG
(
4
0
)
<<
brief_node
->
node
->
id
()
<<
" node not a trt candic
ate."
;
VLOG
(
4
)
<<
brief_node
->
node
->
id
()
<<
" node not a trt candid
ate."
;
continue
;
continue
;
}
}
...
@@ -254,7 +331,7 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
...
@@ -254,7 +331,7 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
std
::
unordered_set
<
BriefNode
*>
contract_nodes
;
std
::
unordered_set
<
BriefNode
*>
contract_nodes
;
for
(
auto
*
out
:
brief_node
->
outlinks
)
{
for
(
auto
*
out
:
brief_node
->
outlinks
)
{
// must be an trt candidate
// must be an trt candidate
if
(
!
out
->
node
->
attr
(
kMarkerAttrName
).
Bool
())
continue
;
if
(
!
Agent
(
out
->
node
).
marked
())
continue
;
// get all dst input nodes except src.
// get all dst input nodes except src.
std
::
vector
<
BriefNode
*>
source_nodes
;
std
::
vector
<
BriefNode
*>
source_nodes
;
for
(
auto
*
n
:
out
->
inlinks
)
{
for
(
auto
*
n
:
out
->
inlinks
)
{
...
@@ -289,9 +366,8 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
...
@@ -289,9 +366,8 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
std
::
unordered_map
<
int
/*ancestor*/
,
std
::
vector
<
Node
*>>
clusters
;
std
::
unordered_map
<
int
/*ancestor*/
,
std
::
vector
<
Node
*>>
clusters
;
for
(
auto
*
n
:
marked_nodes
)
{
for
(
auto
*
n
:
marked_nodes
)
{
if
(
n
->
type
()
==
Node
::
Type
::
kFunction
)
{
if
(
n
->
IsOp
())
{
clusters
[
UnionFindGetAncestor
(
node_map
,
clusters
[
UnionFindGetAncestor
(
node_map
,
Agent
(
n
).
union_find_parent
())]
n
->
attr
(
kUnionFindParent
).
Int32
())]
.
push_back
(
n
);
.
push_back
(
n
);
}
}
}
}
...
@@ -304,28 +380,59 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
...
@@ -304,28 +380,59 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
return
result
;
return
result
;
}
}
void
SubGraphFuse
::
operator
()()
{
ReplaceNodesWithSubGraphs
();
}
// Entry point of the fuser: delegates to ReplaceNodesWithSubGraphs().
void
SubGraphFuser
::
operator
()()
{
ReplaceNodesWithSubGraphs
();
}
// Drops from `outputs` every node whose consumers all lie inside the
// sub-graph.
//
// subgraph: nodes that make up the sub-graph.
// graph:    not used by this function.
// outputs:  in/out; on return it holds only the candidates that have at
//           least one entry in their own output list outside `subgraph`
//           (de-duplicated, order unspecified).
void RemoveIntermediateOutputInSubgraph(const std::vector<Node *> &subgraph,
                                        Graph *graph,
                                        std::vector<Node *> *outputs) {
  const std::unordered_set<Node *> members(subgraph.begin(), subgraph.end());
  std::unordered_set<Node *> externally_used;

  for (auto *candidate : *outputs) {
    for (auto *consumer : candidate->outputs) {
      if (members.count(consumer) == 0) {
        // One consumer outside the sub-graph is enough to keep the output.
        externally_used.insert(candidate);
        break;
      }
    }
  }

  outputs->assign(externally_used.begin(), externally_used.end());
}
// Disconnects every node flagged as deleted by clearing its own input and
// output link lists. The nodes stay in the graph, and the link lists of other
// nodes are not touched.
//
// graph: the graph whose nodes are inspected.
void DetachDeletedNodes(framework::ir::Graph *graph) {
  for (auto *node : graph->Nodes()) {
    if (!Agent(node).deleted()) continue;
    node->inputs.clear();
    node->outputs.clear();
  }
}
void
SubGraphFuse
::
ReplaceNodesWithSubGraphs
()
{
void
SubGraphFuse
r
::
ReplaceNodesWithSubGraphs
()
{
auto
subgraphs
=
Sub
GraphSplitte
r
(
graph_
,
node_inside_subgraph_teller_
)();
auto
subgraphs
=
Sub
graphDetecto
r
(
graph_
,
node_inside_subgraph_teller_
)();
for
(
auto
&
subgraph
:
subgraphs
)
{
for
(
auto
&
subgraph
:
subgraphs
)
{
if
(
subgraph
.
size
()
<=
argument_
->
Get
<
int
>
(
"minimum_subgraph_size"
))
if
(
subgraph
.
size
()
<=
min_subgraph_size_
)
continue
;
continue
;
LOG
(
INFO
)
<<
"detect a subgraph size "
<<
subgraph
.
size
()
;
std
::
unordered_set
<
Node
*>
subgraph_uniq
(
subgraph
.
begin
(),
subgraph
.
end
());
std
::
unordered_set
<
Node
*>
subgraph_uniq
(
subgraph
.
begin
(),
subgraph
.
end
());
// replace this sub-graph with the first node. Two steps: 1. Create a Block
// replace this sub-graph with the first node. Two steps: 1. Create a Block
// Node that contains this subgraph 2. Mark the nodes inside the sub-graph
// Node that contains this subgraph 2. Mark the nodes inside the sub-graph
// as deleted. 3. Replace the deleted node with the new Block Node.
// as deleted. 3. Replace the deleted node with the new Block Node.
auto
*
block_node
=
static_cast
<
FunctionBlock
*>
(
framework
::
OpDesc
empty_desc
;
graph_
->
nodes
.
Create
(
Node
::
Type
::
kFunctionBlock
));
empty_desc
.
SetType
(
"tensorrt_engine"
);
auto
*
block_node
=
graph_
->
CreateOpNode
(
&
empty_desc
);
Agent
(
block_node
).
set_subgraph
({});
auto
io
=
ExtractInputAndOutputOfSubGraph
(
subgraph
);
auto
io
=
ExtractInputAndOutputOfSubGraph
(
subgraph
);
block_node
->
inlinks
=
std
::
move
(
io
.
first
);
block_node
->
inputs
=
std
::
move
(
io
.
first
);
block_node
->
outlinks
=
std
::
move
(
io
.
second
);
block_node
->
outputs
=
std
::
move
(
io
.
second
);
RemoveIntermediateOutputInSubgraph
(
subgraph
,
graph_
,
&
block_node
->
outputs
);
for
(
auto
*
node
:
subgraph
)
{
for
(
auto
*
node
:
subgraph
)
{
// TODO(Superjomn) need a unified mechanism to treat deleted node in each
// TODO(Superjomn) need a unified mechanism to treat deleted node in each
// pass.
// pass.
node
->
SetDeleted
(
);
Agent
(
node
).
set_deleted
(
true
);
block_node
->
subgraph
.
push_back
(
node
);
Agent
(
block_node
).
subgraph
()
->
push_back
(
node
);
}
}
// Change all the sub-graph's inputs and outputs corresponding inlink and
// Change all the sub-graph's inputs and outputs corresponding inlink and
...
@@ -339,16 +446,92 @@ void SubGraphFuse::ReplaceNodesWithSubGraphs() {
...
@@ -339,16 +446,92 @@ void SubGraphFuse::ReplaceNodesWithSubGraphs() {
std
::
unordered_set
<
Node
*>
uniq
(
nodes
.
begin
(),
nodes
.
end
());
std
::
unordered_set
<
Node
*>
uniq
(
nodes
.
begin
(),
nodes
.
end
());
nodes
.
assign
(
uniq
.
begin
(),
uniq
.
end
());
nodes
.
assign
(
uniq
.
begin
(),
uniq
.
end
());
};
};
for
(
auto
*
i
:
block_node
->
in
link
s
)
{
for
(
auto
*
i
:
block_node
->
in
put
s
)
{
inlink_or_outlink_cleaner
(
i
->
out
link
s
);
inlink_or_outlink_cleaner
(
i
->
out
put
s
);
}
}
for
(
auto
*&
o
:
block_node
->
out
link
s
)
{
for
(
auto
*&
o
:
block_node
->
out
put
s
)
{
inlink_or_outlink_cleaner
(
o
->
in
link
s
);
inlink_or_outlink_cleaner
(
o
->
in
put
s
);
}
}
}
}
// DetachDeletedNodes(graph_);
FilterRedundantOutputOfSubGraph
(
graph_
);
FilterRedundantOutputOfSubGraph
(
graph_
);
}
}
// Returns true when `node` has exactly `n` input links.
inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
  const size_t indegree = node.inputs.size();
  return indegree == n;
}
// Builds the topological order of the nodes reachable from `source`.
//
// source: start nodes; must be non-empty and each must have no inputs.
//
// A node is appended to the order once all of its inputs have been visited;
// its not-yet-visited outputs are then scheduled. A node flagged as deleted is
// marked visited and unscheduled before that check, and is still appended if
// the check passes.
NodesTSIterator::NodesTSIterator(const std::vector<Node *> &source) {
  PADDLE_ENFORCE(!source.empty(),
                 "Start points of topological sorting should not be empty!");
  // Every start point must have an in-degree of 0.
  for (auto *start : source) {
    PADDLE_ENFORCE(CheckNodeIndegreeEquals(*start, 0));
  }

  std::unordered_set<Node *> visited;
  std::unordered_set<Node *> pending(source.begin(), source.end());

  while (!pending.empty()) {
    // Walk a snapshot, because `pending` is modified inside the loop.
    const std::vector<Node *> snapshot(pending.begin(), pending.end());
    for (auto *current : snapshot) {
      if (Agent(current).deleted()) {
        visited.insert(current);
        pending.erase(current);
      }

      const bool inputs_ready =
          std::all_of(current->inputs.begin(), current->inputs.end(),
                      [&visited](Node *in) { return visited.count(in) != 0; });
      if (!inputs_ready) continue;

      sorted_.push_back(current);
      for (auto *next : current->outputs) {
        if (visited.count(next) == 0) pending.insert(next);
      }
      pending.erase(current);
      visited.insert(current);
    }
  }
}
// Copy constructor: copies both the sorted node list and the cursor.
NodesTSIterator
::
NodesTSIterator
(
const
NodesTSIterator
&
other
)
:
sorted_
(
other
.
sorted_
),
cursor_
(
other
.
cursor_
)
{}
// Dereference: returns the node at the cursor. Enforces that the cursor is
// inside the sorted list.
Node &NodesTSIterator::operator*() {
  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
  Node *current = sorted_[cursor_];
  return *current;
}
// Pre-increment: advances the cursor. When it moves past the last node, the
// iterator is reset to an empty list with the cursor at 0.
NodesTSIterator &NodesTSIterator::operator++() {
  ++cursor_;
  if (cursor_ < sorted_.size()) return *this;

  // Ran off the end: drop the list and rewind.
  sorted_.clear();
  cursor_ = 0;
  return *this;
}
// Copy assignment: copies the cursor, then the sorted node list, and returns
// *this.
NodesTSIterator
&
NodesTSIterator
::
operator
=
(
const
NodesTSIterator
&
other
)
{
cursor_
=
other
.
cursor_
;
sorted_
=
other
.
sorted_
;
return
*
this
;
}
// Two iterators are equal when they hold equal node lists and sit at the same
// cursor position.
bool NodesTSIterator::operator==(const NodesTSIterator &other) {
  if (!(sorted_ == other.sorted_)) return false;
  return cursor_ == other.cursor_;
}
// Member access: returns a pointer to the node at the cursor. Enforces that
// the cursor is inside the sorted list.
Node *NodesTSIterator::operator->() {
  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
  Node *current = sorted_[cursor_];
  return current;
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
0 → 100644
浏览文件 @
ddb12035
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the classes used to partition a graph into sub-graphs.
*/
#pragma once
#include <vector>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_traits.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
using
framework
::
ir
::
Graph
;
const
char
kIsFunctionNode
[]
=
"__is_function_node__"
;
const
char
kFunctionNodeSubGraph
[]
=
"__function_node_sub_graph__"
;
const
char
kSubgraphSplitterMarkerAttrName
[]
=
"_sub_graph_splitter_inside_sub_graph"
;
/*
 * Detects the sub-graphs formed by the nodes that a caller-supplied predicate
 * accepts. Per the original note, this class doesn't modify the graph.
 */
class SubgraphDetector {
 public:
  // Predicate telling whether a node is inside a sub-graph.
  using NodeInsideSubgraphTeller =
      std::function<bool(const framework::ir::Node *)>;

  // graph:  the graph to analyse (stored as a raw pointer, not owned).
  // teller: predicate copied into the detector.
  SubgraphDetector(Graph *graph, const NodeInsideSubgraphTeller &teller)
      : graph_(graph), node_inside_subgraph_teller_(teller) {}

  // Returns the detected sub-graphs, each as a list of nodes.
  std::vector<std::vector<framework::ir::Node *>> operator()();

 protected:
  // Mark the nodes inside the accepted sub-graph using
  // node_inside_subgraph_teller.
  void MarkNodesInsideSubGraph();

  // Merge the marked nodes into sub-graphs and return the sub-graphs.
  std::vector<std::vector<framework::ir::Node *>> ExtractSubGraphs();

 private:
  Graph *graph_;
  NodeInsideSubgraphTeller node_inside_subgraph_teller_;
};
/*
* SubGraphFuser - Replace some nodes with the sub-graph node they are inside.
* To some extent, the TensorRT engine is just a fusion op for a model.
*/
class
SubGraphFuser
{
public:
using
NodeInsideSubgraphTeller
=
SubgraphDetector
::
NodeInsideSubgraphTeller
;
SubGraphFuser
(
Graph
*
graph
,
const
NodeInsideSubgraphTeller
&
teller
,
int
min_subgraph_size
)
:
graph_
(
graph
),
node_inside_subgraph_teller_
(
teller
),
min_subgraph_size_
{
min_subgraph_size
}
{}
// The main method which run all the logic.
void
operator
()();
protected:
// Remove the nodes inside sub-graphs and replace with the SubGraphNode.
void
ReplaceNodesWithSubGraphs
();
private:
Graph
*
graph_
;
NodeInsideSubgraphTeller
node_inside_subgraph_teller_
;
int
min_subgraph_size_
;
};
// Per-node bookkeeping record used by the sub-graph detector (see Agent,
// which attaches one of these to an ir::Node on demand).
struct NodeWrapper {
  // Flag read and written through Agent::deleted() / Agent::set_deleted().
  bool deleted = false;
  // Flag read and written through Agent::marked() / Agent::set_marked().
  bool marked = false;
  // Union-find parent; -1 is the initial value.
  int union_find_parent = -1;
  // Nodes belonging to the sub-graph associated with this node.
  std::vector<framework::ir::Node *> subgraph;
};
/*
* ir::Node agent for subgraph detector.
*/
struct
Agent
{
explicit
Agent
(
framework
::
ir
::
Node
*
x
)
:
x_
(
x
)
{}
NodeWrapper
&
wrapper
()
{
if
(
!
x_
->
IsWrappedBy
<
NodeWrapper
>
())
{
x_
->
WrappedBy
<
NodeWrapper
>
(
new
NodeWrapper
);
}
return
x_
->
template
Wrapper
<
NodeWrapper
>();
}
bool
deleted
()
{
return
wrapper
().
deleted
;
}
void
set_deleted
(
bool
x
)
{
wrapper
().
deleted
=
x
;
}
bool
marked
()
{
return
wrapper
().
marked
;
}
void
set_marked
(
bool
x
)
{
wrapper
().
marked
=
x
;
}
void
set_subgraph
(
const
std
::
vector
<
framework
::
ir
::
Node
*>
&
x
)
{
wrapper
().
subgraph
=
x
;
}
int
union_find_parent
()
{
return
wrapper
().
union_find_parent
;
}
void
set_union_find_parent
(
int
v
)
{
wrapper
().
union_find_parent
=
v
;
}
std
::
vector
<
framework
::
ir
::
Node
*>
*
subgraph
()
{
return
&
wrapper
().
subgraph
;
}
std
::
vector
<
framework
::
ir
::
Node
*>
&
inputs
()
{
return
x_
->
inputs
;
}
std
::
vector
<
framework
::
ir
::
Node
*>
&
outputs
()
{
return
x_
->
outputs
;
}
private:
framework
::
ir
::
Node
*
x_
;
};
// Topological sorting iterator on nodes.
//
// A default-constructed iterator has an empty node sequence and a zero
// cursor; TopologicalSort() in this header uses it as the end of the range.
struct NodesTSIterator
    : public std::iterator<std::forward_iterator_tag, framework::ir::Node *> {
  NodesTSIterator() = default;

  // Builds the iterator from a list of source nodes (defined out of line).
  explicit NodesTSIterator(const std::vector<framework::ir::Node *> &source);

  // Move: steals the sorted sequence, copies the cursor and resets the
  // cursor of the moved-from iterator to 0.
  NodesTSIterator(NodesTSIterator &&other)
      : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) {
    other.cursor_ = 0;
  }

  NodesTSIterator(const NodesTSIterator &other);

  framework::ir::Node &operator*();
  NodesTSIterator &operator++();

  // TODO(Superjomn) current implementation just compare the first
  // element, need to compare the graph and all the elements in the queue and
  // set.
  // NOTE(review): the TODO above talks about comparison, so it presumably
  // belongs to operator== rather than to the assignment it sits on -- confirm.
  NodesTSIterator &operator=(const NodesTSIterator &other);

  bool operator==(const NodesTSIterator &other);

  // Defined as the negation of operator==.
  bool operator!=(const NodesTSIterator &other) {
    const bool equal = (*this == other);
    return !equal;
  }

  framework::ir::Node *operator->();

 private:
  std::vector<framework::ir::Node *> sorted_;
  size_t cursor_ = 0;
};
// The nodes those have no input will be treated as start points.
static
std
::
vector
<
framework
::
ir
::
Node
*>
ExtractStartPoints
(
const
Graph
&
g
)
{
std
::
vector
<
framework
::
ir
::
Node
*>
result
;
for
(
auto
*
node
:
g
.
Nodes
())
{
if
(
node
->
inputs
.
empty
())
{
result
.
push_back
(
node
);
}
}
return
result
;
}
// Returns the range of nodes of `g` in topological order.
//
// The range begins at an iterator built from the graph's start points (nodes
// without inputs) and ends at a default-constructed iterator. Enforces that
// the graph has at least one start point.
//
// The previous version also built an unused local iterator from the same
// start points; it was dropped because its result was never read, so the
// iterator constructor now runs once per call instead of twice.
static iterator_range<NodesTSIterator> TopologicalSort(const Graph &g) {
  auto start_points = ExtractStartPoints(g);
  PADDLE_ENFORCE(!start_points.empty());
  return iterator_range<NodesTSIterator>(NodesTSIterator(start_points),
                                         NodesTSIterator());
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/
data_flow_graph_to_fluid
_pass.cc
→
paddle/fluid/inference/analysis/
ir_passes/tensorrt_subgraph
_pass.cc
浏览文件 @
ddb12035
...
@@ -12,120 +12,91 @@
...
@@ -12,120 +12,91 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/framework/proto_desc.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/io.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
using
framework
::
proto
::
ProgramDesc
;
using
framework
::
ir
::
Node
;
std
::
vector
<
std
::
string
>
ExtractParameters
(
std
::
vector
<
std
::
string
>
ExtractParameters
(
const
std
::
vector
<
std
::
unique_ptr
<
Node
>
>
&
nodes
);
const
std
::
unordered_set
<
Node
*
>
&
nodes
);
bool
DataFlowGraphToFluidPass
::
Initialize
(
Argument
*
argument
)
{
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
analysis
::
TensorRtSubgraphPass
::
ApplyImpl
(
ANALYSIS_ARGUMENT_CHECK_FIELD
(
argument
)
ANALYSIS_ARGUMENT_CHECK_FIELD
(
argument
->
origin_program_desc
)
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
graph
)
const
{
// The transformed_program_desc should inherit all the VarDesc and BlockDesc
framework
::
ir
::
FusePassBase
::
Init
(
"tensorrt_subgraph_pass"
,
graph
.
get
());
// from the original program desc. The operators of the main block(the first
// block) should rewritten by data flow graph.
auto
teller
=
argument
->
transformed_program_desc
.
reset
(
Get
<
SubgraphDetector
::
NodeInsideSubgraphTeller
>
(
"tensorrt_node_teller"
);
new
ProgramDesc
(
*
argument
->
origin_program_desc
));
argument
->
transformed_program_desc
->
mutable_blocks
(
framework
::
kRootBlockIndex
)
->
clear_ops
();
desc_
=
argument
->
transformed_program_desc
.
get
();
argument_
=
argument
;
return
true
;
}
bool
DataFlowGraphToFluidPass
::
Finalize
()
{
return
true
;
}
SubGraphFuser
fuser
(
graph
.
get
(),
teller
,
2
/*min subgraph size*/
);
fuser
();
void
DataFlowGraphToFluidPass
::
Run
(
DataFlowGraph
*
graph
)
{
for
(
auto
*
node
:
graph
->
Nodes
())
{
// FilterRedundantOutputOfSubGraph(graph);
if
(
node
->
IsOp
()
&&
!
Agent
(
node
).
subgraph
()
->
empty
())
{
for
(
auto
&
node
:
GraphTraits
<
DataFlowGraph
>
(
*
graph
).
nodes_in_TS
())
{
CreateTensorRTOp
(
node
,
graph
.
get
());
if
(
node
.
deleted
())
continue
;
switch
(
node
.
type
())
{
std
::
unordered_set
<
const
Node
*>
nodes2remove
(
case
Node
::
Type
::
kFunction
:
{
Agent
(
node
).
subgraph
()
->
begin
(),
Agent
(
node
).
subgraph
()
->
end
());
AddFluidOp
(
&
node
);
framework
::
ir
::
GraphSafeRemoveNodes
(
graph
.
get
(),
nodes2remove
);
}
break
;
case
Node
::
Type
::
kFunctionBlock
:
{
AddEngineOp
(
&
node
);
}
break
;
default:
continue
;
}
}
}
}
if
(
argument_
->
Has
(
framework
::
ir
::
kParamScopeAttr
))
{
std
::
unordered_set
<
const
Node
*>
nodes2remove
;
LOG
(
WARNING
)
<<
"parameter changes in the scope takes effect"
;
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsOp
()
&&
Agent
(
node
).
deleted
())
{
nodes2remove
.
insert
(
node
);
}
}
}
framework
::
ir
::
GraphSafeRemoveNodes
(
graph
.
get
(),
nodes2remove
);
PADDLE_ENFORCE
(
argument_
->
transformed_program_desc
.
get
())
;
return
graph
;
}
}
void
DataFlowGraphToFluidPass
::
AddFluidOp
(
Node
*
node
)
{
void
TensorRtSubgraphPass
::
CreateTensorRTOp
(
framework
::
ir
::
Node
*
node
,
PADDLE_ENFORCE
(
node
);
Graph
*
graph
)
const
{
PADDLE_ENFORCE
(
node
->
IsFunction
());
auto
*
op_desc
=
node
->
Op
();
PADDLE_ENFORCE
(
node
->
pb_desc
()
||
!
node
->
pb_msg
().
empty
(),
static
int
counter
{
0
};
"node has invalid protobuf repr."
);
auto
&
subgraph
=
*
Agent
(
node
).
subgraph
();
PADDLE_ENFORCE
(
!
subgraph
.
empty
());
// currently only the main block is analyzed.
PADDLE_ENFORCE
(
desc_
);
auto
*
main_block
=
desc_
->
mutable_blocks
(
framework
::
kRootBlockIndex
);
auto
*
op
=
main_block
->
add_ops
();
if
(
node
->
pb_desc
())
{
// An fake block desc.
auto
*
ori_op
=
static_cast
<
framework
::
proto
::
OpDesc
*>
(
node
->
pb_desc
());
framework
::
proto
::
BlockDesc
block_proto
;
*
op
=
framework
::
BlockDesc
block_desc
(
nullptr
,
&
block_proto
);
*
ori_op
;
// copy the attributes, by default, these will not be changed
block_desc
.
Proto
()
->
set_parent_idx
(
-
1
);
// by analysis phrase.
block_desc
.
Proto
()
->
set_idx
(
0
);
// The inputs and outputs of the existing ops are not changed by tensorrt
for
(
auto
*
node
:
subgraph
)
{
// subgraph pass.
auto
*
op
=
block_desc
.
AppendOp
();
// NOTE It might be changed by other passes in the long run.
*
op
->
Proto
()
=
*
node
->
Op
()
->
Proto
();
}
else
{
op
->
ParseFromString
(
node
->
pb_msg
());
}
}
}
void
CreateTrtEngineOp
(
Node
*
node
,
Argument
*
argument
,
framework
::
proto
::
BlockDesc
*
block
)
{
PADDLE_ENFORCE
(
argument
->
main_dfg
.
get
());
const
DataFlowGraph
&
graph
=
*
(
argument
->
main_dfg
);
static
int
counter
{
0
};
PADDLE_ENFORCE
(
node
->
IsFunctionBlock
());
framework
::
OpDesc
desc
;
auto
*
func
=
static_cast
<
FunctionBlock
*>
(
node
);
// collect inputs
// collect inputs
std
::
unordered_set
<
std
::
string
>
input_names
;
std
::
unordered_set
<
std
::
string
>
input_names
;
std
::
unordered_set
<
std
::
string
>
input_names_with_id
;
std
::
unordered_set
<
std
::
string
>
input_names_with_id
;
for
(
auto
*
x
:
func
->
inlink
s
)
{
for
(
auto
*
x
:
node
->
input
s
)
{
input_names
.
insert
(
x
->
n
ame
());
input_names
.
insert
(
x
->
N
ame
());
input_names_with_id
.
insert
(
x
->
n
ame
()
+
std
::
to_string
(
x
->
id
()));
input_names_with_id
.
insert
(
x
->
N
ame
()
+
std
::
to_string
(
x
->
id
()));
}
}
desc
.
SetInput
(
op_desc
->
SetInput
(
"Xs"
,
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()));
"Xs"
,
std
::
vector
<
std
::
string
>
(
input_names
.
begin
(),
input_names
.
end
()));
std
::
unordered_set
<
std
::
string
>
output_names
;
std
::
unordered_set
<
std
::
string
>
output_names
;
std
::
unordered_set
<
std
::
string
>
output_names_with_id
;
std
::
unordered_set
<
std
::
string
>
output_names_with_id
;
for
(
auto
*
x
:
func
->
outlink
s
)
{
for
(
auto
*
x
:
node
->
output
s
)
{
output_names
.
insert
(
x
->
n
ame
());
output_names
.
insert
(
x
->
N
ame
());
output_names_with_id
.
insert
(
x
->
n
ame
()
+
std
::
to_string
(
x
->
id
()));
output_names_with_id
.
insert
(
x
->
N
ame
()
+
std
::
to_string
(
x
->
id
()));
}
}
desc
.
SetOutput
(
op_desc
->
SetOutput
(
"Ys"
,
std
::
vector
<
std
::
string
>
(
output_names
.
begin
(),
output_names
.
end
()));
"Ys"
,
std
::
vector
<
std
::
string
>
(
output_names
.
begin
(),
output_names
.
end
()));
desc
.
SetType
(
"tensorrt_engine"
);
op_desc
->
SetType
(
"tensorrt_engine"
);
std
::
unordered_map
<
std
::
string
,
std
::
string
>
output_name_map
;
std
::
unordered_map
<
std
::
string
,
std
::
string
>
output_name_map
;
...
@@ -134,7 +105,7 @@ void CreateTrtEngineOp(Node *node, Argument *argument,
...
@@ -134,7 +105,7 @@ void CreateTrtEngineOp(Node *node, Argument *argument,
// Why we do this?
// Why we do this?
// During the transition from fluid OP to tensorrt OP, we map
// During the transition from fluid OP to tensorrt OP, we map
// the input and output Tensor(fluid data structure) of fluid OP
// the input and output Tensor(fluid data structure) of fluid OP
// to the correspondin ITensor (trt data structure) through the
// to the correspondin
g
ITensor (trt data structure) through the
// Tensor name. When we set up ITensor for an variable, we must
// Tensor name. When we set up ITensor for an variable, we must
// ensure that it has not been set before.
// ensure that it has not been set before.
// If there is variable in the fluid graph, which is not only the
// If there is variable in the fluid graph, which is not only the
...
@@ -142,21 +113,22 @@ void CreateTrtEngineOp(Node *node, Argument *argument,
...
@@ -142,21 +113,22 @@ void CreateTrtEngineOp(Node *node, Argument *argument,
// So we have to rename the variable in the subgraph to make sure
// So we have to rename the variable in the subgraph to make sure
// it is either an OP's input or an OP's output.
// it is either an OP's input or an OP's output.
auto
subgraph_nodes
=
func
->
subgraph
;
auto
&
subgraph_nodes
=
*
Agent
(
node
).
subgraph
()
;
for
(
int
index
=
0
;
index
<
block
->
ops_s
ize
();
index
++
)
{
for
(
int
index
=
0
;
index
<
block
_desc
.
OpS
ize
();
index
++
)
{
framework
::
proto
::
OpDesc
*
op
=
block
->
mutable_ops
(
index
);
framework
::
proto
::
OpDesc
*
op
=
block
_desc
.
Op
(
index
)
->
Proto
(
);
auto
correspond_node
=
subgraph_nodes
[
index
];
auto
correspond_node
=
subgraph_nodes
[
index
];
PADDLE_ENFORCE_EQ
(
correspond_node
->
n
ame
(),
op
->
type
());
PADDLE_ENFORCE_EQ
(
correspond_node
->
N
ame
(),
op
->
type
());
std
::
unordered_map
<
std
::
string
,
size_t
>
var2id
;
std
::
unordered_map
<
std
::
string
,
size_t
>
var2id
;
for
(
auto
*
in_var
:
correspond_node
->
in
link
s
)
{
for
(
auto
*
in_var
:
correspond_node
->
in
put
s
)
{
var2id
[
in_var
->
n
ame
()]
=
in_var
->
id
();
var2id
[
in_var
->
N
ame
()]
=
in_var
->
id
();
}
}
// rename for the input variables of op inside subgraph
// rename for the input variables of op inside subgraph
for
(
int
i
=
0
;
i
<
op
->
inputs_size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
op
->
inputs_size
();
i
++
)
{
framework
::
proto
::
OpDesc_Var
*
in_var
=
op
->
mutable_inputs
(
i
);
// one input
auto
*
in_var
=
op
->
mutable_inputs
(
i
);
std
::
vector
<
std
::
string
>
replaced_names
;
std
::
vector
<
std
::
string
>
replaced_names
;
for
(
int
k
=
0
;
k
<
in_var
->
arguments_size
();
k
++
)
{
for
(
int
k
=
0
;
k
<
in_var
->
arguments_size
();
k
++
)
{
// all the arguments
std
::
string
arg_value
=
in_var
->
arguments
(
k
);
std
::
string
arg_value
=
in_var
->
arguments
(
k
);
std
::
string
arg_value_with_id
=
std
::
string
arg_value_with_id
=
arg_value
+
std
::
to_string
(
var2id
[
arg_value
]);
arg_value
+
std
::
to_string
(
var2id
[
arg_value
]);
...
@@ -172,8 +144,8 @@ void CreateTrtEngineOp(Node *node, Argument *argument,
...
@@ -172,8 +144,8 @@ void CreateTrtEngineOp(Node *node, Argument *argument,
}
}
}
}
var2id
.
clear
();
var2id
.
clear
();
for
(
auto
out_var
:
correspond_node
->
out
link
s
)
{
for
(
auto
out_var
:
correspond_node
->
out
put
s
)
{
var2id
[
out_var
->
n
ame
()]
=
out_var
->
id
();
var2id
[
out_var
->
N
ame
()]
=
out_var
->
id
();
}
}
// rename for the output variables of op inside subgraph
// rename for the output variables of op inside subgraph
...
@@ -195,91 +167,54 @@ void CreateTrtEngineOp(Node *node, Argument *argument,
...
@@ -195,91 +167,54 @@ void CreateTrtEngineOp(Node *node, Argument *argument,
}
}
}
}
}
}
// When tensorrt engine runs at the end of the operation,
// When tensorrt engine runs at the end of the operation,
// output_mapping help us copy the data from the renamed ITensor
// output_mapping help us copy the data from the renamed ITensor
// to Tensor.
// to Tensor.
std
::
vector
<
std
::
string
>
output_mapping
;
std
::
vector
<
std
::
string
>
output_mapping
;
for
(
auto
name
:
output_names
)
{
for
(
auto
name
:
output_names
)
{
// LOG(INFO) << name << " " << output_name_map.size();
PADDLE_ENFORCE
(
output_name_map
.
count
(
name
)
!=
0
);
PADDLE_ENFORCE
(
output_name_map
.
count
(
name
)
!=
0
);
output_mapping
.
push_back
(
output_name_map
[
name
]);
output_mapping
.
push_back
(
output_name_map
[
name
]);
}
}
PADDLE_ENFORCE
(
!
block
->
vars
().
empty
(),
"the block has no var-desc"
);
*
block_desc
.
Proto
()
->
mutable_vars
()
=
const_cast
<
framework
::
ProgramDesc
*>
(
&
graph
->
program
())
->
Proto
()
->
blocks
(
0
)
.
vars
();
PADDLE_ENFORCE
(
!
block_desc
.
Proto
()
->
vars
().
empty
(),
"the block has no var-desc"
);
PADDLE_ENFORCE
(
!
output_mapping
.
empty
());
// Set attrs
// Set attrs
SetAttr
(
op_desc
->
Proto
(),
"subgraph"
,
SetAttr
(
desc
.
Proto
(),
"subgraph"
,
block
->
SerializeAsString
());
block_desc
.
Proto
()
->
SerializeAsString
());
SetAttr
(
desc
.
Proto
(),
"max_batch_size"
,
argument
->
Get
<
int
>
(
"max_batch_size"
));
SetAttr
(
op_desc
->
Proto
(),
"max_batch_size"
,
Get
<
int
>
(
"max_batch_size"
));
SetAttr
(
desc
.
Proto
(),
"workspace_size"
,
argument
->
Get
<
int
>
(
"workspace_size"
));
SetAttr
(
op_desc
->
Proto
(),
"workspace_size"
,
Get
<
int
>
(
"workspace_size"
));
SetAttr
(
desc
.
Proto
(),
"engine_uniq_key"
,
"trt-"
+
std
::
to_string
(
counter
++
));
SetAttr
(
op_desc
->
Proto
(),
"engine_uniq_key"
,
SetAttr
(
desc
.
Proto
(),
"parameters"
,
ExtractParameters
(
graph
.
nodes
.
nodes
()
));
"trt-"
+
std
::
to_string
(
counter
++
));
SetAttr
(
desc
.
Proto
(),
"output_name_mapping"
,
output_mapping
);
SetAttr
(
op_desc
->
Proto
(),
"parameters"
,
ExtractParameters
(
graph
->
Nodes
())
);
node
->
SetPbMsg
(
desc
.
Proto
()
->
SerializeAsString
()
);
SetAttr
(
op_desc
->
Proto
(),
"output_name_mapping"
,
output_mapping
);
}
}
std
::
vector
<
std
::
string
>
ExtractParameters
(
std
::
vector
<
std
::
string
>
ExtractParameters
(
const
std
::
vector
<
std
::
unique_ptr
<
Node
>
>
&
nodes
)
{
const
std
::
unordered_set
<
Node
*
>
&
nodes
)
{
std
::
vector
<
std
::
string
>
parameters
;
std
::
vector
<
std
::
string
>
parameters
;
for
(
const
auto
&
node
:
nodes
)
{
for
(
const
auto
&
node
:
nodes
)
{
if
(
!
node
->
IsValue
())
continue
;
if
(
!
node
->
IsVar
())
continue
;
PADDLE_ENFORCE
(
!
node
->
pb_msg
().
empty
(),
"pb_msg should be set first"
);
if
(
node
->
Var
()
->
Persistable
())
{
framework
::
proto
::
VarDesc
var
;
parameters
.
push_back
(
node
->
Name
());
var
.
ParseFromString
(
node
->
pb_msg
());
if
(
var
.
persistable
())
{
parameters
.
push_back
(
var
.
name
());
}
}
}
}
return
parameters
;
return
parameters
;
}
}
// Turns the function-block `node` into a TensorRT engine op and appends that
// op to the root block of the program description being generated.
//
// Steps, in order: build a standalone block description from the ops of the
// node's sub-graph, copy the variables of block 0 of the original program
// into it, let CreateTrtEngineOp() serialize the engine op into the node's
// pb_msg, then parse that message into a new op of the root block.
void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
  // TODO(Superjomn) Here need to expose some arguments for default setting.
  PADDLE_ENFORCE(node->IsFunctionBlock());
  auto *func_block = static_cast<FunctionBlock *>(node);

  // Temporary block description: no parent (-1), index 0.
  framework::proto::BlockDesc block_proto;
  framework::BlockDesc block_desc(nullptr, &block_proto);
  block_desc.Proto()->set_parent_idx(-1);
  block_desc.Proto()->set_idx(0);
  VLOG(40) << "origin variable size: "
           << argument_->origin_program_desc->blocks(0).vars().size();
  VLOG(40) << "transformed variable size: "
           << block_desc.Proto()->vars().size();

  // copy ops.
  // The loop variable intentionally keeps the name `node` (shadowing the
  // parameter) so the enforced expression stays exactly as it was.
  for (auto *node : func_block->subgraph) {
    auto *sub_op = block_desc.AppendOp();
    PADDLE_ENFORCE(!node->pb_msg().empty());
    sub_op->Proto()->ParseFromString(node->pb_msg());
  }

  // The sub-block sees the same variables as block 0 of the original program.
  *block_desc.Proto()->mutable_vars() =
      argument_->origin_program_desc->blocks(0).vars();
  PADDLE_ENFORCE(!block_desc.Proto()->vars().empty());

  CreateTrtEngineOp(node, argument_, block_desc.Proto());

  // Append the engine op, deserialized from the node's pb_msg, to the root
  // block of the output program.
  auto *root_block = desc_->mutable_blocks(framework::kRootBlockIndex);
  auto *engine_op = root_block->add_ops();
  PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block");
  engine_op->ParseFromString(node->pb_msg());
}
namespace {

// File-local Graphviz drawing pass: reuses DFG_GraphvizDrawPass and only
// overrides the reported name and Finalize().
class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
 public:
  using Config = DFG_GraphvizDrawPass::Config;

  explicit DFG_DebuggerPass(const Config &config)
      : DFG_GraphvizDrawPass(config) {}

  // Name reported for this pass.
  std::string repr() const override {
    return "dfg-to-fluid-debuger-pass";
  }

  // Nothing to clean up; always reports success.
  bool Finalize() override {
    return true;
  }
};

}  // namespace
// Creates the Graphviz debugger pass for this pass. The pass is configured
// with FLAGS_IA_graphviz_log_root and the fixed identifier below.
// The result is allocated with `new`; the caller receives the raw pointer.
AnalysisPass *DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const {
  const DFG_GraphvizDrawPass::Config debugger_config(
      FLAGS_IA_graphviz_log_root,
      "data_flow_graph_to_fluid_graphviz_debugger");
  return new DFG_DebuggerPass(debugger_config);
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
// Registers TensorRtSubgraphPass as `tensorrt_subgraph_pass` and declares the
// three pass attributes the implementation reads via Get<>():
// "tensorrt_node_teller", "max_batch_size" and "workspace_size".
REGISTER_PASS(tensorrt_subgraph_pass,
              paddle::inference::analysis::TensorRtSubgraphPass)
    .RequirePassAttr("tensorrt_node_teller")
    .RequirePassAttr("max_batch_size")
    .RequirePassAttr("workspace_size");
paddle/fluid/inference/analysis/
model_store_pass_tester.cc
→
paddle/fluid/inference/analysis/
ir_passes/tensorrt_subgraph_pass.h
浏览文件 @
ddb12035
...
@@ -12,31 +12,24 @@
...
@@ -12,31 +12,24 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/analysis/model_store_pass.h"
#pragma once
#include <paddle/fluid/framework/ir/fuse_pass_base.h>
#include <gflags/gflags.h>
#include "paddle/fluid/framework/ir/pass.h"
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/analyzer.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
DEFINE_string
(
inference_model_dir
,
""
,
"Model path"
);
class
TensorRtSubgraphPass
:
public
framework
::
ir
::
FusePassBase
{
public:
TEST
(
DFG_StorePass
,
test
)
{
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
ApplyImpl
(
Analyzer
analyzer
;
std
::
unique_ptr
<
framework
::
ir
::
Graph
>
graph
)
const
override
;
Argument
argument
(
FLAGS_inference_model_dir
);
argument
.
model_output_store_path
.
reset
(
new
std
::
string
(
"./_dfg_store_pass_tmp"
));
// disable storage in alalyzer
FLAGS_IA_output_storage_path
=
""
;
analyzer
.
Run
(
&
argument
);
ModelStorePass
pass
;
private:
pass
.
Initialize
(
&
argument
);
void
CreateTensorRTOp
(
framework
::
ir
::
Node
*
x
,
pass
.
Run
(
argument
.
main_dfg
.
get
());
framework
::
ir
::
Graph
*
graph
)
const
;
}
void
CleanIntermediateOutputs
(
framework
::
ir
::
Node
*
node
);
};
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/analysis/model_store_pass.cc
已删除
100644 → 0
浏览文件 @
0b962680
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/model_store_pass.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Stores the analyzed model on disk.
//
// Creates the output directory, copies every file of the original model
// directory into it with shell commands, then writes the serialized
// transformed program to "<output dir>/__model__". The graph argument `x` is
// not used.
//
// NOTE(review): the shell commands are assembled from the two directory
// strings without quoting or escaping; if those paths can come from untrusted
// input, this needs hardening.
void ModelStorePass::Run(DataFlowGraph *x) {
  // Default the parameter path when the caller did not set one.
  // NOTE(review): the directory and "param" are joined without a path
  // separator -- confirm that fluid_model_dir is expected to end with '/'.
  if (!argument_->fluid_model_param_path) {
    PADDLE_ENFORCE_NOT_NULL(argument_->fluid_model_dir);
    argument_->fluid_model_param_path.reset(
        new std::string(*argument_->fluid_model_dir + "param"));
  }
  PADDLE_ENFORCE_NOT_NULL(argument_->model_output_store_path);

  // Directly copy param file to destination.
  // NOTE these commands only work on linux.
  std::stringstream ss;

  // Step 1: make sure the destination directory exists.
  ss << "mkdir -p " << *argument_->model_output_store_path;
  VLOG(30) << "run command: " << ss.str();
  PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);

  // Step 2: copy every file of the source model directory.
  ss.str("");
  ss << "cp " << *argument_->fluid_model_dir << "/*"
     << " " << *argument_->model_output_store_path;
  VLOG(30) << "run command: " << ss.str();
  PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);

  // Step 3: store the transformed program next to the copied files.
  PADDLE_ENFORCE_NOT_NULL(argument_->transformed_program_desc,
                          "program desc is not transformed, should call "
                          "DataFlowGraphToFluidPass first.");
  VLOG(30) << "store analyzed program to "
           << *argument_->model_output_store_path;
  const std::string program_output_path =
      *argument_->model_output_store_path + "/__model__";
  std::ofstream file(program_output_path, std::ios::binary);
  PADDLE_ENFORCE(file.is_open(), "failed to open %s to write.",
                 program_output_path);
  const std::string serialized_message =
      argument_->transformed_program_desc->SerializeAsString();
  // NOTE(review): the result of the write is not checked.
  file.write(serialized_message.c_str(), serialized_message.size());
}
// Nothing to finalize for this pass; always reports success.
bool ModelStorePass::Finalize() {
  return true;
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/node.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/node.h"
#include "glog/logging.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
std
::
vector
<
Dot
::
Attr
>
Value
::
dot_attrs
()
const
{
return
std
::
vector
<
Dot
::
Attr
>
({
Dot
::
Attr
(
"style"
,
"filled,rounded"
),
Dot
::
Attr
(
"shape"
,
"box"
),
Dot
::
Attr
(
"fillcolor"
,
"red"
)});
}
std
::
vector
<
Dot
::
Attr
>
Function
::
dot_attrs
()
const
{
return
std
::
vector
<
Dot
::
Attr
>
({
Dot
::
Attr
(
"style"
,
"filled,rounded"
),
Dot
::
Attr
(
"shape"
,
"diamond"
),
Dot
::
Attr
(
"fillcolor"
,
"yellow"
)});
}
// Creates a node of the requested type, appends it to `nodes_` and returns a
// raw pointer to it. The new node's id is its index in `nodes_`.
// Throws through PADDLE_THROW for any type other than kFunction, kValue or
// kFunctionBlock; nothing is appended in that case.
Node *NodeMap::Create(Node::Type type) {
  Node *fresh = nullptr;
  switch (type) {
    case Node::Type::kFunction:
      fresh = new Function;
      break;
    case Node::Type::kValue:
      fresh = new Value;
      break;
    case Node::Type::kFunctionBlock:
      fresh = new FunctionBlock;
      break;
    default:
      PADDLE_THROW("Not supported node type.");
  }
  // Hand the allocation to the container, then stamp the id with the index
  // the node just received.
  nodes_.emplace_back(fresh);
  fresh->id_ = size() - 1;
  return fresh;
}
// Returns a mutable pointer to the node stored at index `id`.
// Enforces that `id` is smaller than the number of nodes.
Node *NodeMap::GetMutable(size_t id) {
  PADDLE_ENFORCE_GT(size(), id);
  Node *const found = nodes_[id].get();
  return found;
}
// Returns a read-only reference to the node stored at index `id`.
// Enforces that `id` is smaller than the number of nodes.
const Node &NodeMap::Get(size_t id) const {
  PADDLE_ENFORCE_GT(size(), id);
  const Node *const found = nodes_[id].get();
  return *found;
}
// Marks the node at index `id` as deleted by calling its SetDeleted(); the
// node itself stays in the container.
// Enforces that `id` is smaller than the number of nodes.
void NodeMap::Delete(size_t id) {
  PADDLE_ENFORCE_LT(id, size());
  auto &slot = nodes_[id];
  slot->SetDeleted();
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/node.h
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the Node class and its subclasses. A Node is the basis
* analysis element in a computation graph.
* There are basically two kinds of nodes, the function node and value node.
*/
#pragma once
#include <limits>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/inference/analysis/device.h"
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/platform/variant.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
class
NodeMap
;
// A helper class to maintain the status from Pass.
struct
AnyAttr
{
using
any_t
=
boost
::
variant
<
bool
,
float
,
int32_t
,
int64_t
,
void
*
,
std
::
string
>
;
// NOTE T should be a primary type or a struct combined by several primary
// types.
// NOTE the STL containers should not use here.
// Some usages
// Attr attr;
// attr.Bool() = true;
bool
&
Bool
()
{
return
As
<
bool
>
();
}
float
&
Float
()
{
return
As
<
float
>
();
}
int32_t
&
Int32
()
{
return
As
<
int32_t
>
();
}
int64_t
&
Int64
()
{
return
As
<
int64_t
>
();
}
void
*&
Pointer
()
{
return
As
<
void
*>
();
}
std
::
string
&
String
()
{
return
As
<
std
::
string
>
();
}
template
<
typename
T
>
T
&
As
()
{
if
(
type_index_
==
typeid
(
AnyAttr
))
{
type_index_
=
typeid
(
T
);
any_data_
=
T
();
}
else
{
PADDLE_ENFORCE
(
type_index_
==
typeid
(
T
),
"fetch error type"
);
}
return
boost
::
get
<
T
>
(
any_data_
);
}
private:
any_t
any_data_
;
std
::
type_index
type_index_
{
typeid
(
AnyAttr
)};
};
/*
 * Node Representation.
 *
 * This is a very important class for analysis. It is the base class of all
 * nodes computed by a program that may be used as operands to other nodes.
 * Node is the super class of other important classes such as Function and
 * Value, some nodes can have a name.
 */
class Node {
 public:
  // Node type. NOTE new node types should be added here.
  enum class Type { kNone = -1, kFunction, kValue, kFunctionBlock };

  Node() = default;

  // Cast to a subclass type, Function for example.
  // NOTE(review): the dynamic_cast result is dereferenced without a null
  // check, so the caller must already know the dynamic type.
  template <typename Subclass>
  Subclass &As() {
    Subclass *const casted = dynamic_cast<Subclass *>(this);
    return *casted;
  }

  // Formatted representation of this Node: "<name>(<id>)".
  virtual std::string repr() const {
    std::string text = name();
    text += "(";
    text += std::to_string(id());
    text += ")";
    return text;
  }

  // DOT node representation. One Node type can customize its own node
  // representation.
  virtual std::vector<Dot::Attr> dot_attrs() const {
    std::vector<Dot::Attr> attrs{Dot::Attr("style", "filled")};
    return attrs;
  }

  // Get an additional attribute by name. NOTE this will silently create a new
  // attribute if it does not exist (the attribute map is mutable, which is
  // why this works on a const Node).
  AnyAttr &attr(const std::string &name) const { return attrs_[name]; }

  int id() const { return id_; }

  // The Protobuf description is set/get with a void* to decouple Node
  // interface from a specific kind of Protobuf message.
  void SetPbDesc(void *pb) { attr("pb_desc").Pointer() = pb; }
  void *pb_desc() const { return attr("pb_desc").Pointer(); }

  // Serialized protobuf message, stored in the "pb_msg" attribute.
  void SetPbMsg(const std::string &s) { attr("pb_msg").String() = s; }
  const std::string &pb_msg() const { return attr("pb_msg").String(); }

  void SetDeleted() { deleted_ = true; }
  bool deleted() const { return deleted_; }

  void SetName(const std::string &name) { name_ = name; }
  const std::string &name() const { return name_; }

  void SetType(Type type) { type_ = type; }
  Type type() const { return type_; }

  // Input links.
  std::vector<Node *> inlinks;
  // Output links.
  std::vector<Node *> outlinks;

  // Type checks.
  bool IsFunction() const { return type_ == Type::kFunction; }
  bool IsValue() const { return type_ == Type::kValue; }
  bool IsFunctionBlock() const { return type_ == Type::kFunctionBlock; }

  virtual ~Node() {}

  friend class NodeMap;

  PADDLE_DISALLOW_COPY_AND_ASSIGN(Node);

 protected:
  // The id number not the name is a node's unique identifier in the
  // computation graph.
  int id_ = -1;
  std::string name_;
  Type type_ = Type::kNone;
  // Mark this node is deleted by some pass.
  bool deleted_ = false;
  mutable std::unordered_map<std::string, AnyAttr> attrs_;
};
class
Function
;
/*
* Value represents a value node, it has some attributes including dims, data
* type and so on.
*/
class Value : public Node {
 public:
  // Element types a Value can carry.
  enum class DataType { kInt32, kInt64, kFloat32, kFloat64 };
  using Dims = std::vector<int>;

  void SetDataType(DataType data_type) { data_type_ = data_type; }
  DataType data_type() const { return data_type_; }

  void SetDims(const Dims &dims) { dims_ = dims; }
  const Dims &dims() const { return dims_; }

  Device device() const { return device_; }
  void SetDevice(Device device) { device_ = device; }

  // DOT attributes for a value node; defined out of line.
  std::vector<Dot::Attr> dot_attrs() const override;

  PADDLE_DISALLOW_COPY_AND_ASSIGN(Value);

 protected:
  // Only NodeMap (a friend) is expected to construct Value nodes.
  Value() { SetType(Node::Type::kValue); }
  friend class NodeMap;

 private:
  // Value-initialized so that data_type() and device() never read an
  // indeterminate value when called before the matching setter.
  DataType data_type_{};
  Dims dims_;
  Device device_{};
};
/*
* Function represents any kind of executable concepts that takes several Values
* as input, and outputs several Values.
*/
class Function : public Node {
 public:
  // DOT attributes for a function node; defined out of line.
  std::vector<Dot::Attr> dot_attrs() const override;

  // Returns the operator type recorded for this function.
  const std::string &func_type() const { return func_type_; }

  // Records the operator type of this function.
  void SetFuncType(const std::string &func_type) { func_type_ = func_type; }

  PADDLE_DISALLOW_COPY_AND_ASSIGN(Function);

 protected:
  std::string func_type_;

  // Only NodeMap (a friend) is expected to construct Function nodes.
  Function() { SetType(Node::Type::kFunction); }
  friend class NodeMap;
};
/*
* FunctionBlock is a Node that contains a sub-graph of multiple Nodes.
*/
struct FunctionBlock : public Node {
  // A block is printed as "block-<id>" instead of "<name>(<id>)".
  std::string repr() const override {
    return "block-" + std::to_string(id());
  }

  // The nodes that make up this block's sub-graph.
  std::vector<Node *> subgraph;

 protected:
  // Only NodeMap (a friend) is expected to construct FunctionBlock nodes.
  FunctionBlock() { SetType(Node::Type::kFunctionBlock); }
  friend class NodeMap;
};
// Container that creates and stores the nodes of a graph. The member
// functions declared without a body are defined elsewhere.
class NodeMap {
 public:
  // Creates a new node of the given type.
  Node *Create(Node::Type type);

  // Looks up a node by its id (mutable and read-only variants).
  Node *GetMutable(size_t id);
  const Node &Get(size_t id) const;

  void Delete(size_t id);

  // Read-only view of all stored nodes.
  const std::vector<std::unique_ptr<Node>> &nodes() const { return nodes_; }

  // Number of stored nodes.
  size_t size() const { return nodes_.size(); }

 private:
  std::vector<std::unique_ptr<Node>> nodes_;
  std::unordered_map<std::string, Node *> map_;
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/node_tester.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/node.h"
#include <gtest/gtest.h>
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// A boolean written through AnyAttr::Bool() must read back unchanged.
TEST(NodeAttr, bool) {
  AnyAttr attribute;
  attribute.Bool() = true;
  ASSERT_EQ(attribute.Bool(), true);
}
// A 32-bit integer written through AnyAttr::Int32() must read back unchanged.
TEST(NodeAttr, int32) {
  AnyAttr attribute;
  attribute.Int32() = 32;
  ASSERT_EQ(attribute.Int32(), 32);
}
// A string written through AnyAttr::String() must read back unchanged.
TEST(NodeAttr, string) {
  AnyAttr attribute;
  attribute.String() = "Hello";
  ASSERT_EQ(attribute.String(), "Hello");
}
// Attributes set on a node through attr() must be retrievable by name.
TEST(Node, Attr) {
  // The node is created through NodeMap as a Value; the attribute logic under
  // test lives in the Node base class.
  NodeMap node_map;
  auto *value_node = node_map.Create(Node::Type::kValue);

  value_node->attr("v0").Int32() = 2008;
  ASSERT_EQ(value_node->attr("v0").Int32(), 2008);

  value_node->attr("str").String() = "hello world";
  ASSERT_EQ(value_node->attr("str").String(), "hello world");
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/pass_manager.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/string/pretty_log.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Stores `argument` and initializes every registered pass with it, in
// registration order. Stops at the first pass whose Initialize() fails, logs
// the failure and returns false; returns true when all passes succeed.
bool PassManager::Initialize(Argument *argument) {
  argument_ = argument;
  for (auto &pass : data_) {
    VLOG(30) << "Initializing pass [" << pass->repr() << "]";
    const bool initialized = pass->Initialize(argument);
    if (initialized) continue;
    LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]";
    return false;
  }
  return true;
}
// Runs every registered pass, in order, on the main data flow graph of the
// argument stored by Initialize(). Enforces that the argument has been set.
void DfgPassManager::RunAll() {
  PADDLE_ENFORCE(argument_);
  VLOG(30) << "Total " << data_.size() << " Analysys passes";
  for (auto &pass : data_) {
    string::PrettyLogEndl(string::Style::H1(), "* Running Analysis pass [%s]",
                          pass->repr());
    // Fetch the graph pointer for each pass rather than caching it up front.
    auto *main_graph = argument_->main_dfg.get();
    pass->Run(main_graph);
  }
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/pass_manager.h
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the logic of pass management. The analysis for inference is
* a pipeline of Passes, a PassManager is an agency that helps to manage the
* execution of the Passes.
*
* There are two modes of Passes, the first one is called NodePass and takes
* a Node as input and output; the second one is called DFGPass and takes a
* DFG(Data Flow Graph) as input and output. It is hard to put all the passes in
* the same pipeline, there are two kinds of PassManagers, both takes a DFG as
* input and output a DFG, but the Passes inside are different:
*
* 1. NodePassManager: the passes inside are all NodePasses, it can have
* different graph traversal algorithms, for example, DFS_NodePassManager will
* trigger the passes in depth first order;
* 2. DfgPassManager: the passes inside are all DfgPasses.
*/
#pragma once
#include <string>
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
* PassManager is the base class for all pass managers, a pass manager has
* several Pass-es registered, and execute them in the linear order.
*/
class PassManager : public OrderedRegistry<AnalysisPass> {
 public:
  PassManager() = default;

  // Calls the Initialize method of all the passes. The desc and the
  // data_flow_graph are globally shared, so they are handed to every pass
  // manager as the arguments.
  // This const-reference overload does nothing and always reports failure.
  virtual bool Initialize(const Argument &argument) { return false; }
  virtual bool Initialize(Argument *argument);

  // Calls the Finalize method of all the passes, in order. Stops at the first
  // failure, logs it and returns false; returns true otherwise.
  virtual bool Finalize() {
    for (auto &pass : data_) {
      if (pass->Finalize()) continue;
      LOG(ERROR) << "Failed to finalize pass [" << pass->repr() << "]";
      return false;
    }
    return true;
  }

  // Runs all the passes.
  virtual void RunAll() = 0;

  // Short identifier.
  virtual std::string repr() const = 0;
  // Long description.
  virtual std::string description() const = 0;

  virtual ~PassManager() = default;

 protected:
  // Argument handed to Initialize(Argument*); null until then.
  Argument *argument_{nullptr};
};
/*
* A pass manager that processes a DFG.
*/
class DfgPassManager : public PassManager {
 public:
  DfgPassManager() = default;

  // Runs all the registered passes; defined out of line.
  void RunAll() override;

  virtual ~DfgPassManager() = default;
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/pass_manager_tester.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Minimal concrete DfgPassManager used by the test below; it only supplies
// the identifier and description that the base class leaves pure virtual.
class TestDfgPassManager final : public DfgPassManager {
 public:
  TestDfgPassManager() = default;
  virtual ~TestDfgPassManager() = default;

  // Short identifier.
  std::string repr() const override { return "test-pass-manager"; }

  // Long description.
  std::string description() const override { return "test doc"; }
};
// Registers three passes (fluid -> DFG, graphviz drawing, DFG -> fluid) in a
// DfgPassManager, initializes them against the inference model directory
// given by FLAGS_inference_model_dir and runs them all.
TEST(PassManager, DFG_pass_manager) {
  TestDfgPassManager manager;
  DFG_GraphvizDrawPass::Config config("./", "dfg.dot");

  manager.Register("fluid-to-flow-graph", new FluidToDataFlowGraphPass);
  manager.Register("graphviz", new DFG_GraphvizDrawPass(config));
  manager.Register("dfg-to-fluid", new DataFlowGraphToFluidPass);

  Argument argument(FLAGS_inference_model_dir);

  // The former ASSERT_TRUE(&argument) was dropped: the address of a local
  // object is never null, so that assertion could not fail.
  ASSERT_TRUE(manager.Initialize(&argument));
  manager.RunAll();
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/passes/CMakeLists.txt
0 → 100644
浏览文件 @
ddb12035
# Libraries for the individual IR analysis passes.
cc_library(ir_graph_build_pass SRCS ir_graph_build_pass.cc DEPS analysis_pass argument ir_pass_manager)
cc_library(ir_analysis_pass SRCS ir_analysis_pass.cc DEPS analysis_pass argument ir_pass_manager)
# Umbrella library built from passes.cc.
cc_library(analysis_passes SRCS passes.cc DEPS ir_graph_build_pass ir_analysis_pass)

# Append the pass libraries to the cached analysis_deps list.
set(analysis_deps ${analysis_deps}
    ir_graph_build_pass
    ir_analysis_pass
    analysis_passes
    CACHE INTERNAL "")
paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc
0 → 100644
浏览文件 @
ddb12035
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h"
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/string/pretty_log.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Entry point of the compose pass: checks that `ir_analysis_passes` is set on
// the argument, prepares the TensorRT attributes when TensorRT is requested,
// applies the IR passes and finally collects the fusion statistics.
void IrAnalysisComposePass::RunImpl(Argument *argument) {
  ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes);

  const bool tensorrt_requested =
      argument->use_tensorrt_valid() && argument->use_tensorrt();
  if (tensorrt_requested) {
    InitTensorRTAttrs(argument);
  }

  ApplyIrPasses(argument);
  CollectFusionStatis(argument);
}
// Short identifier of this pass.
std::string IrAnalysisComposePass::repr() const {
  return "ir-analysis-compose-pass";
}
// Installs the TensorRT node teller on `argument` when TensorRT is enabled.
// The teller returns true only for operator nodes whose op type is in the
// fixed list of op types below.
void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) {
  if (argument->use_tensorrt_valid() && argument->use_tensorrt()) {
    LOG(INFO) << "Initing TensorRT pass";
    argument->SetTensorRtNodeTeller([](const framework::ir::Node *node) {
      // Built once on first use instead of on every teller invocation; the
      // contents never change.
      static const std::unordered_set<std::string> teller_set(
          {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
           "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
           "elementwise_add", "dropout", "split"});
      if (!node->IsOp()) return false;
      return teller_set.count(node->Op()->Type()) > 0;
    });
  }
}
// Runs the fixed sequence of analysis passes on `argument`, fetching each one
// from the global PassRegistry by name.
void IrAnalysisComposePass::ApplyIrPasses(Argument *argument) {
  std::vector<std::string> passes({"ir_graph_build_pass", "ir_analysis_pass"});
  for (const auto &pass : passes) {
    VLOG(2) << "Run pass " << pass;
    PassRegistry::Global().Retreive(pass)->Run(argument);
  }
}
// Copies the fusion statistics attached to the main graph (under
// kFuseStatisAttr) into `argument`. Only logs a message when the graph
// carries no such attribute.
void IrAnalysisComposePass::CollectFusionStatis(Argument *argument) {
  const bool has_statis =
      argument->main_graph().Has(framework::ir::kFuseStatisAttr);
  if (has_statis) {
    argument->SetFusionStatis(
        argument->main_graph().Get<Argument::fusion_statis_t>(
            framework::ir::kFuseStatisAttr));
    return;
  }
  LOG(INFO) << "argument has no fuse statis";
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/
model_stor
e_pass.h
→
paddle/fluid/inference/analysis/
passes/ir_analysis_compos
e_pass.h
浏览文件 @
ddb12035
...
@@ -12,42 +12,35 @@
...
@@ -12,42 +12,35 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
/*
* This file defines ModelStorePass, which store the runtime DFG to a Paddle
* model in the disk, and that model can be reloaded for prediction.
*/
#pragma once
#pragma once
#include <string>
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/passes/passes.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
class
ModelStorePass
:
public
DataFlowGraphPass
{
/*
* The analysis pass to run a list of IR passes (like a function call).
* Currently, it should be the first pass of analysis phase.
*/
class
IrAnalysisComposePass
:
public
AnalysisPass
{
public:
public:
bool
Initialize
(
Argument
*
argument
)
override
{
void
RunImpl
(
Argument
*
argument
)
override
;
if
(
!
argument
)
{
std
::
string
repr
()
const
override
;
LOG
(
ERROR
)
<<
"invalid argument"
;
return
false
;
}
argument_
=
argument
;
return
true
;
}
void
Run
(
DataFlowGraph
*
x
)
override
;
private:
void
InitTensorRTAttrs
(
Argument
*
argument
);
std
::
string
repr
()
const
override
{
return
"DFG-store-pass"
;
}
void
ApplyIrPasses
(
Argument
*
argument
);
std
::
string
description
()
const
override
{
return
R"DD(This file defines ModelStorePass, which store the runtime DFG to a Paddle
model in the disk, and that model can be reloaded for prediction again.)DD"
;
}
bool
Finalize
()
override
;
void
CollectFusionStatis
(
Argument
*
argument
)
;
private:
// Assign a Scope for IR passes to modify the weights.
Argument
*
argument_
{
nullptr
}
;
void
AssignScopeToModify
(
Argument
*
argument
)
;
};
};
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
0 → 100644
浏览文件 @
ddb12035
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Applies the IR passes to the main graph held by `argument`. The graph is
// taken out of the argument, run through an IRPassManager, used to produce
// the analyzed program, and then handed back to the argument.
void IrAnalysisPass::RunImpl(Argument *argument) {
  ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes);
  ARGUMENT_CHECK_FIELD(argument, main_program);
  ARGUMENT_CHECK_FIELD(argument, scope);

  // Take ownership of the graph for the duration of the pass.
  std::unique_ptr<Graph> graph(argument->ReleaseMainGraph());

  // Apply passes.
  IRPassManager the_ir_manager(argument);
  graph = the_ir_manager.Apply(std::move(graph));
  PADDLE_ENFORCE_GT(graph->Nodes().size(), 0);

  auto *analyzed_program = new framework::proto::ProgramDesc(
      the_ir_manager.AcquireProgram(&graph, argument->main_program()));
  argument->SetIrAnalyzedProgram(analyzed_program);

  // Return the (possibly rewritten) graph to the argument.
  argument->SetMainGraph(graph.release());
}
// Short identifier of this pass.
std::string IrAnalysisPass::repr() const {
  return "ir-analysis-pass";
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/
node_attr_flag
s.h
→
paddle/fluid/inference/analysis/
passes/ir_analysis_pas
s.h
浏览文件 @
ddb12035
...
@@ -12,20 +12,25 @@
...
@@ -12,20 +12,25 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
/*
* This file contains all the flags that declared in Node::Attr.
*
* The Node::Attr is designed to share information between different passes, one
* can get other's attributes in a Node by the flags in this file.
*/
#pragma once
#pragma once
#include <string>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
#define DECLARE_NODE_ATTR(flag__) const char ATTR_##flag__[] = #flag__;
/*
* Perform IR analysis passes.
DECLARE_NODE_ATTR
(
supported_by_tensorrt
)
// bool
*
* It is used to fuse some
*/
class
IrAnalysisPass
:
public
AnalysisPass
{
public:
void
RunImpl
(
Argument
*
argument
)
override
;
std
::
string
repr
()
const
override
;
};
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/analysis/
fluid_to_ir
_pass.cc
→
paddle/fluid/inference/analysis/
passes/ir_graph_build
_pass.cc
浏览文件 @
ddb12035
...
@@ -12,49 +12,62 @@
...
@@ -12,49 +12,62 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/analysis/fluid_to_ir_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
#include <paddle/fluid/framework/ir/fuse_pass_base.h>
#include <string>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
extern
void
ReadBinaryFile
(
const
std
::
string
&
filename
,
std
::
string
*
contents
);
namespace
analysis
{
namespace
analysis
{
void
FluidToIrPass
::
EnableParamModify
(
const
std
::
string
&
model_dir
,
void
IrGraphBuildPass
::
RunImpl
(
Argument
*
argument
)
{
const
std
::
string
&
prog_file
,
if
(
!
argument
->
scope_valid
())
{
const
std
::
string
&
param_file
)
{
argument
->
SetScope
(
new
framework
::
Scope
);
PADDLE_ENFORCE
(
argument_
);
}
argument_
->
Set
(
framework
::
ir
::
kParamScopeAttr
,
new
framework
::
Scope
);
// Load parameters.
VLOG
(
30
)
<<
"Loading parameters from "
<<
model_dir
;
LoadParams
(
&
argument_
->
Get
<
framework
::
Scope
>
(
framework
::
ir
::
kParamScopeAttr
),
model_dir
,
prog_file
,
param_file
);
}
bool
FluidToIrPass
::
LoadParams
(
framework
::
Scope
*
scope
,
const
std
::
string
&
dir
,
if
(
argument
->
model_dir_valid
())
{
const
std
::
string
&
prog_file
,
auto
program
=
LoadModel
(
argument
->
model_dir
(),
argument
->
scope_ptr
());
const
std
::
string
&
param_file
)
{
argument
->
SetMainProgram
(
program
.
release
());
platform
::
CPUPlace
place
;
}
else
if
(
argument
->
model_program_path_valid
()
&&
platform
::
CPUDeviceContext
ctx
(
place
);
argument
->
model_params_path_valid
())
{
framework
::
Executor
executor
(
place
);
auto
program
=
PADDLE_ENFORCE
(
argument_
->
origin_program_desc
.
get
());
LoadModel
(
argument
->
model_program_path
(),
argument
->
model_params_path
(),
framework
::
ProgramDesc
program
(
*
argument_
->
origin_program_desc
);
argument
->
scope_ptr
());
if
((
!
prog_file
.
empty
())
&&
(
!
param_file
.
empty
()))
{
argument
->
SetMainProgram
(
program
.
release
());
LOG
(
INFO
)
<<
"load single model file from "
<<
prog_file
;
Load
(
&
executor
,
scope
,
prog_file
,
param_file
);
}
else
if
(
!
dir
.
empty
())
{
LOG
(
INFO
)
<<
"load from dir "
<<
dir
;
Load
(
&
executor
,
scope
,
dir
);
}
else
{
}
else
{
LOG
(
ERROR
)
<<
"failed to load parameters"
;
PADDLE_THROW
(
return
false
;
"either model_dir or (program path and parameter path) should be set."
)
;
}
}
return
true
;
auto
graph
=
std
::
unique_ptr
<
Graph
>
(
new
Graph
(
argument
->
main_program
()));
argument
->
SetMainGraph
(
graph
.
release
());
argument
->
main_graph
().
Set
(
framework
::
ir
::
kParamScopeAttr
,
new
framework
::
Scope
*
(
argument
->
scope_ptr
()));
}
}
std
::
unique_ptr
<
framework
::
ProgramDesc
>
IrGraphBuildPass
::
LoadModel
(
const
std
::
string
&
path
,
framework
::
Scope
*
scope
)
{
platform
::
CPUPlace
place
;
framework
::
Executor
exe
(
place
);
return
Load
(
&
exe
,
scope
,
path
);
}
std
::
unique_ptr
<
framework
::
ProgramDesc
>
IrGraphBuildPass
::
LoadModel
(
const
std
::
string
&
program_path
,
const
std
::
string
&
params_path
,
framework
::
Scope
*
scope
)
{
platform
::
CPUPlace
place
;
framework
::
Executor
exe
(
place
);
return
Load
(
&
exe
,
scope
,
program_path
,
params_path
);
}
// Short identifier of this pass.
std::string IrGraphBuildPass::repr() const {
  return "ir-graph-build-pass";
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h
0 → 100644
浏览文件 @
ddb12035
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
* Load program and parameter to memory from the disk.
*/
class IrGraphBuildPass : public AnalysisPass {
 public:
  void RunImpl(Argument *argument) override;

  // Short identifier of this pass.
  std::string repr() const override;

 private:
  // Loads a model from a single directory into `scope`.
  std::unique_ptr<framework::ProgramDesc> LoadModel(const std::string &path,
                                                    framework::Scope *scope);

  // Loads a model from a separate program file and parameter file into
  // `scope`.
  std::unique_ptr<framework::ProgramDesc> LoadModel(
      const std::string &program_path, const std::string &params_path,
      framework::Scope *scope);

  std::string model_binary_str_;
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/
fluid_to_data_flow_graph_pass_tester
.cc
→
paddle/fluid/inference/analysis/
passes/passes
.cc
浏览文件 @
ddb12035
...
@@ -12,25 +12,21 @@
...
@@ -12,25 +12,21 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/analysis/
fluid_to_data_flow_graph_pas
s.h"
#include "paddle/fluid/inference/analysis/
passes/passe
s.h"
#include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc"
#include
<gtest/gtest.h>
#include
"paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/inference/analysis/
ut_helper
.h"
#include "paddle/fluid/inference/analysis/
passes/ir_graph_build_pass
.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
PassRegistry
::
PassRegistry
()
{
TEST
(
FluidToDataFlowGraphPass
,
Test
)
{
passes_
.
emplace
(
"ir_analysis_pass"
,
FluidToDataFlowGraphPass
pass
;
std
::
unique_ptr
<
AnalysisPass
>
(
new
IrAnalysisPass
));
Argument
argument
(
FLAGS_inference_model_dir
);
passes_
.
emplace
(
"ir_graph_build_pass"
,
pass
.
Initialize
(
&
argument
);
std
::
unique_ptr
<
AnalysisPass
>
(
new
IrGraphBuildPass
));
pass
.
Run
(
argument
.
main_dfg
.
get
());
passes_
.
emplace
(
"ir_analysis_compose_pass"
,
// Analysis is sensitive to ProgramDesc, careful to change the original model.
std
::
unique_ptr
<
AnalysisPass
>
(
new
IrAnalysisComposePass
));
ASSERT_EQ
(
argument
.
main_dfg
->
nodes
.
size
(),
38UL
);
pass
.
Finalize
();
ASSERT_FALSE
(
argument
.
main_dfg
->
DotString
().
empty
());
EXPECT_FALSE
(
argument
.
main_dfg
->
inputs
().
empty
());
}
}
}
// namespace analysis
}
// namespace analysis
...
...
paddle/fluid/inference/analysis/
fluid_to_ir_pass_tester.cc
→
paddle/fluid/inference/analysis/
passes/passes.h
浏览文件 @
ddb12035
...
@@ -12,24 +12,30 @@
...
@@ -12,24 +12,30 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#
include "paddle/fluid/inference/analysis/fluid_to_ir_pass.h"
#
pragma once
#include <gtest/gtest.h>
#include <string>
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
namespace
analysis
{
namespace
analysis
{
TEST
(
FluidToIrPass
,
Test
)
{
struct
PassRegistry
{
FluidToIrPass
pass
;
PassRegistry
();
Argument
argument
(
FLAGS_inference_model_dir
);
argument
.
Set
(
kFluidToIrPassesAttr
,
AnalysisPass
*
Retreive
(
const
std
::
string
&
pass_type
)
{
new
std
::
vector
<
std
::
string
>
({
"infer_clean_graph_pass"
}));
return
passes_
[
pass_type
].
get
();
pass
.
Initialize
(
&
argument
);
}
pass
.
Run
(
argument
.
main_dfg
.
get
());
}
static
PassRegistry
&
Global
()
{
static
auto
*
x
=
new
PassRegistry
;
return
*
x
;
}
private:
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
AnalysisPass
>>
passes_
;
};
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
...
...
paddle/fluid/inference/analysis/subgraph_splitter.h
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the classes to partition a graph.
*/
#pragma once
#include <vector>
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
* Detect the nodes in a sub-graph that meet some conditions. This class doesn't
* modify the graph.
*/
class SubGraphSplitter {
 public:
  // Name of the node attribute used as the sub-graph marker.
  static const char *kMarkerAttrName;

  // Predicate telling whether a node is inside a sub-graph.
  using NodeInsideSubgraphTeller = std::function<bool(const Node *)>;

  SubGraphSplitter(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller)
      : graph_(graph), node_inside_subgraph_teller_(teller) {}

  // Returns the detected sub-graphs, each as a list of nodes.
  std::vector<std::vector<Node *>> operator()();

 protected:
  // Marks the nodes inside the accepted sub-graphs using
  // node_inside_subgraph_teller.
  void MarkNodesInsideSubGraph();

  // Merges the marked nodes into sub-graphs and returns the sub-graphs.
  std::vector<std::vector<Node *>> ExtractSubGraphs();

 private:
  DataFlowGraph *graph_;
  NodeInsideSubgraphTeller node_inside_subgraph_teller_;
};
/*
* SubGraphFuse - Replace some nodes with the sub-graph node they are inside. To
* some extent, the TensorRT engine is just a fusion op for a model.
*/
class SubGraphFuse {
 public:
  using NodeInsideSubgraphTeller = SubGraphSplitter::NodeInsideSubgraphTeller;

  SubGraphFuse(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller,
               Argument *argument)
      : graph_(graph),
        node_inside_subgraph_teller_(teller),
        argument_(argument) {}

  // The main method, which runs all the logic.
  void operator()();

 protected:
  // Removes the nodes inside sub-graphs and replaces them with the
  // SubGraphNode.
  void ReplaceNodesWithSubGraphs();

 private:
  DataFlowGraph *graph_;
  NodeInsideSubgraphTeller node_inside_subgraph_teller_;
  Argument *argument_;
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/subgraph_splitter_tester.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Teller shared by the tests below: it accepts only Function nodes whose op
// type is on a short whitelist, and logs every node it accepts.
SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) {
  if (node->type() != Node::Type::kFunction) return false;

  const auto* function = static_cast<const Function*>(node);
  const char* const accepted_ops[] = {"elementwise_add", "relu",    "conv2d",
                                      "mul",             "sigmoid", "softmax"};
  for (const char* op : accepted_ops) {
    if (function->func_type() == op) {
      LOG(INFO) << "sub-graph marked " << node->repr();
      return true;
    }
  }
  return false;
};
// Splits the test model with `teller` and checks both the number of marked
// Function nodes and the shape of the resulting sub-graphs.
TEST(SubGraphSplitter, Split) {
  auto program = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__");
  auto dfg = ProgramDescToDFG(program);
  LOG(INFO) << "spliter\n" << dfg.DotString();
  ASSERT_GT(dfg.nodes.size(), 5UL);

  SubGraphSplitter splitter(&dfg, teller);
  auto subgraphs = splitter();

  // Count the Function nodes that carry the splitter's marker attribute.
  int num_marked = 0;
  for (auto& node : dfg.nodes.nodes()) {
    if (!node->IsFunction()) continue;
    if (node->attr(SubGraphSplitter::kMarkerAttrName).Bool()) {
      ++num_marked;
    }
  }
  EXPECT_EQ(num_marked, 6);

  // Dump every sub-graph to the log; useful when debugging by hand.
  for (auto& subgraph : subgraphs) {
    LOG(INFO) << "subgraph size " << subgraph.size();
    for (auto* member : subgraph) {
      LOG(INFO) << "node " << member->repr();
    }
  }

  ASSERT_EQ(subgraphs.size(), 1UL);
  // The last (and only) sub-graph is expected to hold 6 nodes.
  ASSERT_EQ(subgraphs.back().size(), 6UL);
}
// Fuses the accepted nodes of the test model and checks how many nodes were
// added to and deleted from the graph.
TEST(SubGraphSplitter, Fuse) {
  auto program = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__");
  auto dfg = ProgramDescToDFG(program);

  Argument argument;
  // NOTE(review): Set<int> presumably takes ownership of the raw pointer --
  // confirm against Argument.
  argument.Set<int>("minimum_subgraph_size", new int(3));

  const size_t nodes_before = dfg.nodes.size();

  SubGraphFuse fuse(&dfg, teller, &argument);
  fuse();

  // Tally (and log) the nodes flagged as deleted by the fusion.
  int num_deleted = 0;
  for (auto& node : dfg.nodes.nodes()) {
    const auto is_deleted = node->deleted();
    if (is_deleted) {
      LOG(INFO) << "deleted " << node->repr();
    }
    num_deleted += is_deleted;
  }

  // Exactly one node is expected to be added: the new FunctionBlock.
  ASSERT_EQ(dfg.nodes.size(), nodes_before + 1);
  // The fused nodes are expected to be flagged as deleted.
  ASSERT_EQ(11, num_deleted);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc
已删除
100644 → 0
浏览文件 @
0b962680
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/node_attr_flags.h"
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Visits every node of `graph` and records the teller's verdict in the node's
// ATTR_supported_by_tensorrt boolean attribute.
void TensorRTSubgraphNodeMarkPass::Run(DataFlowGraph* graph) {
  for (auto& node : graph->nodes.nodes()) {
    const bool supported = teller_(node.get());
    node->attr(ATTR_supported_by_tensorrt).Bool() = supported;
  }
}
class
DfgDebuggerPass
:
public
DFG_GraphvizDrawPass
{
public:
explicit
DfgDebuggerPass
(
const
DFG_GraphvizDrawPass
::
Config
&
config
)
:
DFG_GraphvizDrawPass
(
config
)
{}
std
::
string
repr
()
const
override
{
return
"tensorrt-subgraph-node-mark-debugger"
;
}
bool
Finalize
()
override
{
return
true
;
}
protected:
std
::
string
Draw
(
DataFlowGraph
*
graph
)
override
{
Dot
dot
;
// Add nodes
for
(
size_t
i
=
0
;
i
<
graph
->
nodes
.
size
();
i
++
)
{
const
Node
&
node
=
graph
->
nodes
.
Get
(
i
);
if
(
config_
.
display_deleted_node
||
!
node
.
deleted
())
{
auto
dot_attr
=
node
.
dot_attrs
();
if
(
node
.
attr
(
ATTR_supported_by_tensorrt
).
Bool
())
{
dot_attr
.
assign
(
{
Dot
::
Attr
{
"color"
,
"green"
},
Dot
::
Attr
{
"style"
,
"filled"
}});
}
dot
.
AddNode
(
node
.
repr
(),
dot_attr
);
}
}
// Add edges
for
(
size_t
i
=
0
;
i
<
graph
->
nodes
.
size
();
i
++
)
{
const
Node
&
node
=
graph
->
nodes
.
Get
(
i
);
if
(
!
config_
.
display_deleted_node
&&
node
.
deleted
())
continue
;
for
(
auto
&
in
:
node
.
inlinks
)
{
if
(
!
config_
.
display_deleted_node
&&
in
->
deleted
())
continue
;
dot
.
AddEdge
(
in
->
repr
(),
node
.
repr
(),
{});
}
}
return
dot
.
Build
();
}
};
// Creates the companion debugger pass that draws the marked graph under
// FLAGS_IA_graphviz_log_root with the "tensorrt_marked_node" tag. The pass is
// allocated with `new` and returned as a raw pointer.
AnalysisPass* TensorRTSubgraphNodeMarkPass::CreateGraphvizDebugerPass() const {
  const DFG_GraphvizDrawPass::Config draw_config(FLAGS_IA_graphviz_log_root,
                                                 "tensorrt_marked_node");
  AnalysisPass* debugger = new DfgDebuggerPass(draw_config);
  return debugger;
}
// This pass has nothing to finalize; it always reports success.
bool TensorRTSubgraphNodeMarkPass::Finalize() { return true; }
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc
已删除
100644 → 0
浏览文件 @
0b962680
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/node_attr_flags.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Builds the data-flow graph of the test model, marks every "mul" Function
// node as TensorRT-supported, and checks how many nodes ended up marked.
TEST(TensorRTSubgraphNodeMarkPass, test) {
  // Build the data-flow graph first.
  FluidToDataFlowGraphPass to_dfg_pass;
  Argument argument(FLAGS_inference_model_dir);
  ASSERT_TRUE(to_dfg_pass.Initialize(&argument));
  to_dfg_pass.Run(argument.main_dfg.get());

  // Only "mul" functions are reported as supported.
  TensorRTSubgraphNodeMarkPass::teller_t teller = [](const Node* node) {
    if (!node->IsFunction()) return false;
    return static_cast<const Function*>(node)->func_type() == "mul";
  };

  TensorRTSubgraphNodeMarkPass mark_pass(teller);
  ASSERT_TRUE(mark_pass.Initialize(&argument));
  mark_pass.Run(argument.main_dfg.get());

  int num_marked = 0;
  for (auto& node : argument.main_dfg->nodes.nodes()) {
    if (node->attr(ATTR_supported_by_tensorrt).Bool()) {
      ++num_marked;
    }
  }
  ASSERT_EQ(num_marked, 2);
  LOG(INFO) << num_marked << " nodes marked";
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
已删除
100644 → 0
浏览文件 @
0b962680
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Keeps a copy of `teller`; it is handed to SubGraphFuse when the pass runs.
TensorRTSubGraphPass::TensorRTSubGraphPass(
    const TensorRTSubGraphPass::NodeInsideSubgraphTeller& teller)
    : node_inside_subgraph_teller_(teller) {}
// Fuses the sub-graphs accepted by the stored teller inside `graph`, then
// emits a verbose summary of the graph's functions (values are omitted).
void TensorRTSubGraphPass::Run(DataFlowGraph* graph) {
  SubGraphFuse fuse(graph, node_inside_subgraph_teller_, argument_);
  fuse();

  VLOG(40) << "debug info "
           << graph->HumanReadableInfo(false /*show_values*/,
                                       true /*show_functions*/);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/node.h"
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
 * Parse the graph and replace TensorRT supported nodes with SubGraphNode
 */
class TensorRTSubGraphPass : public DataFlowGraphPass {
 public:
  // Tell whether to transform a sub-graph into TensorRT.
  using NodeInsideSubgraphTeller = SubGraphFuse::NodeInsideSubgraphTeller;

  // Keeps a copy of `teller` for later use by Run().
  explicit TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller);

  // Remembers `argument` so Run() can forward it to SubGraphFuse. Always
  // succeeds.
  bool Initialize(Argument* argument) override {
    argument_ = argument;
    return true;
  }

  // This class get a sub-graph as input and determine whether to transform
  // this sub-graph into TensorRT.
  void Run(DataFlowGraph* graph) override;

  // Nothing to clean up; always succeeds.
  bool Finalize() override { return true; }

  std::string repr() const override { return "tensorrt-sub-graph"; }
  std::string description() const override { return "tensorrt sub graph pass"; }

 private:
  NodeInsideSubgraphTeller node_inside_subgraph_teller_;
  // Set by Initialize(). Defaults to nullptr so that a pass that was never
  // initialized holds a well-defined value instead of an indeterminate one.
  Argument* argument_{nullptr};
};
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc
已删除
100644 → 0
浏览文件 @
0b962680
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
// Directory handed to the graphviz draw passes in the test below; defaults to
// the current directory. The flag has no help text.
DEFINE_string(dot_dir, "./", "");
// End-to-end smoke test: build the data-flow graph, draw it, run the TensorRT
// sub-graph pass, draw it again, and log every function block found.
TEST(TensorRTSubGraphPass, main) {
  const std::unordered_set<std::string> teller_set(
      {"elementwise_add", "mul", "sigmoid"});

  // Accept Function nodes whose op type is in `teller_set`. The set is
  // captured by reference and lives until the end of this test body.
  SubGraphSplitter::NodeInsideSubgraphTeller teller =
      [&teller_set](const Node* node) {
        if (node->type() != Node::Type::kFunction) return false;
        const auto* function = static_cast<const Function*>(node);
        return teller_set.count(function->func_type()) > 0;
      };

  Argument argument(FLAGS_inference_model_dir);
  // NOTE(review): Set<T> presumably takes ownership of these raw pointers --
  // confirm against Argument.
  argument.Set<int>("minimum_subgraph_size", new int(0));
  argument.Set<int>("max_batch_size", new int(3));
  argument.Set<int>("workspace_size", new int(1 << 20));
  argument.Set<std::string>("precision_mode", new std::string("FP32"));

  DFG_GraphvizDrawPass::Config origin_config{FLAGS_dot_dir, "origin"};
  DFG_GraphvizDrawPass::Config fusion_config{FLAGS_dot_dir, "fusion"};

  DFG_GraphvizDrawPass draw_origin(origin_config);
  DFG_GraphvizDrawPass draw_fusion(fusion_config);
  FluidToDataFlowGraphPass to_dfg_pass;
  // The constructor takes the teller by const reference and copies it.
  TensorRTSubGraphPass trt_pass(teller);

  draw_origin.Initialize(&argument);
  draw_fusion.Initialize(&argument);
  to_dfg_pass.Initialize(&argument);
  trt_pass.Initialize(&argument);

  argument.main_dfg.reset(new DataFlowGraph);
  DataFlowGraph* graph = argument.main_dfg.get();
  to_dfg_pass.Run(graph);
  draw_origin.Run(graph);
  trt_pass.Run(graph);
  draw_fusion.Run(graph);

  // Check the TRT op's block desc
  for (auto& node : argument.main_dfg->nodes.nodes()) {
    if (!node->IsFunctionBlock()) continue;
    LOG(INFO) << "get function block";
  }
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/ut_helper.h
浏览文件 @
ddb12035
...
@@ -18,8 +18,6 @@ limitations under the License. */
...
@@ -18,8 +18,6 @@ limitations under the License. */
#include <fstream>
#include <fstream>
#include <string>
#include <string>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -32,29 +30,6 @@ namespace analysis {
...
@@ -32,29 +30,6 @@ namespace analysis {
DEFINE_string
(
inference_model_dir
,
""
,
"inference test model dir"
);
DEFINE_string
(
inference_model_dir
,
""
,
"inference test model dir"
);
static
DataFlowGraph
ProgramDescToDFG
(
const
framework
::
proto
::
ProgramDesc
&
desc
)
{
DataFlowGraph
graph
;
FluidToDataFlowGraphPass
pass
;
Argument
argument
;
argument
.
fluid_model_dir
.
reset
(
new
std
::
string
(
FLAGS_inference_model_dir
));
argument
.
origin_program_desc
.
reset
(
new
framework
::
proto
::
ProgramDesc
(
desc
));
pass
.
Initialize
(
&
argument
);
pass
.
Run
(
&
graph
);
pass
.
Finalize
();
return
graph
;
}
class
DFG_Tester
:
public
::
testing
::
Test
{
protected:
void
SetUp
()
override
{
auto
desc
=
LoadProgramDesc
(
FLAGS_inference_model_dir
+
"/__model__"
);
argument
.
origin_program_desc
.
reset
(
new
framework
::
proto
::
ProgramDesc
(
desc
));
}
Argument
argument
;
};
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
ddb12035
...
@@ -17,17 +17,22 @@ if(APPLE)
...
@@ -17,17 +17,22 @@ if(APPLE)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=pessimizing-move"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=pessimizing-move"
)
endif
(
APPLE
)
endif
(
APPLE
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor
${
GLOB_PASS_LIB
}
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor analysis_predictor
${
GLOB_PASS_LIB
}
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine analysis_predicto
r
)
set
(
inference_deps
${
inference_deps
}
tensorrt_engine tensorrt_converte
r
)
endif
()
endif
()
cc_library
(
reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope
)
cc_library
(
reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS reset_tensor_array lod_tensor scope
)
cc_library
(
analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor
)
cc_library
(
paddle_pass_builder SRCS paddle_pass_builder.cc
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope paddle_pass_builder reset_tensor_array analysis_config analysis_config paddle_pass_builder
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api
)
cc_test
(
test_paddle_inference_api
cc_test
(
test_paddle_inference_api
SRCS api_tester.cc
SRCS api_tester.cc
DEPS paddle_inference_api
)
DEPS paddle_inference_api
)
...
@@ -40,20 +45,10 @@ endif()
...
@@ -40,20 +45,10 @@ endif()
cc_test
(
test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor
${
inference_deps
}
cc_test
(
test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor
${
inference_deps
}
ARGS --dirname=
${
WORD2VEC_MODEL_DIR
}
)
ARGS --dirname=
${
WORD2VEC_MODEL_DIR
}
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
cc_library
(
paddle_inference_tensorrt_subgraph_engine
SRCS api_tensorrt_subgraph_engine.cc
DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter zero_copy_tensor_dummy
)
if
(
WITH_TESTING
)
inference_base_test
(
test_api_tensorrt_subgraph_engine SRCS api_tensorrt_subgraph_engine_tester.cc DEPS
${
inference_deps
}
ARGS --dirname=
${
WORD2VEC_MODEL_DIR
}
)
endif
()
endif
()
if
(
WITH_ANAKIN AND WITH_MKL
)
# only needed in CI
if
(
WITH_ANAKIN AND WITH_MKL
)
# only needed in CI
# compile the libinference_anakin_api.a and anakin.so.
# compile the libinference_anakin_api.a and anakin.so.
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml
scope
zero_copy_tensor_dummy
)
cc_library
(
inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml zero_copy_tensor_dummy
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber
scope
)
cc_library
(
inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber
zero_copy_tensor_dummy
)
function
(
anakin_target target_name
)
function
(
anakin_target target_name
)
target_compile_options
(
${
target_name
}
BEFORE PUBLIC
${
ANAKIN_COMPILE_EXTRA_FLAGS
}
)
target_compile_options
(
${
target_name
}
BEFORE PUBLIC
${
ANAKIN_COMPILE_EXTRA_FLAGS
}
)
endfunction
()
endfunction
()
...
...
paddle/fluid/inference/api/README.md
浏览文件 @
ddb12035
...
@@ -2,25 +2,15 @@
...
@@ -2,25 +2,15 @@
Paddle inference offers the APIs in
`C`
and
`C++`
languages.
Paddle inference offers the APIs in
`C`
and
`C++`
languages.
One
can easily deploy a model trained by Paddle following the steps as below:
You
can easily deploy a model trained by Paddle following the steps as below:
1.
Optimize the native model;
1.
Optimize the native model;
2.
Write some codes for deployment.
2.
Write some codes for deployment.
## The APIs
Let's explain the steps in detail.
All the released APIs are located in the
`paddle_inference_api.h`
header file.
The stable APIs are wrapped by
`namespace paddle`
, the unstable APIs are protected by
`namespace paddle::contrib`
.
## Optimize the native Fluid Model
The native model that get from the training phase needs to be optimized for that.
-
Clean the noise such as the cost operators that do not need inference;
-
Prune unnecessary computation fork that has nothing to do with the output;
-
Remove extraneous variables;
-
Memory reuse for native Fluid executor;
-
Translate the model storage format to some third-party engine's, so that the inference API can utilize the engine for acceleration;
We have an official tool to do the optimization, call
`paddle_inference_optimize --help`
for more information.
## Write some codes
## Write some codes
...
...
paddle/fluid/inference/api/analysis_config.cc
0 → 100644
浏览文件 @
ddb12035
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle_pass_builder.h" // NOLINT
namespace
paddle
{
// Returns the pass strategy held by this config (non-owning pointer).
// Enforces that the strategy exists; it is created by the constructors.
PassStrategy* contrib::AnalysisConfig::pass_builder() const {
  PassStrategy* builder = pass_builder_.get();
  PADDLE_ENFORCE(
      builder,
      "Should call constructor first, that will init the pass_builder_.");
  return builder;
}
// Records the device choice and installs the matching pass strategy:
// GpuPassStrategy when `use_gpu` is true, CpuPassStrategy otherwise.
contrib::AnalysisConfig::AnalysisConfig(bool use_gpu) {
  this->use_gpu = use_gpu;
  if (!use_gpu) {
    pass_builder_.reset(new CpuPassStrategy);
    return;
  }
  pass_builder_.reset(new GpuPassStrategy);
}
// Copy constructor: copies every configuration field and deep-copies the pass
// strategy. `other` must still own a pass strategy; other.pass_builder()
// enforces that.
contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig& other) {
  // fields from Config
  model_dir = other.model_dir;
  // fields from NativeConfig
  use_gpu = other.use_gpu;
  device = other.device;
  fraction_of_gpu_memory = other.fraction_of_gpu_memory;
  prog_file = other.prog_file;
  param_file = other.param_file;
  specify_input_name = other.specify_input_name;
  // fields from this.
  enable_ir_optim = other.enable_ir_optim;
  use_feed_fetch_ops = other.use_feed_fetch_ops;
  use_tensorrt_ = other.use_tensorrt_;
  // Keep the MKLDNN flag in sync with the copied pass strategy; previously it
  // was dropped, so a copy of an MKLDNN-enabled config lost the flag.
  use_mkldnn_ = other.use_mkldnn_;
  tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
  tensorrt_workspace_size_ = other.tensorrt_workspace_size_;

  // The concrete strategy type is chosen from use_gpu, mirroring the
  // AnalysisConfig(bool) constructor.
  if (use_gpu) {
    pass_builder_.reset(new GpuPassStrategy(
        *static_cast<GpuPassStrategy*>(other.pass_builder())));
  } else {
    pass_builder_.reset(new CpuPassStrategy(
        *static_cast<CpuPassStrategy*>(other.pass_builder())));
  }
}
// Move constructor: takes over the configuration of `other`, including
// ownership of its pass strategy. Afterwards `other` no longer owns a pass
// strategy, and its path fields are left in a moved-from state.
contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig&& other) {
  // fields from Config
  model_dir = std::move(other.model_dir);
  // fields from NativeConfig
  use_gpu = other.use_gpu;
  device = other.device;
  fraction_of_gpu_memory = other.fraction_of_gpu_memory;
  prog_file = std::move(other.prog_file);
  param_file = std::move(other.param_file);
  specify_input_name = other.specify_input_name;
  // fields from this.
  enable_ir_optim = other.enable_ir_optim;
  use_feed_fetch_ops = other.use_feed_fetch_ops;
  use_tensorrt_ = other.use_tensorrt_;
  // Keep the MKLDNN flag in sync with the transferred pass strategy;
  // previously it was dropped on move.
  use_mkldnn_ = other.use_mkldnn_;
  tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
  tensorrt_workspace_size_ = other.tensorrt_workspace_size_;

  pass_builder_ = std::move(other.pass_builder_);
}
void
contrib
::
AnalysisConfig
::
EnableMKLDNN
()
{
#ifdef PADDLE_WITH_MKLDNN
pass_builder
()
->
EnableMKLDNN
();
use_mkldnn_
=
true
;
#else
LOG
(
ERROR
)
<<
"Please compile with MKLDNN first to use MKLDNN"
;
use_mkldnn_
=
false
;
#endif
}
void
contrib
::
AnalysisConfig
::
EnableTensorRtEngine
(
int
workspace_size
,
int
max_batch_size
)
{
use_tensorrt_
=
true
;
tensorrt_workspace_size_
=
workspace_size
;
tensorrt_max_batchsize_
=
max_batch_size
;
// Append after the infer_clean pass.
pass_builder
()
->
InsertPass
(
1
,
"tensorrt_subgraph_pass"
);
}
}
// namespace paddle
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
ddb12035
...
@@ -13,10 +13,13 @@
...
@@ -13,10 +13,13 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include <glog/logging.h>
#include <algorithm>
#include <memory>
#include <memory>
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/naive_executor.h"
...
@@ -24,6 +27,9 @@
...
@@ -24,6 +27,9 @@
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#if PADDLE_WITH_TENSORRT
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#endif
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -35,6 +41,17 @@ namespace paddle {
...
@@ -35,6 +41,17 @@ namespace paddle {
using
contrib
::
AnalysisConfig
;
using
contrib
::
AnalysisConfig
;
namespace
{
bool
IsPersistable
(
const
framework
::
VarDesc
*
var
)
{
if
(
var
->
Persistable
()
&&
var
->
GetType
()
!=
framework
::
proto
::
VarType
::
FEED_MINIBATCH
&&
var
->
GetType
()
!=
framework
::
proto
::
VarType
::
FETCH_LIST
)
{
return
true
;
}
return
false
;
}
}
// namespace
bool
AnalysisPredictor
::
Init
(
bool
AnalysisPredictor
::
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
,
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
)
{
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
)
{
...
@@ -52,36 +69,93 @@ bool AnalysisPredictor::Init(
...
@@ -52,36 +69,93 @@ bool AnalysisPredictor::Init(
// no matter with or without MKLDNN
// no matter with or without MKLDNN
paddle
::
platform
::
SetNumThreads
(
FLAGS_paddle_num_threads
);
paddle
::
platform
::
SetNumThreads
(
FLAGS_paddle_num_threads
);
if
(
config_
.
use_gpu
)
{
if
(
!
PrepareScope
(
parent_scope
))
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
return
false
;
LOG
(
WARNING
)
<<
"ir optimize only supports CPU currently, enable_ir_optim "
}
"is turned false."
;
if
(
!
CreateExecutor
())
{
config_
.
enable_ir_optim
=
false
;
return
false
;
}
else
{
}
place_
=
paddle
::
platform
::
CPUPlace
();
if
(
!
PrepareProgram
(
program
))
{
return
false
;
}
}
// Prepare executor, create local variables.
if
(
!
PrepareExecutor
())
{
return
true
;
}
// Get the feed_target_names and fetch_target_names
PrepareFeedFetch
();
return
true
;
}
bool
AnalysisPredictor
::
PrepareScope
(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
)
{
if
(
parent_scope
)
{
if
(
parent_scope
)
{
PADDLE_ENFORCE_NOT_NULL
(
parent_scope
,
"Both program and parent_scope should be set in Clone mode."
);
scope_
=
parent_scope
;
scope_
=
parent_scope
;
s
ub_scope_
=
&
(
parent_scope
->
NewScope
())
;
s
tatus_is_cloned_
=
true
;
}
else
{
}
else
{
paddle
::
framework
::
InitDevices
(
false
);
paddle
::
framework
::
InitDevices
(
false
);
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
status_is_cloned_
=
false
;
}
}
sub_scope_
=
&
scope_
->
NewScope
();
executor_
.
reset
(
new
paddle
::
framework
::
NaiveExecutor
(
place_
));
return
true
;
}
bool
AnalysisPredictor
::
PrepareProgram
(
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
)
{
if
(
!
program
)
{
if
(
!
program
)
{
if
(
!
LoadProgramDesc
())
return
false
;
if
(
!
LoadProgramDesc
())
return
false
;
// Optimize the program, and load parameters and modify them in the
// scope_.
// This will change the scope_ address.
if
(
config_
.
enable_ir_optim
)
{
status_ir_optim_enabled_
=
true
;
OptimizeInferenceProgram
();
OptimizeInferenceProgram
();
}
else
{
}
else
{
// If the parent_scope is passed, we assert that the persistable variables
// are already created, so just create the no persistable variables.
// If not cloned, the parameters should be loaded
// OptimizeInferenceProgram.
// So in both cases, just the local variables are needed to load, not the
// parematers.
executor_
->
CreateVariables
(
*
inference_program_
,
0
,
true
,
sub_scope_
);
// Load parameters
LOG
(
INFO
)
<<
"load parameters "
;
LoadParameters
();
}
}
else
{
// If the program is passed from external, no need to optimize it, this
// logic is used in the clone scenario.
inference_program_
=
program
;
inference_program_
=
program
;
}
}
executor_
->
Prepare
(
scope_
.
get
(),
*
inference_program_
,
0
,
executor_
->
CreateVariables
(
*
inference_program_
,
0
,
false
,
sub_scope_
);
return
true
;
}
bool
AnalysisPredictor
::
CreateExecutor
()
{
if
(
config_
.
use_gpu
)
{
status_use_gpu_
=
true
;
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
}
executor_
.
reset
(
new
paddle
::
framework
::
NaiveExecutor
(
place_
));
return
true
;
}
bool
AnalysisPredictor
::
PrepareExecutor
()
{
executor_
->
Prepare
(
sub_scope_
,
*
inference_program_
,
0
,
config_
.
use_feed_fetch_ops
);
config_
.
use_feed_fetch_ops
);
// Get the feed_target_names and fetch_target_names
PADDLE_ENFORCE_NOT_NULL
(
sub_scope_
);
PrepareFeedFetch
();
return
true
;
return
true
;
}
}
...
@@ -206,54 +280,40 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
...
@@ -206,54 +280,40 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
return
true
;
return
true
;
}
}
// NOTE All the members in AnalysisConfig should be copied to Argument.
void
AnalysisPredictor
::
OptimizeInferenceProgram
()
{
void
AnalysisPredictor
::
OptimizeInferenceProgram
()
{
LOG
(
INFO
)
<<
"optimize begin"
;
status_program_optimized_
=
true
;
FLAGS_IA_enable_ir
=
config_
.
enable_ir_optim
;
FLAGS_IA_enable_tensorrt_subgraph_engine
=
false
;
argument_
.
SetUseGPU
(
config_
.
use_gpu
);
FLAGS_IA_output_storage_path
=
""
;
// Don't output the model.
// Analyze inference_program
// Analyze inference_program
if
(
!
config_
.
model_dir
.
empty
())
{
if
(
!
config_
.
model_dir
.
empty
())
{
argument_
.
fluid_model_dir
.
reset
(
new
std
::
string
(
config_
.
model_dir
)
);
argument_
.
SetModelDir
(
config_
.
model_dir
);
}
else
{
}
else
{
PADDLE_ENFORCE
(
PADDLE_ENFORCE
(
!
config_
.
param_file
.
empty
(),
!
config_
.
param_file
.
empty
(),
"Either model_dir or (param_file, prog_file) should be set."
);
"Either model_dir or (param_file, prog_file) should be set."
);
PADDLE_ENFORCE
(
!
config_
.
prog_file
.
empty
());
PADDLE_ENFORCE
(
!
config_
.
prog_file
.
empty
());
argument_
.
fluid_model_program_path
.
reset
(
argument_
.
SetModelProgramPath
(
config_
.
prog_file
);
new
std
::
string
(
config_
.
prog_file
));
argument_
.
SetModelParamsPath
(
config_
.
param_file
);
argument_
.
fluid_model_param_path
.
reset
(
new
std
::
string
(
config_
.
param_file
));
}
argument_
.
origin_program_desc
.
reset
(
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
switch
(
config_
.
ir_mode
)
{
case
contrib
::
AnalysisConfig
::
IrPassMode
::
kExclude
:
Analyzer
()
.
IncludeAllIrPasses
()
.
SetUseMkldnn
(
config_
.
_use_mkldnn
)
.
DisableIrPasses
(
config_
.
ir_passes
)
.
Run
(
&
argument_
);
break
;
case
contrib
::
AnalysisConfig
::
IrPassMode
::
kInclude
:
Analyzer
()
.
SetUseMkldnn
(
config_
.
_use_mkldnn
)
.
IncludeIrPasses
(
config_
.
ir_passes
)
.
Run
(
&
argument_
);
break
;
default:
LOG
(
ERROR
)
<<
"Only kExclude and kInclude modes are supoorted yet."
;
}
CHECK
(
argument_
.
transformed_program_desc
);
VLOG
(
50
)
<<
"to prepare executor"
;
inference_program_
.
reset
(
new
framework
::
ProgramDesc
(
*
argument_
.
transformed_program_desc
));
if
(
argument_
.
Has
(
framework
::
ir
::
kParamScopeAttr
))
{
// Update scope.
scope_
.
reset
(
argument_
.
Release
<
framework
::
Scope
>
(
framework
::
ir
::
kParamScopeAttr
));
}
}
if
(
config_
.
use_gpu
&&
config_
.
use_tensorrt_
)
{
argument_
.
SetUseTensorRT
(
true
);
argument_
.
SetTensorRtWorkspaceSize
(
config_
.
tensorrt_workspace_size_
);
argument_
.
SetTensorRtMaxBatchSize
(
config_
.
tensorrt_max_batchsize_
);
}
auto
passes
=
config_
.
pass_builder
()
->
AllPasses
();
if
(
!
config_
.
enable_ir_optim
)
passes
.
clear
();
argument_
.
SetIrAnalysisPasses
(
passes
);
argument_
.
SetScopeNotOwned
(
const_cast
<
framework
::
Scope
*>
(
scope_
.
get
()));
Analyzer
().
Run
(
&
argument_
);
PADDLE_ENFORCE
(
argument_
.
scope_valid
());
VLOG
(
5
)
<<
"to prepare executor"
;
ARGUMENT_CHECK_FIELD
((
&
argument_
),
ir_analyzed_program
);
inference_program_
.
reset
(
new
framework
::
ProgramDesc
(
argument_
.
ir_analyzed_program
()));
LOG
(
INFO
)
<<
"== optimize end =="
;
LOG
(
INFO
)
<<
"== optimize end =="
;
}
}
...
@@ -283,10 +343,12 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
...
@@ -283,10 +343,12 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
if
(
!
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
())
->
Init
(
nullptr
))
{
if
(
!
dynamic_cast
<
AnalysisPredictor
*>
(
predictor
.
get
())
->
Init
(
nullptr
))
{
return
nullptr
;
return
nullptr
;
}
}
return
predictor
;
return
std
::
move
(
predictor
)
;
}
}
void
AnalysisPredictor
::
PrepareFeedFetch
()
{
void
AnalysisPredictor
::
PrepareFeedFetch
()
{
PADDLE_ENFORCE_NOT_NULL
(
sub_scope_
);
CreateFeedFetchVar
(
sub_scope_
);
for
(
auto
*
op
:
inference_program_
->
Block
(
0
).
AllOps
())
{
for
(
auto
*
op
:
inference_program_
->
Block
(
0
).
AllOps
())
{
if
(
op
->
Type
()
==
"feed"
)
{
if
(
op
->
Type
()
==
"feed"
)
{
int
idx
=
boost
::
get
<
int
>
(
op
->
GetAttr
(
"col"
));
int
idx
=
boost
::
get
<
int
>
(
op
->
GetAttr
(
"col"
));
...
@@ -305,6 +367,14 @@ void AnalysisPredictor::PrepareFeedFetch() {
...
@@ -305,6 +367,14 @@ void AnalysisPredictor::PrepareFeedFetch() {
}
}
}
}
// Creates (or looks up) the "feed" and "fetch" variables in `scope` and calls
// GetMutable<framework::FeedFetchList>() on each of them.
//
// `scope` must not be null; this is enforced with PADDLE_ENFORCE_NOT_NULL.
void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) {
  PADDLE_ENFORCE_NOT_NULL(scope);
  // "feed" is handled first, then "fetch", matching the original order.
  scope->Var("feed")->GetMutable<framework::FeedFetchList>();
  scope->Var("fetch")->GetMutable<framework::FeedFetchList>();
}
std
::
unique_ptr
<
ZeroCopyTensor
>
AnalysisPredictor
::
GetInputTensor
(
std
::
unique_ptr
<
ZeroCopyTensor
>
AnalysisPredictor
::
GetInputTensor
(
const
std
::
string
&
name
)
{
const
std
::
string
&
name
)
{
PADDLE_ENFORCE
(
executor_
->
scope
()
->
FindVar
(
name
),
"no name called %s"
,
name
);
PADDLE_ENFORCE
(
executor_
->
scope
()
->
FindVar
(
name
),
"no name called %s"
,
name
);
...
@@ -335,27 +405,98 @@ bool AnalysisPredictor::ZeroCopyRun() {
...
@@ -335,27 +405,98 @@ bool AnalysisPredictor::ZeroCopyRun() {
bool
AnalysisPredictor
::
LoadProgramDesc
()
{
bool
AnalysisPredictor
::
LoadProgramDesc
()
{
// Initialize the inference program
// Initialize the inference program
std
::
unique_ptr
<
framework
::
Executor
>
tmp_exe
(
std
::
string
filename
;
new
framework
::
Executor
(
platform
::
CPUPlace
()));
if
(
!
config_
.
model_dir
.
empty
())
{
if
(
!
config_
.
model_dir
.
empty
())
{
// Parameters are saved in separate files sited in
filename
=
config_
.
model_dir
+
"/__model__"
;
// the specified `dirname`.
inference_program_
=
paddle
::
inference
::
Load
(
static_cast
<
framework
::
Executor
*>
(
tmp_exe
.
get
()),
scope_
.
get
(),
config_
.
model_dir
);
}
else
if
(
!
config_
.
prog_file
.
empty
()
&&
!
config_
.
param_file
.
empty
())
{
}
else
if
(
!
config_
.
prog_file
.
empty
()
&&
!
config_
.
param_file
.
empty
())
{
// All parameters are saved in a single file.
// All parameters are saved in a single file.
// The file names should be consistent with that used
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
// in Python API `fluid.io.save_inference_model`.
inference_program_
=
paddle
::
inference
::
Load
(
filename
=
config_
.
prog_file
;
static_cast
<
framework
::
Executor
*>
(
tmp_exe
.
get
()),
scope_
.
get
(),
config_
.
prog_file
,
config_
.
param_file
);
}
else
{
}
else
{
if
(
config_
.
model_dir
.
empty
()
&&
config_
.
prog_file
.
empty
())
{
LOG
(
ERROR
)
<<
"Either model_dir or (prog_file, param_file) should be set."
;
return
false
;
}
LOG
(
ERROR
)
<<
string
::
Sprintf
(
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"not valid model path '%s' or program path '%s'."
,
config_
.
model_dir
,
"not valid model path '%s' or program path '%s'."
,
config_
.
model_dir
,
config_
.
param_file
);
config_
.
param_file
);
return
false
;
return
false
;
}
}
std
::
string
pb_content
;
// Read binary
std
::
ifstream
fin
(
filename
,
std
::
ios
::
in
|
std
::
ios
::
binary
);
PADDLE_ENFORCE
(
static_cast
<
bool
>
(
fin
),
"Cannot open file %s"
,
filename
);
fin
.
seekg
(
0
,
std
::
ios
::
end
);
pb_content
.
resize
(
fin
.
tellg
());
fin
.
seekg
(
0
,
std
::
ios
::
beg
);
fin
.
read
(
&
(
pb_content
.
at
(
0
)),
pb_content
.
size
());
fin
.
close
();
// Create ProgramDesc
framework
::
proto
::
ProgramDesc
proto
;
proto
.
ParseFromString
(
pb_content
);
inference_program_
.
reset
(
new
framework
::
ProgramDesc
(
proto
));
return
true
;
}
bool
AnalysisPredictor
::
LoadParameters
()
{
PADDLE_ENFORCE_NOT_NULL
(
inference_program_
.
get
(),
"The inference program should be loaded first."
);
const
auto
&
global_block
=
inference_program_
->
MutableBlock
(
0
);
// create a temporary program to load parameters.
std
::
unique_ptr
<
framework
::
ProgramDesc
>
load_program
(
new
framework
::
ProgramDesc
());
framework
::
BlockDesc
*
load_block
=
load_program
->
MutableBlock
(
0
);
std
::
vector
<
std
::
string
>
params
;
for
(
auto
*
var
:
global_block
->
AllVars
())
{
if
(
IsPersistable
(
var
))
{
VLOG
(
3
)
<<
"persistable variable's name: "
<<
var
->
Name
();
framework
::
VarDesc
*
new_var
=
load_block
->
Var
(
var
->
Name
());
new_var
->
SetShape
(
var
->
GetShape
());
new_var
->
SetDataType
(
var
->
GetDataType
());
new_var
->
SetType
(
var
->
GetType
());
new_var
->
SetLoDLevel
(
var
->
GetLoDLevel
());
new_var
->
SetPersistable
(
true
);
if
(
!
config_
.
param_file
.
empty
())
{
params
.
push_back
(
new_var
->
Name
());
}
else
{
// append_op
framework
::
OpDesc
*
op
=
load_block
->
AppendOp
();
op
->
SetType
(
"load"
);
op
->
SetOutput
(
"Out"
,
{
new_var
->
Name
()});
op
->
SetAttr
(
"file_path"
,
{
config_
.
model_dir
+
"/"
+
new_var
->
Name
()});
op
->
CheckAttrs
();
}
}
}
if
(
!
config_
.
param_file
.
empty
())
{
// sort paramlist to have consistent ordering
std
::
sort
(
params
.
begin
(),
params
.
end
());
// append just the load_combine op
framework
::
OpDesc
*
op
=
load_block
->
AppendOp
();
op
->
SetType
(
"load_combine"
);
op
->
SetOutput
(
"Out"
,
params
);
op
->
SetAttr
(
"file_path"
,
{
config_
.
param_file
});
op
->
CheckAttrs
();
}
// Use NaiveExecutor to Load parameters.
platform
::
CPUPlace
place
;
framework
::
NaiveExecutor
e
(
place
);
e
.
Prepare
(
scope_
.
get
(),
*
load_program
,
0
,
false
);
e
.
Run
();
VLOG
(
3
)
<<
"get "
<<
scope_
->
LocalVarNames
().
size
()
<<
" vars after load"
;
return
true
;
return
true
;
}
}
...
@@ -385,3 +526,27 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<contrib::AnalysisConfig>(
...
@@ -385,3 +526,27 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<contrib::AnalysisConfig>(
}
}
}
// namespace paddle
}
// namespace paddle
#if PADDLE_WITH_TENSORRT
USE_TRT_CONVERTER
(
elementwise_add_weight
);
USE_TRT_CONVERTER
(
elementwise_add_tensor
);
USE_TRT_CONVERTER
(
elementwise_sub_tensor
);
USE_TRT_CONVERTER
(
elementwise_div_tensor
);
USE_TRT_CONVERTER
(
elementwise_mul_tensor
);
USE_TRT_CONVERTER
(
elementwise_max_tensor
);
USE_TRT_CONVERTER
(
elementwise_min_tensor
);
USE_TRT_CONVERTER
(
elementwise_pow_tensor
);
USE_TRT_CONVERTER
(
mul
);
USE_TRT_CONVERTER
(
conv2d
);
USE_TRT_CONVERTER
(
relu
);
USE_TRT_CONVERTER
(
sigmoid
);
USE_TRT_CONVERTER
(
tanh
);
USE_TRT_CONVERTER
(
fc
);
USE_TRT_CONVERTER
(
pool2d
);
USE_TRT_CONVERTER
(
softmax
);
USE_TRT_CONVERTER
(
batch_norm
);
USE_TRT_CONVERTER
(
concat
);
USE_TRT_CONVERTER
(
dropout
);
USE_TRT_CONVERTER
(
pad
);
USE_TRT_CONVERTER
(
split
);
#endif
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
ddb12035
...
@@ -23,7 +23,10 @@
...
@@ -23,7 +23,10 @@
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/printf.h"
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#include <gtest/gtest_prod.h>
#endif
namespace
paddle
{
namespace
paddle
{
using
inference
::
analysis
::
Argument
;
using
inference
::
analysis
::
Argument
;
...
@@ -54,6 +57,7 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -54,6 +57,7 @@ class AnalysisPredictor : public PaddlePredictor {
bool
ZeroCopyRun
()
override
;
bool
ZeroCopyRun
()
override
;
void
CreateFeedFetchVar
(
framework
::
Scope
*
scope
);
void
PrepareFeedFetch
();
void
PrepareFeedFetch
();
void
OptimizeInferenceProgram
();
void
OptimizeInferenceProgram
();
...
@@ -62,11 +66,17 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -62,11 +66,17 @@ class AnalysisPredictor : public PaddlePredictor {
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
override
;
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
override
;
framework
::
Scope
*
scope
()
{
return
executor_
->
scope
();
}
framework
::
Scope
*
scope
()
{
return
scope_
.
get
();
}
framework
::
ProgramDesc
&
program
()
{
return
*
inference_program_
;
}
framework
::
ProgramDesc
&
program
()
{
return
*
inference_program_
;
}
protected:
protected:
bool
PrepareProgram
(
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
);
bool
PrepareScope
(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
);
bool
CreateExecutor
();
bool
PrepareExecutor
();
bool
LoadProgramDesc
();
bool
LoadProgramDesc
();
bool
LoadParameters
();
bool
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
input_datas
,
bool
SetFeed
(
const
std
::
vector
<
PaddleTensor
>
&
input_datas
,
framework
::
Scope
*
scope
);
framework
::
Scope
*
scope
);
...
@@ -77,6 +87,14 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -77,6 +87,14 @@ class AnalysisPredictor : public PaddlePredictor {
PaddleTensor
*
output_data
);
PaddleTensor
*
output_data
);
~
AnalysisPredictor
();
~
AnalysisPredictor
();
// Some more detailed tests; they are made friends of the predictor so that
// all of its details can be tested.
#if PADDLE_WITH_TESTING
FRIEND_TEST
(
AnalysisPredictor
,
analysis_off
);
FRIEND_TEST
(
AnalysisPredictor
,
analysis_on
);
FRIEND_TEST
(
AnalysisPredictor
,
with_gpu
);
#endif
private:
private:
contrib
::
AnalysisConfig
config_
;
contrib
::
AnalysisConfig
config_
;
Argument
argument_
;
Argument
argument_
;
...
@@ -92,6 +110,13 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -92,6 +110,13 @@ class AnalysisPredictor : public PaddlePredictor {
// concurrency problems, so cache them.
// concurrency problems, so cache them.
std
::
vector
<
framework
::
LoDTensor
>
feed_tensors_
;
std
::
vector
<
framework
::
LoDTensor
>
feed_tensors_
;
details
::
TensorArrayBatchCleaner
tensor_array_batch_cleaner_
;
details
::
TensorArrayBatchCleaner
tensor_array_batch_cleaner_
;
private:
// Status flags that help determine the internal state of the predictor.
bool
status_program_optimized_
{
false
};
bool
status_is_cloned_
{
false
};
bool
status_use_gpu_
{
false
};
bool
status_ir_optim_enabled_
{
false
};
};
};
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/analysis_predictor_tester.cc
浏览文件 @
ddb12035
...
@@ -12,16 +12,85 @@
...
@@ -12,16 +12,85 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include <glog/logging.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <thread>
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
DEFINE_string
(
dirname
,
""
,
"dirname to tests."
);
DEFINE_string
(
dirname
,
""
,
"dirname to tests."
);
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
using
contrib
::
AnalysisConfig
;
using
contrib
::
AnalysisConfig
;
// With IR optimization disabled, checks that the predictor builds its own
// scope hierarchy, leaves the program unoptimized, and can run one batch.
TEST(AnalysisPredictor, analysis_off) {
  AnalysisConfig config(false);
  config.model_dir = FLAGS_dirname;
  config.enable_ir_optim = false;

  auto _predictor = CreatePaddlePredictor<AnalysisConfig>(config);
  // The factory returns nullptr when Init() fails; report that as a test
  // failure instead of dereferencing a null pointer below.
  ASSERT_TRUE(_predictor != nullptr);
  auto* predictor = static_cast<AnalysisPredictor*>(_predictor.get());

  // Without analysis, the scope_ and sub_scope_ are created by predictor
  // itself.
  ASSERT_TRUE(predictor->scope_);
  ASSERT_TRUE(predictor->sub_scope_);
  ASSERT_EQ(predictor->scope_->parent(), nullptr);
  ASSERT_EQ(predictor->sub_scope_->parent(), predictor->scope_.get());
  // ir is turned off, so program shouldn't be optimized.
  ASSERT_FALSE(predictor->status_program_optimized_);
  LOG(INFO) << "scope parameters " << predictor->scope_->LocalVarNames().size();

  // 2. Dummy Input Data
  int64_t data[4] = {1, 2, 3, 4};
  PaddleTensor tensor;
  tensor.shape = std::vector<int>({4, 1});
  tensor.data.Reset(data, sizeof(data));
  tensor.dtype = PaddleDType::INT64;

  // Every input slot is fed the same tensor.
  std::vector<PaddleTensor> inputs(4, tensor);
  std::vector<PaddleTensor> outputs;
  ASSERT_TRUE(predictor->Run(inputs, &outputs));
}
// With IR optimization enabled, checks the scope hierarchy, that the program
// is marked optimized, and that the first output matches the one produced by
// a predictor created from the same config via NativeConfig.
TEST(AnalysisPredictor, analysis_on) {
  AnalysisConfig config(false);
  config.model_dir = FLAGS_dirname;
  config.enable_ir_optim = true;

  auto _predictor = CreatePaddlePredictor<AnalysisConfig>(config);
  // The factory returns nullptr when Init() fails; fail the test cleanly
  // rather than dereferencing a null pointer.
  ASSERT_TRUE(_predictor != nullptr);
  auto* predictor = static_cast<AnalysisPredictor*>(_predictor.get());

  ASSERT_TRUE(predictor->scope_);
  ASSERT_TRUE(predictor->sub_scope_);
  ASSERT_EQ(predictor->scope_->parent(), nullptr);
  ASSERT_EQ(predictor->sub_scope_->parent(), predictor->scope_.get());
  // ir is turned on, so program should be optimized.
  ASSERT_TRUE(predictor->status_program_optimized_);

  // 2. Dummy Input Data
  int64_t data[4] = {1, 2, 3, 4};
  PaddleTensor tensor;
  tensor.shape = std::vector<int>({4, 1});
  tensor.data.Reset(data, sizeof(data));
  tensor.dtype = PaddleDType::INT64;

  // Every input slot is fed the same tensor.
  std::vector<PaddleTensor> inputs(4, tensor);
  std::vector<PaddleTensor> outputs;
  ASSERT_TRUE(predictor->Run(inputs, &outputs));

  for (auto& output : outputs) {
    LOG(INFO) << inference::DescribeTensor(output);
  }

  // compare with NativePredictor
  auto naive_predictor = CreatePaddlePredictor<NativeConfig>(config);
  ASSERT_TRUE(naive_predictor != nullptr);
  std::vector<PaddleTensor> naive_outputs;
  ASSERT_TRUE(naive_predictor->Run(inputs, &naive_outputs));
  ASSERT_EQ(naive_outputs.size(), 1UL);
  // front() on an empty vector is undefined behavior; guard it explicitly.
  ASSERT_FALSE(outputs.empty());
  inference::CompareTensor(outputs.front(), naive_outputs.front());
}
TEST
(
AnalysisPredictor
,
ZeroCopy
)
{
TEST
(
AnalysisPredictor
,
ZeroCopy
)
{
AnalysisConfig
config
;
AnalysisConfig
config
;
config
.
model_dir
=
FLAGS_dirname
;
config
.
model_dir
=
FLAGS_dirname
;
...
@@ -61,5 +130,59 @@ TEST(AnalysisPredictor, ZeroCopy) {
...
@@ -61,5 +130,59 @@ TEST(AnalysisPredictor, ZeroCopy) {
LOG
(
INFO
)
<<
"output_data: "
<<
out_data
;
LOG
(
INFO
)
<<
"output_data: "
<<
out_data
;
}
}
}
// namespace inference
// Creates one predictor, clones it, and runs all instances first sequentially
// and then concurrently (one thread per predictor, ten runs each).
TEST(AnalysisPredictor, Clone) {
  AnalysisConfig config;
  config.model_dir = FLAGS_dirname;
  config.use_feed_fetch_ops = true;
  config.enable_ir_optim = true;

  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
  predictors.emplace_back(CreatePaddlePredictor(config));
  // A failed creation yields nullptr; stop here instead of crashing in Clone().
  ASSERT_TRUE(predictors.back() != nullptr);

  LOG(INFO) << "************** to clone ************************";
  const int num_threads = 3;
  for (int i = 1; i < num_threads; i++) {
    predictors.emplace_back(predictors.front()->Clone());
    ASSERT_TRUE(predictors.back() != nullptr);
  }

  auto* root_scope =
      static_cast<AnalysisPredictor*>(predictors[0].get())->scope();
  ASSERT_FALSE(root_scope->kids().empty());
  LOG(INFO) << "***** scope ******\n"
            << framework::GenScopeTreeDebugInfo(root_scope);

  // 2. Dummy Input Data
  int64_t data[4] = {1, 2, 3, 4};
  PaddleTensor tensor;
  tensor.shape = std::vector<int>({4, 1});
  tensor.data.Reset(data, sizeof(data));
  tensor.dtype = PaddleDType::INT64;

  // Every input slot is fed the same tensor.
  std::vector<PaddleTensor> inputs(4, tensor);
  std::vector<PaddleTensor> outputs;
  // Previously the result of this first run was silently discarded.
  ASSERT_TRUE(predictors[0]->Run(inputs, &outputs));

  LOG(INFO) << "Run with single thread";
  for (int i = 0; i < num_threads; i++) {
    LOG(INFO) << "run predictor " << i;
    ASSERT_TRUE(predictors[i]->Run(inputs, &outputs));
  }

  LOG(INFO) << "Run with multiple threads";
  std::vector<std::thread> threads;
  for (int i = 0; i < num_threads; i++) {
    // `predictors` and `inputs` outlive the threads: all of them are joined
    // before this test body returns.
    threads.emplace_back([&predictors, &inputs, i] {
      LOG(INFO) << "thread #" << i << " running";
      // Each thread writes into its own output vector.
      std::vector<PaddleTensor> outputs;
      for (int j = 0; j < 10; j++) {
        ASSERT_TRUE(predictors[i]->Run(inputs, &outputs));
      }
    });
  }

  for (auto& t : threads) {
    t.join();
  }
}
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/api.cc
浏览文件 @
ddb12035
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/inference/api/api_anakin_engine.h
浏览文件 @
ddb12035
...
@@ -19,11 +19,13 @@ limitations under the License. */
...
@@ -19,11 +19,13 @@ limitations under the License. */
#pragma once
#pragma once
#define WITH_ANAKIN
#include <vector>
#include <vector>
#include "framework/core/net/net.h"
#include "framework/core/net/net.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph.h"
#include "paddle/fluid/inference/api/paddle_
inference_api
.h"
#include "paddle/fluid/inference/api/paddle_
anakin_config
.h"
#include "saber/core/shape.h"
#include "saber/core/shape.h"
#include "saber/saber_types.h"
#include "saber/saber_types.h"
...
...
paddle/fluid/inference/api/api_impl_tester.cc
浏览文件 @
ddb12035
...
@@ -292,7 +292,14 @@ TEST(inference_api_native, image_classification_gpu) {
...
@@ -292,7 +292,14 @@ TEST(inference_api_native, image_classification_gpu) {
// TEST(inference_api_native, image_classification_gpu_threads) {
// TEST(inference_api_native, image_classification_gpu_threads) {
// MainThreadsImageClassification(true /*use_gpu*/);
// MainThreadsImageClassification(true /*use_gpu*/);
// }
// }
#endif
#endif
// After DeletePass() is called for "attention_lstm_fuse_pass", that name must
// no longer appear in the list returned by AllPasses().
TEST(PassBuilder, Delete) {
  const char* const removed_pass = "attention_lstm_fuse_pass";

  contrib::AnalysisConfig config(false);
  auto* builder = config.pass_builder();
  builder->DeletePass(removed_pass);

  // pass_builder() is queried again for the listing, as in the original test.
  const auto& remaining = config.pass_builder()->AllPasses();
  auto found = std::find(remaining.begin(), remaining.end(), removed_pass);
  ASSERT_EQ(found, remaining.end());
}
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
已删除
100644 → 0
浏览文件 @
0b962680
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/operators/tensorrt_engine_op.h"
namespace
paddle
{
using
inference
::
analysis
::
Argument
;
using
inference
::
Singleton
;
using
inference
::
analysis
::
Analyzer
;
using
framework
::
proto
::
ProgramDesc
;
using
paddle
::
contrib
::
MixedRTConfig
;
class
TensorRTSubgraphPredictor
:
public
NativePaddlePredictor
{
public:
explicit
TensorRTSubgraphPredictor
(
const
MixedRTConfig
&
config
)
:
NativePaddlePredictor
(
config
),
config_
(
config
)
{}
bool
Init
(
const
std
::
shared_ptr
<
framework
::
Scope
>&
parent_scope
)
{
FLAGS_IA_enable_tensorrt_subgraph_engine
=
true
;
VLOG
(
30
)
<<
"Predictor::init()"
;
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
}
else
{
place_
=
paddle
::
platform
::
CPUPlace
();
}
if
(
parent_scope
)
{
scope_
=
parent_scope
;
sub_scope_
=
&
(
parent_scope
->
NewScope
());
}
else
{
paddle
::
framework
::
InitDevices
(
false
);
scope_
.
reset
(
new
paddle
::
framework
::
Scope
());
}
executor_
.
reset
(
new
paddle
::
framework
::
Executor
(
place_
));
// Initialize the inference program
if
(
!
config_
.
model_dir
.
empty
())
{
// Parameters are saved in separate files sited in
// the specified `dirname`.
inference_program_
=
paddle
::
inference
::
Load
(
executor_
.
get
(),
scope_
.
get
(),
config_
.
model_dir
);
}
else
if
(
!
config_
.
prog_file
.
empty
()
&&
!
config_
.
param_file
.
empty
())
{
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
inference_program_
=
paddle
::
inference
::
Load
(
executor_
.
get
(),
scope_
.
get
(),
config_
.
prog_file
,
config_
.
param_file
);
}
else
{
LOG
(
ERROR
)
<<
"fail to load inference model."
;
return
false
;
}
OptimizeInferenceProgram
();
ctx_
=
executor_
->
Prepare
(
*
inference_program_
,
0
);
VLOG
(
50
)
<<
"to create variables"
;
executor_
->
CreateVariables
(
*
inference_program_
,
sub_scope_
?
sub_scope_
:
scope_
.
get
(),
0
);
// Get the feed_target_names and fetch_target_names
PrepareFeedFetch
();
return
true
;
}
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
,
int
batch_size
=
-
1
)
override
{
PADDLE_ENFORCE_GT
(
batch_size
,
0
,
"TensorRT engine needs the argument batch_size set"
);
FLAGS_tensorrt_engine_batch_size
=
batch_size
;
return
NativePaddlePredictor
::
Run
(
inputs
,
output_data
,
batch_size
);
}
void
OptimizeInferenceProgram
()
{
// Analyze inference_program
Argument
argument
;
argument
.
Set
<
int
>
(
"minimum_subgraph_size"
,
new
int
(
config_
.
minimum_subgraph_size
));
argument
.
Set
<
int
>
(
"max_batch_size"
,
new
int
(
config_
.
max_batch_size
));
argument
.
Set
<
int
>
(
"workspace_size"
,
new
int
(
config_
.
workspace_size
));
argument
.
Set
<
std
::
string
>
(
"precision_mode"
,
new
std
::
string
(
config_
.
precision_mode
));
if
(
!
config_
.
model_dir
.
empty
())
{
argument
.
fluid_model_dir
.
reset
(
new
std
::
string
(
config_
.
model_dir
));
}
else
{
PADDLE_ENFORCE
(
!
config_
.
param_file
.
empty
(),
"Either model_dir or (param_file, prog_file) should be set."
);
PADDLE_ENFORCE
(
!
config_
.
prog_file
.
empty
());
argument
.
fluid_model_program_path
.
reset
(
new
std
::
string
(
config_
.
prog_file
));
argument
.
fluid_model_param_path
.
reset
(
new
std
::
string
(
config_
.
param_file
));
}
argument
.
origin_program_desc
.
reset
(
new
ProgramDesc
(
*
inference_program_
->
Proto
()));
Singleton
<
Analyzer
>::
Global
().
Run
(
&
argument
);
CHECK
(
argument
.
transformed_program_desc
);
VLOG
(
50
)
<<
"transformed program:
\n
"
<<
argument
.
transformed_program_desc
->
SerializeAsString
();
VLOG
(
50
)
<<
"to prepare executor"
;
inference_program_
.
reset
(
new
framework
::
ProgramDesc
(
*
argument
.
transformed_program_desc
));
}
private:
MixedRTConfig
config_
;
};
// Creates a TensorRTSubgraphPredictor from `config`.
//
// When config.use_gpu is set, the GPU settings are validated
// (fraction_of_gpu_memory > 0, device >= 0) and, if the fraction lies within
// [0, 0.95], it is forwarded to gflags as --fraction_of_gpu_memory_to_use.
//
// Returns nullptr when the predictor's Init() fails.
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
    const MixedRTConfig& config) {
  VLOG(30) << "create TensorRTSubgraphPredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GT(
        config.fraction_of_gpu_memory, 0.f,
        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
    std::vector<std::string> flags;
    // Both bounds must hold. The previous `||` made this condition true for
    // every value, so the upper bound of 0.95 was never applied.
    if (config.fraction_of_gpu_memory >= 0.0f &&
        config.fraction_of_gpu_memory <= 0.95f) {
      // Placeholder first entry; presumably stands in for the program name
      // expected by the flag parser (TODO confirm against InitGflags).
      flags.push_back("dummpy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         std::to_string(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(30) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(
      new TensorRTSubgraphPredictor(config));
  if (!dynamic_cast<TensorRTSubgraphPredictor*>(predictor.get())
           ->Init(nullptr)) {
    return nullptr;
  }
  return std::move(predictor);
}
// Convenience specialization: forwards to the
// PaddleEngineKind::kAutoMixedTensorRT specialization with the same config.
template
<
>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
<
MixedRTConfig
>
(
const
MixedRTConfig
&
config
)
{
return
CreatePaddlePredictor
<
MixedRTConfig
,
PaddleEngineKind
::
kAutoMixedTensorRT
>
(
config
);
}
}
// namespace paddle
USE_TRT_CONVERTER
(
elementwise_add_weight
);
USE_TRT_CONVERTER
(
elementwise_add_tensor
);
USE_TRT_CONVERTER
(
elementwise_sub_tensor
);
USE_TRT_CONVERTER
(
elementwise_div_tensor
);
USE_TRT_CONVERTER
(
elementwise_mul_tensor
);
USE_TRT_CONVERTER
(
elementwise_max_tensor
);
USE_TRT_CONVERTER
(
elementwise_min_tensor
);
USE_TRT_CONVERTER
(
elementwise_pow_tensor
);
USE_TRT_CONVERTER
(
mul
);
USE_TRT_CONVERTER
(
conv2d
);
USE_TRT_CONVERTER
(
relu
);
USE_TRT_CONVERTER
(
sigmoid
);
USE_TRT_CONVERTER
(
tanh
);
USE_TRT_CONVERTER
(
fc
);
USE_TRT_CONVERTER
(
pool2d
);
USE_TRT_CONVERTER
(
softmax
);
USE_TRT_CONVERTER
(
batch_norm
);
USE_TRT_CONVERTER
(
concat
);
USE_TRT_CONVERTER
(
dropout
);
USE_TRT_CONVERTER
(
pad
);
USE_TRT_CONVERTER
(
split
);
paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc
已删除
100644 → 0
浏览文件 @
0b962680
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace
paddle
{
using
contrib
::
MixedRTConfig
;
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
void
CompareTensorRTWithFluid
(
bool
enable_tensorrt
)
{
FLAGS_IA_enable_tensorrt_subgraph_engine
=
enable_tensorrt
;
//# 1. Create PaddlePredictor with a config.
NativeConfig
config0
;
config0
.
model_dir
=
FLAGS_dirname
;
config0
.
use_gpu
=
true
;
config0
.
fraction_of_gpu_memory
=
0.3
;
config0
.
device
=
0
;
MixedRTConfig
config1
;
config1
.
model_dir
=
FLAGS_dirname
;
config1
.
use_gpu
=
true
;
config1
.
fraction_of_gpu_memory
=
0.3
;
config1
.
device
=
0
;
config1
.
max_batch_size
=
10
;
auto
predictor0
=
CreatePaddlePredictor
<
NativeConfig
>
(
config0
);
auto
predictor1
=
CreatePaddlePredictor
<
MixedRTConfig
>
(
config1
);
for
(
int
batch_id
=
0
;
batch_id
<
1
;
batch_id
++
)
{
//# 2. Prepare input.
std
::
vector
<
int64_t
>
data
(
20
);
for
(
int
i
=
0
;
i
<
20
;
i
++
)
data
[
i
]
=
i
;
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
10
,
1
});
tensor
.
data
=
PaddleBuf
(
data
.
data
(),
data
.
size
()
*
sizeof
(
int64_t
));
tensor
.
dtype
=
PaddleDType
::
INT64
;
// For simplicity, we set all the slots with the same data.
std
::
vector
<
PaddleTensor
>
slots
(
4
,
tensor
);
//# 3. Run
std
::
vector
<
PaddleTensor
>
outputs0
;
std
::
vector
<
PaddleTensor
>
outputs1
;
CHECK
(
predictor0
->
Run
(
slots
,
&
outputs0
));
CHECK
(
predictor1
->
Run
(
slots
,
&
outputs1
,
10
));
//# 4. Get output.
ASSERT_EQ
(
outputs0
.
size
(),
1UL
);
ASSERT_EQ
(
outputs1
.
size
(),
1UL
);
const
size_t
num_elements
=
outputs0
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements1
=
outputs1
.
front
().
data
.
length
()
/
sizeof
(
float
);
EXPECT_EQ
(
num_elements
,
num_elements1
);
auto
*
data0
=
static_cast
<
float
*>
(
outputs0
.
front
().
data
.
data
());
auto
*
data1
=
static_cast
<
float
*>
(
outputs1
.
front
().
data
.
data
());
ASSERT_GT
(
num_elements
,
0UL
);
for
(
size_t
i
=
0
;
i
<
std
::
min
(
num_elements
,
num_elements1
);
i
++
)
{
EXPECT_NEAR
(
data0
[
i
],
data1
[
i
],
1e-3
);
}
}
}
// Runs the native-vs-mixed comparison with `enable_tensorrt` set to false.
TEST
(
paddle_inference_api_tensorrt_subgraph_engine
,
without_tensorrt
)
{
CompareTensorRTWithFluid
(
false
);
}
// Runs the native-vs-mixed comparison with `enable_tensorrt` set to true.
TEST
(
paddle_inference_api_tensorrt_subgraph_engine
,
with_tensorrt
)
{
CompareTensorRTWithFluid
(
true
);
}
}
// namespace paddle
paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
浏览文件 @
ddb12035
...
@@ -23,7 +23,7 @@ limitations under the License. */
...
@@ -23,7 +23,7 @@ limitations under the License. */
#include <memory>
#include <memory>
#include <thread> //NOLINT
#include <thread> //NOLINT
#include "
paddle/include/paddle_inference_api
.h"
#include "
utils
.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_bool
(
use_gpu
,
false
,
"Whether use gpu."
);
DEFINE_bool
(
use_gpu
,
false
,
"Whether use gpu."
);
...
...
paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
浏览文件 @
ddb12035
...
@@ -36,14 +36,13 @@ namespace demo {
...
@@ -36,14 +36,13 @@ namespace demo {
*/
*/
void
Main
()
{
void
Main
()
{
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
paddle
::
contrib
::
MixedRTConfig
config
;
paddle
::
contrib
::
AnalysisConfig
config
(
true
)
;
config
.
param_file
=
FLAGS_modeldir
+
"/__params__"
;
config
.
param_file
=
FLAGS_modeldir
+
"/__params__"
;
config
.
prog_file
=
FLAGS_modeldir
+
"/__model__"
;
config
.
prog_file
=
FLAGS_modeldir
+
"/__model__"
;
config
.
use_gpu
=
true
;
config
.
device
=
0
;
config
.
device
=
0
;
config
.
max_batch_size
=
1
;
config
.
EnableTensorRtEngine
()
;
config
.
fraction_of_gpu_memory
=
0.1
;
// set by yourself
config
.
fraction_of_gpu_memory
=
0.1
;
// set by yourself
predictor
=
CreatePaddlePredictor
<
paddle
::
contrib
::
MixedRTConfig
>
(
config
);
predictor
=
CreatePaddlePredictor
(
config
);
VLOG
(
30
)
<<
"begin to process data"
;
VLOG
(
30
)
<<
"begin to process data"
;
// Just a single batch of data.
// Just a single batch of data.
...
...
paddle/fluid/inference/api/demo_ci/vis_demo.cc
浏览文件 @
ddb12035
...
@@ -17,7 +17,7 @@ limitations under the License. */
...
@@ -17,7 +17,7 @@ limitations under the License. */
*/
*/
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
// use glog instead of CHECK to avoid importing other paddle header files.
#include <glog/logging.h>
#include "utils.h" // NOLINT
#include "utils.h" // NOLINT
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
@@ -40,20 +40,17 @@ using contrib::AnalysisConfig;
...
@@ -40,20 +40,17 @@ using contrib::AnalysisConfig;
*/
*/
void
Main
(
bool
use_gpu
)
{
void
Main
(
bool
use_gpu
)
{
std
::
unique_ptr
<
PaddlePredictor
>
predictor
,
analysis_predictor
;
std
::
unique_ptr
<
PaddlePredictor
>
predictor
,
analysis_predictor
;
AnalysisConfig
config
;
AnalysisConfig
config
(
use_gpu
)
;
config
.
param_file
=
FLAGS_modeldir
+
"/__params__"
;
config
.
param_file
=
FLAGS_modeldir
+
"/__params__"
;
config
.
prog_file
=
FLAGS_modeldir
+
"/__model__"
;
config
.
prog_file
=
FLAGS_modeldir
+
"/__model__"
;
config
.
use_gpu
=
use_gpu
;
config
.
device
=
0
;
config
.
device
=
0
;
if
(
FLAGS_use_gpu
)
{
if
(
FLAGS_use_gpu
)
{
config
.
fraction_of_gpu_memory
=
0.1
;
// set by yourself
config
.
fraction_of_gpu_memory
=
0.1
;
// set by yourself
}
}
VLOG
(
30
)
<<
"init predictor"
;
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
analysis_predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
);
analysis_predictor
=
CreatePaddlePredictor
(
config
);
VLOG
(
30
)
<<
"begin to process data"
;
// Just a single batch of data.
// Just a single batch of data.
std
::
string
line
;
std
::
string
line
;
std
::
ifstream
file
(
FLAGS_data
);
std
::
ifstream
file
(
FLAGS_data
);
...
@@ -68,13 +65,10 @@ void Main(bool use_gpu) {
...
@@ -68,13 +65,10 @@ void Main(bool use_gpu) {
PaddleBuf
(
record
.
data
.
data
(),
record
.
data
.
size
()
*
sizeof
(
float
));
PaddleBuf
(
record
.
data
.
data
(),
record
.
data
.
size
()
*
sizeof
(
float
));
input
.
dtype
=
PaddleDType
::
FLOAT32
;
input
.
dtype
=
PaddleDType
::
FLOAT32
;
VLOG
(
30
)
<<
"run executor"
;
std
::
vector
<
PaddleTensor
>
output
,
analysis_output
;
std
::
vector
<
PaddleTensor
>
output
,
analysis_output
;
predictor
->
Run
({
input
},
&
output
,
1
);
predictor
->
Run
({
input
},
&
output
,
1
);
VLOG
(
30
)
<<
"output.size "
<<
output
.
size
();
auto
&
tensor
=
output
.
front
();
auto
&
tensor
=
output
.
front
();
VLOG
(
30
)
<<
"output: "
<<
SummaryTensor
(
tensor
);
// compare with reference result
// compare with reference result
CheckOutput
(
FLAGS_refer
,
tensor
);
CheckOutput
(
FLAGS_refer
,
tensor
);
...
...
paddle/fluid/inference/api/details/zero_copy_tensor.cc
浏览文件 @
ddb12035
...
@@ -51,7 +51,7 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
...
@@ -51,7 +51,7 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
}
}
template
<
typename
T
>
template
<
typename
T
>
T
*
ZeroCopyTensor
::
data
(
PaddlePlace
*
place
,
int
*
size
)
{
T
*
ZeroCopyTensor
::
data
(
PaddlePlace
*
place
,
int
*
size
)
const
{
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
auto
*
res
=
tensor
->
data
<
T
>
();
auto
*
res
=
tensor
->
data
<
T
>
();
...
@@ -67,8 +67,10 @@ T *ZeroCopyTensor::data(PaddlePlace *place, int *size) {
...
@@ -67,8 +67,10 @@ T *ZeroCopyTensor::data(PaddlePlace *place, int *size) {
return
res
;
return
res
;
}
}
template
float
*
ZeroCopyTensor
::
data
<
float
>(
PaddlePlace
*
place
,
int
*
size
);
template
float
*
ZeroCopyTensor
::
data
<
float
>(
PaddlePlace
*
place
,
template
int64_t
*
ZeroCopyTensor
::
data
<
int64_t
>(
PaddlePlace
*
place
,
int
*
size
);
int
*
size
)
const
;
template
int64_t
*
ZeroCopyTensor
::
data
<
int64_t
>(
PaddlePlace
*
place
,
int
*
size
)
const
;
template
float
*
ZeroCopyTensor
::
mutable_data
<
float
>(
PaddlePlace
place
);
template
float
*
ZeroCopyTensor
::
mutable_data
<
float
>(
PaddlePlace
place
);
template
int64_t
*
ZeroCopyTensor
::
mutable_data
<
int64_t
>(
PaddlePlace
place
);
template
int64_t
*
ZeroCopyTensor
::
mutable_data
<
int64_t
>(
PaddlePlace
place
);
...
@@ -84,7 +86,7 @@ void *ZeroCopyTensor::FindTensor() const {
...
@@ -84,7 +86,7 @@ void *ZeroCopyTensor::FindTensor() const {
return
tensor
;
return
tensor
;
}
}
std
::
vector
<
int64_t
>
ZeroCopyTensor
::
shape
()
{
std
::
vector
<
int64_t
>
ZeroCopyTensor
::
shape
()
const
{
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
auto
*
tensor
=
static_cast
<
framework
::
LoDTensor
*>
(
FindTensor
());
PADDLE_ENFORCE
(
tensor
,
"not found tensor called %s in the scope"
,
name_
);
PADDLE_ENFORCE
(
tensor
,
"not found tensor called %s in the scope"
,
name_
);
return
framework
::
vectorize
(
tensor
->
dims
());
return
framework
::
vectorize
(
tensor
->
dims
());
...
...
paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
浏览文件 @
ddb12035
...
@@ -24,18 +24,20 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
...
@@ -24,18 +24,20 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
}
}
template
<
typename
T
>
template
<
typename
T
>
T
*
ZeroCopyTensor
::
data
(
PaddlePlace
*
place
,
int
*
size
)
{
T
*
ZeroCopyTensor
::
data
(
PaddlePlace
*
place
,
int
*
size
)
const
{
return
nullptr
;
return
nullptr
;
}
}
template
float
*
ZeroCopyTensor
::
data
<
float
>(
PaddlePlace
*
place
,
int
*
size
);
template
float
*
ZeroCopyTensor
::
data
<
float
>(
PaddlePlace
*
place
,
template
int64_t
*
ZeroCopyTensor
::
data
<
int64_t
>(
PaddlePlace
*
place
,
int
*
size
);
int
*
size
)
const
;
template
int64_t
*
ZeroCopyTensor
::
data
<
int64_t
>(
PaddlePlace
*
place
,
int
*
size
)
const
;
template
float
*
ZeroCopyTensor
::
mutable_data
(
PaddlePlace
place
);
template
float
*
ZeroCopyTensor
::
mutable_data
(
PaddlePlace
place
);
template
int64_t
*
ZeroCopyTensor
::
mutable_data
(
PaddlePlace
place
);
template
int64_t
*
ZeroCopyTensor
::
mutable_data
(
PaddlePlace
place
);
void
*
ZeroCopyTensor
::
FindTensor
()
const
{
return
nullptr
;
}
void
*
ZeroCopyTensor
::
FindTensor
()
const
{
return
nullptr
;
}
std
::
vector
<
int64_t
>
ZeroCopyTensor
::
shape
()
{
return
{};
}
std
::
vector
<
int64_t
>
ZeroCopyTensor
::
shape
()
const
{
return
{};
}
void
ZeroCopyTensor
::
SetLoD
(
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
x
)
{}
void
ZeroCopyTensor
::
SetLoD
(
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
x
)
{}
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
ddb12035
...
@@ -125,6 +125,51 @@ static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
...
@@ -125,6 +125,51 @@ static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
return
size
;
return
size
;
}
}
static
bool
CompareTensor
(
const
PaddleTensor
&
a
,
const
PaddleTensor
&
b
)
{
if
(
a
.
dtype
!=
b
.
dtype
)
{
LOG
(
ERROR
)
<<
"dtype not match"
;
return
false
;
}
if
(
a
.
lod
.
size
()
!=
b
.
lod
.
size
())
{
LOG
(
ERROR
)
<<
"lod not match"
;
return
false
;
}
for
(
size_t
i
=
0
;
i
<
a
.
lod
.
size
();
i
++
)
{
if
(
a
.
lod
[
i
].
size
()
!=
b
.
lod
[
i
].
size
())
{
LOG
(
ERROR
)
<<
"lod not match"
;
return
false
;
}
for
(
size_t
j
=
0
;
j
<
a
.
lod
[
i
].
size
();
j
++
)
{
if
(
a
.
lod
[
i
][
j
]
!=
b
.
lod
[
i
][
j
])
{
LOG
(
ERROR
)
<<
"lod not match"
;
return
false
;
}
}
}
if
(
a
.
shape
.
size
()
!=
b
.
shape
.
size
())
{
LOG
(
INFO
)
<<
"shape not match"
;
return
false
;
}
for
(
size_t
i
=
0
;
i
<
a
.
shape
.
size
();
i
++
)
{
if
(
a
.
shape
[
i
]
!=
b
.
shape
[
i
])
{
LOG
(
ERROR
)
<<
"shape not match"
;
return
false
;
}
}
auto
*
adata
=
static_cast
<
float
*>
(
a
.
data
.
data
());
auto
*
bdata
=
static_cast
<
float
*>
(
b
.
data
.
data
());
for
(
int
i
=
0
;
i
<
VecReduceToInt
(
a
.
shape
);
i
++
)
{
if
(
adata
[
i
]
!=
bdata
[
i
])
{
LOG
(
ERROR
)
<<
"data not match"
;
return
false
;
}
}
return
true
;
}
static
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
static
std
::
string
DescribeTensor
(
const
PaddleTensor
&
tensor
)
{
std
::
stringstream
os
;
std
::
stringstream
os
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
os
<<
"Tensor ["
<<
tensor
.
name
<<
"]
\n
"
;
...
@@ -157,6 +202,26 @@ static std::string DescribeTensor(const PaddleTensor &tensor) {
...
@@ -157,6 +202,26 @@ static std::string DescribeTensor(const PaddleTensor &tensor) {
return
os
.
str
();
return
os
.
str
();
}
}
// Builds a human-readable, multi-line description of a ZeroCopyTensor: its
// name, shape, LoD levels and the payload printed as floats.
//
// The payload section is left empty when the tensor exposes no data pointer.
// NOTE(review): the payload is always read as float and dereferenced on the
// host; confirm callers only pass float tensors that live in CPU memory.
static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) {
  std::stringstream os;
  os << "Tensor [" << tensor.name() << "]\n";

  os << " - shape: " << to_string(tensor.shape()) << '\n';
  os << " - lod: ";
  for (const auto &l : tensor.lod()) {
    os << to_string(l) << "; ";
  }
  os << "\n";
  os << " - data: ";
  // Give the out-parameters defined values: an implementation of data() may
  // return nullptr without writing to them.
  PaddlePlace place{PaddlePlace::kUNK};
  int size{0};
  const auto *data = tensor.data<float>(&place, &size);
  if (data != nullptr) {
    for (int i = 0; i < size; i++) {
      os << data[i] << " ";
    }
  }
  return os.str();
}
static
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
static
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
,
int
epoch
=
1
)
{
double
latency
,
int
epoch
=
1
)
{
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
...
...
paddle/fluid/inference/a
nalysis/analyzer_main.cc
→
paddle/fluid/inference/a
pi/paddle_anakin_config.h
浏览文件 @
ddb12035
...
@@ -11,23 +11,25 @@
...
@@ -11,23 +11,25 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
/*
#include <cassert>
* This file implements analysizer -- an executation help to analyze and
#include <memory>
* optimize trained model.
#include <string>
*/
#include <vector>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <gflags/gflags.h>
#include <glog/logging.h>
int
main
(
int
argc
,
char
**
argv
)
{
#include "paddle_api.h" // NOLINT
google
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
using
paddle
::
inference
::
analysis
::
Analyzer
;
using
paddle
::
inference
::
analysis
::
Argument
;
Argument
argument
;
namespace
paddle
{
Analyzer
analyzer
;
namespace
contrib
{
analyzer
.
Run
(
&
argument
);
// Configurations for Anakin engine.
struct
AnakinConfig
:
public
PaddlePredictor
::
Config
{
enum
TargetType
{
NVGPU
=
0
,
X86
};
int
device
;
std
::
string
model_file
;
int
max_batch_size
{
-
1
};
TargetType
target_type
;
};
return
0
;
}
// namespace contrib
}
}
// namespace paddle
paddle/fluid/inference/api/paddle_analysis_config.h
0 → 100644
浏览文件 @
ddb12035
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <memory>
#include <string>
#include <vector>
// Here we include some header files with relative paths, because in deployment
// the absolute path of this header file will be changed.
#include "paddle_api.h" // NOLINT
#include "paddle_pass_builder.h" // NOLINT
namespace
paddle
{
class
AnalysisPredictor
;
// ==
//
// -----------------------------------------------------------------------------------
// NOTE: The following APIs are not mature yet, we are still working on them.
namespace
contrib
{
// NOTE WIP, not stable yet.
//
// Configuration for the analysis predictor. Extends NativeConfig with
// switches for IR optimization, TensorRT and MKLDNN.
struct AnalysisConfig : public NativeConfig {
  explicit AnalysisConfig(bool use_gpu = false);
  // NOTE(review): the copy and move constructors are declared `explicit`, so
  // copy-initialization (`AnalysisConfig b = a;`) and pass/return by value are
  // rejected; only direct-initialization (`AnalysisConfig b(a);`) compiles.
  explicit AnalysisConfig(const AnalysisConfig& other);
  explicit AnalysisConfig(AnalysisConfig&& other);

  // Determine whether to perform graph optimization.
  bool enable_ir_optim = true;

  // Get a pass builder to customize the passes in the IR analysis phase.
  PassStrategy* pass_builder() const;

  // NOT stable yet.
  bool use_feed_fetch_ops{true};

  void EnableTensorRtEngine(int workspace_size = 1 << 20,
                            int max_batch_size = 1);
  // NOTE this is just for internal development, please do not use it.
  // NOT stable yet.
  void EnableMKLDNN();
  bool use_mkldnn() const { return use_mkldnn_; }

  friend class ::paddle::AnalysisPredictor;

 protected:
  bool use_tensorrt_{false};
  bool use_mkldnn_{false};
  // Initialized to the default arguments of EnableTensorRtEngine() so the
  // fields never hold indeterminate values when that method was not called.
  int tensorrt_workspace_size_{1 << 20};
  int tensorrt_max_batchsize_{1};
  std::unique_ptr<PassStrategy> pass_builder_;
};
// Configurations for Anakin engine.
struct AnakinConfig : public PaddlePredictor::Config {
  enum TargetType { NVGPU = 0, X86 };
  // Defaults to device 0, matching NativeConfig::device, so the field never
  // holds an indeterminate value.
  int device{0};
  std::string model_file;
  int max_batch_size{-1};
  // Defaults to the first enumerator instead of being left uninitialized.
  TargetType target_type{NVGPU};
};
}
// namespace contrib
}
// namespace paddle
paddle/fluid/inference/api/paddle_api.h
0 → 100644
浏览文件 @
ddb12035
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <memory>
#include <string>
#include <vector>
namespace
paddle
{
// Element data types supported by the inference API.
enum PaddleDType {
  FLOAT32 = 0,
  INT64 = 1,
  // TODO(Superjomn) support more data types if needed.
};
/*
 * Memory management for PaddleTensor.
 * The PaddleBuf holds a buffer for data input or output. The memory can be
 * allocated by the user or by PaddleBuf itself, but in any case, the PaddleBuf
 * should be reused for better performance.
 *
 * For user allocated memory, the following API can be used:
 * - PaddleBuf(void* data, size_t length) to set an external memory by
 * specifying the memory address and length.
 * - Reset(void* data, size_t length) to reset the PaddleBuf with an external
 * memory.
 * ATTENTION, for user allocated memory, deallocation should be done by users
 * externally after the program finished. The PaddleBuf won't do any allocation
 * or deallocation.
 *
 * To have the PaddleBuf allocate and manage the memory:
 * - PaddleBuf(size_t length) will allocate a memory of size `length`.
 * - Resize(size_t length) resizes the memory to no less than `length`.
 * ATTENTION, if the allocated memory is larger than `length`, nothing will be
 * done.
 */
class PaddleBuf {
 public:
  // PaddleBuf allocates `length` bytes internally (new char[length]) and marks
  // the memory as owned.
  explicit PaddleBuf(size_t length)
      : data_(new char[length]), length_(length), memory_owned_(true) {}
  // Set external memory, the PaddleBuf won't manage it (memory_owned_ is
  // false).
  PaddleBuf(void* data, size_t length)
      : data_(data), length_(length), memory_owned_{false} {}
  // Copy only available when memory is managed externally.
  // Declared explicit, so copies must be written as direct-initialization.
  explicit PaddleBuf(const PaddleBuf&);

  // Resize the memory.
  void Resize(size_t length);
  // Reset to external memory, with address and length set.
  void Reset(void* data, size_t length);
  // Tell whether the buffer is empty, i.e. its recorded length is zero.
  bool empty() const { return length_ == 0; }
  // Get the memory address.
  void* data() const { return data_; }
  // Get the memory length in bytes.
  size_t length() const { return length_; }

  // The destructor delegates to Free(), which is defined outside this header.
  ~PaddleBuf() { Free(); }
  PaddleBuf& operator=(const PaddleBuf&);
  PaddleBuf& operator=(PaddleBuf&&);
  // A default-constructed buffer has a null address and zero length.
  PaddleBuf() = default;
  PaddleBuf(PaddleBuf&& other);

 private:
  void Free();
  void* data_{nullptr};  // pointer to the data memory.
  size_t length_{0};     // number of memory bytes.
  bool memory_owned_{true};
};
// Basic input and output data structure for PaddlePredictor.
struct PaddleTensor {
  PaddleTensor() = default;
  std::string name;  // variable name.
  std::vector<int> shape;
  PaddleBuf data;  // blob of data.
  // No default initializer is given here, so the value must be assigned
  // before it is read.
  PaddleDType dtype;
  std::vector<std::vector<size_t>> lod;  // Tensor+LoD equals LoDTensor
};
// Where a tensor's memory lives; kUNK marks an unknown place.
enum class PaddlePlace {
  kUNK = -1,
  kCPU = 0,
  kGPU = 1,
};
// Tensor without copy, currently only supports AnalysisPredictor.
// Instances cannot be constructed directly by users: the constructor is
// protected and AnalysisPredictor is declared a friend.
class ZeroCopyTensor {
 public:
  void Reshape(const std::vector<int>& shape);

  // Get the memory in CPU or GPU with a specific data type; Reshape should be
  // called first to tell the data size.
  // One can directly write to the returned memory to feed the data.
  // This is for writing the input tensor.
  template <typename T>
  T* mutable_data(PaddlePlace place);
  // Get the memory directly, will return the place and memory size by pointer.
  // This is for reading the output tensor.
  template <typename T>
  T* data(PaddlePlace* place, int* size) const;

  std::vector<int64_t> shape() const;

  void SetLoD(const std::vector<std::vector<size_t>>& x);
  std::vector<std::vector<size_t>> lod() const;
  // Name previously stored through SetName().
  const std::string& name() const { return name_; }

 protected:
  // `scope` is kept as an opaque pointer.
  // NOTE(review): presumably a framework scope used by FindTensor() - confirm.
  explicit ZeroCopyTensor(void* scope) : scope_{scope} {}
  void SetName(const std::string& name) { name_ = name; }
  void* FindTensor() const;

 private:
  std::string name_;
  // No default initializer; must be assigned before it is read.
  bool input_or_output_;
  friend class AnalysisPredictor;
  void* scope_{nullptr};
};
/*
 * A simple Inference API for Paddle.
 *
 * Abstract base class: Run() and Clone() are pure virtual, the zero-copy
 * entry points have default implementations that report "not supported"
 * (nullptr / false). Predictors are not copyable.
 */
class PaddlePredictor {
 public:
  struct Config;
  PaddlePredictor() = default;
  PaddlePredictor(const PaddlePredictor&) = delete;
  PaddlePredictor& operator=(const PaddlePredictor&) = delete;

  // Predict a record.
  // The caller should be responsible for allocating and releasing the memory of
  // `inputs`. `inputs` should be available until Run returns. Caller should be
  // responsible for the output tensor's buffer, either allocated or passed from
  // outside.
  virtual bool Run(const std::vector<PaddleTensor>& inputs,
                   std::vector<PaddleTensor>* output_data,
                   int batch_size = -1) = 0;

  // Zero copy input and output optimization.
  // Get the input or output tensors, and operate on their memory directly,
  // without copy.
  // The base implementations return nullptr.
  virtual std::unique_ptr<ZeroCopyTensor> GetInputTensor(
      const std::string& name) {
    return nullptr;
  }
  virtual std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
      const std::string& name) {
    return nullptr;
  }
  // The base implementation returns false.
  virtual bool ZeroCopyRun() { return false; }

  // Clone a predictor that shares the model weights; the cloned predictor
  // should be thread-safe.
  virtual std::unique_ptr<PaddlePredictor> Clone() = 0;

  // Destroy the Predictor.
  virtual ~PaddlePredictor() = default;

  // The common configs for all the predictors.
  struct Config {
    std::string model_dir;  // path to the model directory.
  };
};
// Configuration for the native predictor; extends the common predictor Config
// with device selection and explicit model file paths.
struct NativeConfig : public PaddlePredictor::Config {
  // GPU related fields.
  bool use_gpu{false};
  int device{0};
  float fraction_of_gpu_memory{-1.f};  // Change to a float in (0,1] if needed.

  // Specify the exact path of program and parameter files.
  std::string prog_file;
  std::string param_file;

  // Specify the variable's name of each input if input tensors don't follow the
  // `feeds` and `fetches` of the phase `save_inference_model`.
  bool specify_input_name{false};
};
// A factory to help create different predictors.
//
// Usage:
//
// NativeConfig config;
// ... // change the configs.
// auto native_predictor = CreatePaddlePredictor(config);
//
// FOR EXTENSION DEVELOPER:
// Different predictors are designated by config type. Similar configs can be
// merged, but there shouldn't be a huge config containing different fields for
// more than one kind of predictors.
//
// Only the declaration lives here; the definitions for each ConfigT are
// provided elsewhere.
template <typename ConfigT>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
// NOTE: the APIs below are considered too trivial and are planned to be
// discarded in later versions.
enum class PaddleEngineKind {
  kNative = 0,             // Use the native Fluid facility.
  kAutoMixedTensorRT = 1,  // Automatically mix Fluid with TensorRT.
  kAnalysis = 2,           // More optimization.
  kAnakin = 3,             // Use Anakin for inference, not mature yet.
};
// Factory overload that selects the predictor by both config type and an
// explicit PaddleEngineKind template argument. Declaration only; the
// definitions are provided elsewhere.
template <typename ConfigT, PaddleEngineKind engine>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
// Declaration only; defined elsewhere.
// NOTE(review): presumably returns the size in bytes of one element of
// `dtype` - confirm against the definition.
int PaddleDtypeSize(PaddleDType dtype);
}
// namespace paddle
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
ddb12035
...
@@ -26,265 +26,9 @@ limitations under the License. */
...
@@ -26,265 +26,9 @@ limitations under the License. */
#include <string>
#include <string>
#include <vector>
#include <vector>
namespace
paddle
{
#include "paddle_api.h" // NOLINT
#ifndef WITH_ANAKIN
// Data type.
#include "paddle_analysis_config.h" // NOLINT
enum
PaddleDType
{
#else
FLOAT32
,
#include "paddle_anakin_config.h" // NOLINT
INT64
,
#endif
// TODO(Superjomn) support more data types if needed.
};
/*
* Memory menage for PaddleTensor.
* The PaddleBuf holds a buffer for data input or output. The memory can be
* allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
* should be reused for better performance.
*
* For user allocated memory, the following API can be used:
* - PaddleBuf(void* data, size_t length) to set an external memory by
* specifying
* the memory address and length.
* - Reset(void* data, size_t length) to reset the PaddleBuf with an external
* memory.
* ATTENTION, for user allocated memory, deallocation should be done by users
* externally after the program finished. The PaddleBuf won't do any allocation
* or deallocation.
*
* To have the PaddleBuf allocate and manage the memory:
* - PaddleBuf(size_t length) will allocate a memory of size `length`.
* - Resize(size_t length) resize the memory to no less than `length`, ATTENTION
* if the allocated memory is larger than `length`, nothing will done.
*/
class
PaddleBuf
{
public:
// PaddleBuf allocate memory internally, and manage it.
explicit
PaddleBuf
(
size_t
length
)
:
data_
(
new
char
[
length
]),
length_
(
length
),
memory_owned_
(
true
)
{}
// Set external memory, the PaddleBuf won't manage it.
PaddleBuf
(
void
*
data
,
size_t
length
)
:
data_
(
data
),
length_
(
length
),
memory_owned_
{
false
}
{}
// Copy only available when memory is managed externally.
explicit
PaddleBuf
(
const
PaddleBuf
&
);
// Resize the memory.
void
Resize
(
size_t
length
);
// Reset to external memory, with address and length set.
void
Reset
(
void
*
data
,
size_t
length
);
// Tell whether the buffer is empty.
bool
empty
()
const
{
return
length_
==
0
;
}
// Get the memory address.
void
*
data
()
const
{
return
data_
;
}
// Get the memory length.
size_t
length
()
const
{
return
length_
;
}
~
PaddleBuf
()
{
Free
();
}
PaddleBuf
&
operator
=
(
const
PaddleBuf
&
);
PaddleBuf
&
operator
=
(
PaddleBuf
&&
);
PaddleBuf
()
=
default
;
PaddleBuf
(
PaddleBuf
&&
other
);
private:
void
Free
();
void
*
data_
{
nullptr
};
// pointer to the data memory.
size_t
length_
{
0
};
// number of memory bytes.
bool
memory_owned_
{
true
};
};
// Basic input and output data structure for PaddlePredictor.
struct
PaddleTensor
{
PaddleTensor
()
=
default
;
std
::
string
name
;
// variable name.
std
::
vector
<
int
>
shape
;
PaddleBuf
data
;
// blob of data.
PaddleDType
dtype
;
std
::
vector
<
std
::
vector
<
size_t
>>
lod
;
// Tensor+LoD equals LoDTensor
};
enum
class
PaddlePlace
{
kUNK
=
-
1
,
kCPU
,
kGPU
};
// Tensor without copy, currently only supports AnalysisPredictor.
class
ZeroCopyTensor
{
public:
void
Reshape
(
const
std
::
vector
<
int
>&
shape
);
// Get the memory in CPU or GPU with specific data type, should Reshape first
// to tell the data size.
// Once can directly call this data to feed the data.
// This is for write the input tensor.
template
<
typename
T
>
T
*
mutable_data
(
PaddlePlace
place
);
// Get the memory directly, will return the place and memory size by pointer.
// This is for reading the output tensor.
template
<
typename
T
>
T
*
data
(
PaddlePlace
*
place
,
int
*
size
);
std
::
vector
<
int64_t
>
shape
();
void
SetLoD
(
const
std
::
vector
<
std
::
vector
<
size_t
>>&
x
);
std
::
vector
<
std
::
vector
<
size_t
>>
lod
()
const
;
protected:
explicit
ZeroCopyTensor
(
void
*
scope
)
:
scope_
{
scope
}
{}
void
SetName
(
const
std
::
string
&
name
)
{
name_
=
name
;
}
void
*
FindTensor
()
const
;
private:
std
::
string
name_
;
bool
input_or_output_
;
friend
class
AnalysisPredictor
;
void
*
scope_
{
nullptr
};
};
/*
* A simple Inference API for Paddle.
*/
class
PaddlePredictor
{
public:
struct
Config
;
PaddlePredictor
()
=
default
;
PaddlePredictor
(
const
PaddlePredictor
&
)
=
delete
;
PaddlePredictor
&
operator
=
(
const
PaddlePredictor
&
)
=
delete
;
// Predict an record.
// The caller should be responsible for allocating and releasing the memory of
// `inputs`. `inputs` should be available until Run returns. Caller should be
// responsible for the output tensor's buffer, either allocated or passed from
// outside.
virtual
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
,
int
batch_size
=
-
1
)
=
0
;
// Zero copy input and output optimization.
// Get the input or output tensors, and operate on their memory directly,
// without copy.
virtual
std
::
unique_ptr
<
ZeroCopyTensor
>
GetInputTensor
(
const
std
::
string
&
name
)
{
return
nullptr
;
}
virtual
std
::
unique_ptr
<
ZeroCopyTensor
>
GetOutputTensor
(
const
std
::
string
&
name
)
{
return
nullptr
;
}
virtual
bool
ZeroCopyRun
()
{
return
false
;
}
// Clone a predictor that share the model weights, the Cloned predictor should
// be thread-safe.
virtual
std
::
unique_ptr
<
PaddlePredictor
>
Clone
()
=
0
;
// Destroy the Predictor.
virtual
~
PaddlePredictor
()
=
default
;
// The common configs for all the predictors.
struct
Config
{
std
::
string
model_dir
;
// path to the model directory.
};
};
struct
NativeConfig
:
public
PaddlePredictor
::
Config
{
// GPU related fields.
bool
use_gpu
{
false
};
int
device
{
0
};
float
fraction_of_gpu_memory
{
-
1.
f
};
// Change to a float in (0,1] if needed.
// Specify the exact path of program and parameter files.
std
::
string
prog_file
;
std
::
string
param_file
;
// Specify the variable's name of each input if input tensors don't follow the
// `feeds` and `fetches` of the phase `save_inference_model`.
bool
specify_input_name
{
false
};
};
// A factory to help create different predictors.
//
// Usage:
//
// NativeConfig config;
// ... // change the configs.
// auto native_predictor = CreatePaddlePredictor(config);
//
// FOR EXTENSION DEVELOPER:
// Different predictors are designated by config type. Similar configs can be
// merged, but there shouldn't be a huge config containing different fields for
// more than one kind of predictors.
template
<
typename
ConfigT
>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
(
const
ConfigT
&
config
);
// NOTE The following APIs are too trivial, we will discard it in the following
// versions.
enum
class
PaddleEngineKind
{
kNative
=
0
,
// Use the native Fluid facility.
kAutoMixedTensorRT
,
// Automatically mix Fluid with TensorRT.
kAnalysis
,
// More optimization.
kAnakin
// Use Anakin for inference, not mature yet.
};
template
<
typename
ConfigT
,
PaddleEngineKind
engine
>
std
::
unique_ptr
<
PaddlePredictor
>
CreatePaddlePredictor
(
const
ConfigT
&
config
);
// ==
//
// -----------------------------------------------------------------------------------
// NOTE: The following APIs are not mature yet, we are still working on them.
namespace
contrib
{
// Accelerate GPU computation with TensorRT engine.
struct
MixedRTConfig
:
public
NativeConfig
{
// Determine whether a subgraph will be executed by TRT.
int
min_subgraph_size
{
1
};
// While TensorRT allows an engine optimized for a given max batch size
// to run at any smaller size, the performance for those smaller
// sizes may not be as well-optimized. Therefore, Max batch is best
// equivalent to the runtime batch size.
int
max_batch_size
{
1
};
// For workspace_size, refer it from here:
// https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
int
workspace_size
{
1
<<
30
};
// We transform the Ops that can be converted into TRT layer in the model,
// and aggregate these Ops into subgraphs for TRT execution.
// We set this variable to control the minimum number of nodes in the
// subgraph, 3 as default value.
int
minimum_subgraph_size
=
3
;
// Reserved configuration
// We just support "FP32" now, "FP16" and "INT8" will be supported.
std
::
string
precision_mode
=
"FP32"
;
};
// NOTE WIP, not stable yet.
struct
AnalysisConfig
:
public
NativeConfig
{
enum
class
IrPassMode
{
kSystem
,
// Use system default passes, not customize.
kInclude
,
// Specify the passes in `ir_passes`.
kExclude
// Specify the disabled passes in `ir_passes`.
};
// Determine whether to perform graph optimization.
bool
enable_ir_optim
=
true
;
// Manually determine the IR passes to run.
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
// passes to be excluded/included
std
::
vector
<
std
::
string
>
ir_passes
{
"embedding_fc_lstm_fuse_pass"
};
// NOT stable yet.
bool
use_feed_fetch_ops
{
true
};
// NOTE this is just for internal development, please not use it.
// NOT stable yet.
bool
_use_mkldnn
{
false
};
};
// Configurations for Anakin engine.
struct
AnakinConfig
:
public
PaddlePredictor
::
Config
{
enum
TargetType
{
NVGPU
=
0
,
X86
};
int
device
;
std
::
string
model_file
;
int
max_batch_size
{
-
1
};
TargetType
target_type
;
};
}
// namespace contrib
int
PaddleDtypeSize
(
PaddleDType
dtype
);
}
// namespace paddle
paddle/fluid/inference/a
nalysis/tensorrt_subgraph_node_mark_pass.h
→
paddle/fluid/inference/a
pi/paddle_pass_builder.cc
浏览文件 @
ddb12035
...
@@ -12,49 +12,57 @@
...
@@ -12,49 +12,57 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
/*
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
* This file defines TensorRTSubgraphNodeMarkPass which helps to mark the ops
#include <glog/logging.h>
* that supported by TensorRT engine.
*/
#pragma once
#include <string>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
/*
void
PaddlePassBuilder
::
AppendPass
(
const
std
::
string
&
pass_type
)
{
* Mark the operators that TensorRT engine supports.
passes_
.
push_back
(
pass_type
);
*/
}
class
TensorRTSubgraphNodeMarkPass
:
public
DataFlowGraphPass
{
public:
using
teller_t
=
SubGraphSplitter
::
NodeInsideSubgraphTeller
;
explicit
TensorRTSubgraphNodeMarkPass
(
const
teller_t
&
teller
)
void
PaddlePassBuilder
::
TurnOnDebug
()
{
:
teller_
(
teller
)
{}
std
::
vector
<
std
::
string
>
passes
;
auto
it
=
std
::
begin
(
passes_
);
bool
Initialize
(
Argument
*
argument
)
override
{
return
true
;
}
while
(
it
!=
std
::
end
(
passes_
))
{
if
(
*
it
!=
"graph_viz_pass"
)
{
it
=
passes_
.
insert
(
it
+
1
,
"graph_viz_pass"
);
}
else
{
++
it
;
}
}
}
// This class get a sub-graph as input and determine whether to transform this
std
::
string
PaddlePassBuilder
::
DebugString
()
{
// sub-graph into TensorRT.
std
::
stringstream
ss
;
void
Run
(
DataFlowGraph
*
graph
)
override
;
ss
<<
"Passes to apply:
\n
"
;
for
(
auto
&
pass
:
passes_
)
{
ss
<<
" - "
<<
pass
<<
'\n'
;
}
return
ss
.
str
();
}
std
::
string
repr
()
const
override
{
return
"tensorrt-sub-subgraph-mark"
;
}
void
PaddlePassBuilder
::
DeletePass
(
const
std
::
string
&
pass_type
)
{
std
::
string
description
()
const
override
{
auto
it
=
std
::
begin
(
passes_
);
return
"tensorrt sub-graph mark pass"
;
while
(
it
!=
std
::
end
(
passes_
))
{
if
(
*
it
==
pass_type
)
{
it
=
passes_
.
erase
(
it
);
}
else
{
++
it
;
}
}
}
}
void
PaddlePassBuilder
::
InsertPass
(
size_t
idx
,
const
std
::
string
&
pass_type
)
{
passes_
.
insert
(
std
::
begin
(
passes_
)
+
idx
,
pass_type
);
}
AnalysisPass
*
CreateGraphvizDebugerPass
()
const
override
;
void
PaddlePassBuilder
::
DeletePass
(
size_t
idx
)
{
bool
Finalize
()
override
;
passes_
.
erase
(
std
::
begin
(
passes_
)
+
idx
);
}
private:
void
GpuPassStrategy
::
EnableMKLDNN
()
{
teller_t
teller_
;
LOG
(
ERROR
)
<<
"GPU not support MKLDNN yet"
;
}
;
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/paddle_pass_builder.h
0 → 100644
浏览文件 @
ddb12035
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sstream>
#include <string>
#include <vector>
namespace
paddle
{
/*
 * String-based pass builder that is part of the inference API. It keeps an
 * ordered list of pass names and exposes helpers to edit that list.
 */
class PaddlePassBuilder {
 public:
  // Starts from a copy of `passes`; later edits do not touch the caller's
  // vector.
  explicit PaddlePassBuilder(const std::vector<std::string> &passes)
      : passes_(passes) {}

  // Adds `pass_type` to the end of the pass list.
  void AppendPass(const std::string &pass_type);

  // Places `pass_type` in front of the pass currently at position `idx`.
  // NOTE(review): `idx` does not appear to be range-checked by the
  // implementation; callers should keep it within [0, AllPasses().size()].
  void InsertPass(size_t idx, const std::string &pass_type);

  // Delete the `idx`-th pass.
  void DeletePass(size_t idx);

  // Delete every pass whose type equals `pass_type`.
  void DeletePass(const std::string &pass_type);

  // Visualize the computation graph after each pass by emitting a DOT
  // language file; the files can be rendered with the Graphviz toolkit.
  void TurnOnDebug();

  // Human-readable listing of the passes that will be applied.
  std::string DebugString();

  // Read-only view of the current pass list, in application order.
  const std::vector<std::string> &AllPasses() const { return passes_; }

 protected:
  // Pass names in the order they are applied.
  std::vector<std::string> passes_;
};
/*
 * Abstract base of the strategies that control which IR passes are applied.
 */
class PassStrategy : public PaddlePassBuilder {
 public:
  // Forwards the initial pass list to the underlying builder.
  explicit PassStrategy(const std::vector<std::string> &passes)
      : PaddlePassBuilder(passes) {}

  // The MKLDNN switch is part of the common interface, so both the CPU and
  // the GPU strategies have to provide it.
  // NOTE(review): the original comment justified this with "CPU kernels
  // running in CPU mode"; presumably "GPU mode" was meant -- confirm.
  virtual void EnableMKLDNN() = 0;

  virtual ~PassStrategy() = default;
};
/*
* The CPU passes controller, it is used in AnalysisPredictor with CPU mode.
*/
class
CpuPassStrategy
:
public
PassStrategy
{
public:
CpuPassStrategy
()
:
PassStrategy
({})
{
// NOTE the large fusions should be located in the front, so that they will
// not be damaged by smaller ones.
passes_
.
assign
({
"infer_clean_graph_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"seqconv_eltadd_relu_fuse_pass"
,
//
// "embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass"
,
//
"mul_lstm_fuse_pass"
,
//
"fc_gru_fuse_pass"
,
//
"mul_gru_fuse_pass"
,
//
"seq_concat_fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
"conv_bn_fuse_pass"
,
//
"conv_eltwiseadd_bn_fuse_pass"
,
//
});
}
virtual
~
CpuPassStrategy
()
=
default
;
virtual
void
EnableMKLDNN
()
override
{
// TODO(Superjomn) Consider the way to mix CPU with GPU.
#ifdef PADDLE_WITH_MKLDNN
passes_
.
insert
(
passes_
.
begin
(),
"mkldnn_placement_pass"
);
for
(
auto
&
pass
:
std
::
vector
<
std
::
string
>
({
"depthwise_conv_mkldnn_pass"
,
//
"conv_bias_mkldnn_fuse_pass"
,
//
"conv_relu_mkldnn_fuse_pass"
,
//
"conv_elementwise_add_mkldnn_fuse_pass"
}))
{
passes_
.
push_back
(
pass
);
}
#endif
}
CpuPassStrategy
(
const
CpuPassStrategy
&
other
)
:
PassStrategy
(
other
.
passes_
)
{}
};
/*
* The GPU passes strategy, it is used in
*/
class
GpuPassStrategy
:
public
PassStrategy
{
public:
GpuPassStrategy
()
:
PassStrategy
({})
{
passes_
.
assign
({
"infer_clean_graph_pass"
,
"conv_bn_fuse_pass"
,
});
}
GpuPassStrategy
(
const
GpuPassStrategy
&
other
)
:
PassStrategy
(
other
.
AllPasses
())
{}
virtual
void
EnableMKLDNN
()
override
;
virtual
~
GpuPassStrategy
()
=
default
;
};
}
// namespace paddle
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
ddb12035
...
@@ -61,6 +61,7 @@ TensorRTEngine::~TensorRTEngine() {
...
@@ -61,6 +61,7 @@ TensorRTEngine::~TensorRTEngine() {
}
}
void
TensorRTEngine
::
FreezeNetwork
()
{
void
TensorRTEngine
::
FreezeNetwork
()
{
VLOG
(
3
)
<<
"TRT to freeze network"
;
freshDeviceId
();
freshDeviceId
();
PADDLE_ENFORCE
(
infer_builder_
!=
nullptr
,
PADDLE_ENFORCE
(
infer_builder_
!=
nullptr
,
"Call InitNetwork first to initialize network."
);
"Call InitNetwork first to initialize network."
);
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
ddb12035
...
@@ -108,7 +108,8 @@ if(WITH_GPU AND TENSORRT_FOUND)
...
@@ -108,7 +108,8 @@ if(WITH_GPU AND TENSORRT_FOUND)
if
(
NOT EXISTS
${
TRT_MODEL_INSTALL_DIR
}
)
if
(
NOT EXISTS
${
TRT_MODEL_INSTALL_DIR
}
)
inference_download_and_uncompress
(
${
TRT_MODEL_INSTALL_DIR
}
${
INFERENCE_URL
}
/tensorrt_test
"trt_test_models.tar.gz"
)
inference_download_and_uncompress
(
${
TRT_MODEL_INSTALL_DIR
}
${
INFERENCE_URL
}
/tensorrt_test
"trt_test_models.tar.gz"
)
endif
()
endif
()
cc_test
(
test_trt_models SRCS trt_models_tester.cc
ARGS --dirname=
${
TRT_MODEL_INSTALL_DIR
}
/trt_test_models
inference_analysis_test
(
test_trt_models SRCS trt_models_tester.cc
DEPS paddle_inference_tensorrt_subgraph_engine SERIAL
)
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
analysis
${
analysis_deps
}
ir_pass_manager analysis_predictor
ARGS --dirname=
${
TRT_MODEL_INSTALL_DIR
}
/trt_test_models SERIAL
)
endif
()
endif
()
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
浏览文件 @
ddb12035
...
@@ -37,7 +37,10 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
...
@@ -37,7 +37,10 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void
profile
(
bool
use_mkldnn
=
false
)
{
void
profile
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
cfg
.
_use_mkldnn
=
use_mkldnn
;
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
...
@@ -65,7 +68,9 @@ TEST(Analyzer_resnet50, fuse_statis) {
...
@@ -65,7 +68,9 @@ TEST(Analyzer_resnet50, fuse_statis) {
void
compare
(
bool
use_mkldnn
=
false
)
{
void
compare
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
cfg
.
_use_mkldnn
=
use_mkldnn
;
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
SetInput
(
&
input_slots_all
);
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
ddb12035
...
@@ -210,7 +210,6 @@ void SetConfig(AnalysisConfig *cfg) {
...
@@ -210,7 +210,6 @@ void SetConfig(AnalysisConfig *cfg) {
cfg
->
device
=
0
;
cfg
->
device
=
0
;
cfg
->
specify_input_name
=
true
;
cfg
->
specify_input_name
=
true
;
cfg
->
enable_ir_optim
=
true
;
cfg
->
enable_ir_optim
=
true
;
cfg
->
ir_passes
.
clear
();
// Do not exclude any pass.
}
}
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
...
@@ -226,13 +225,15 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
...
@@ -226,13 +225,15 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
// Easy for profiling independently.
// Easy for profiling independently.
TEST
(
Analyzer_rnn1
,
profile
)
{
TEST
(
Analyzer_rnn1
,
profile
)
{
contrib
::
AnalysisConfig
cfg
;
contrib
::
AnalysisConfig
cfg
(
false
)
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
cfg
.
use_gpu
=
false
;
cfg
.
fraction_of_gpu_memory
=
0.1
;
cfg
.
pass_builder
()
->
TurnOnDebug
();
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
SetInput
(
&
input_slots_all
);
LOG
(
INFO
)
<<
"to test prediction"
;
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
}
}
...
@@ -274,31 +275,6 @@ TEST(Analyzer_rnn1, multi_thread) {
...
@@ -274,31 +275,6 @@ TEST(Analyzer_rnn1, multi_thread) {
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
4
/* multi_thread */
);
TestPrediction
(
cfg
,
input_slots_all
,
&
outputs
,
4
/* multi_thread */
);
}
}
// Compares the variables named in `tensors` between two scopes.
//
// A name is skipped (with an INFO log) when it is missing from either scope
// or when the variable found in `a_scope` is neither a LoDTensor nor a
// Tensor. Every remaining pair is compared with inference::CompareTensor and
// each mismatch is logged at ERROR level.
//
// Returns true only if no compared pair mismatched. All names are still
// visited after a mismatch so that the log lists every differing tensor.
//
// NOTE(review): only the type of the variable from `a_scope` is inspected
// before both variables are read as LoDTensor -- confirm that the matching
// variable in `b_scope` always has the same type.
bool CompareTensors(const framework::Scope &a_scope,
                    const framework::Scope &b_scope,
                    const std::vector<std::string> &tensors) {
  bool all_match = true;
  for (auto &x : tensors) {
    auto *a_var = a_scope.FindVar(x);
    auto *b_var = b_scope.FindVar(x);
    if (!a_var || !b_var) {
      LOG(INFO) << "skip tensor " << x;
      continue;
    }

    const bool holds_tensor = a_var->Type() == typeid(framework::LoDTensor) ||
                              a_var->Type() == typeid(framework::Tensor);
    if (!holds_tensor) {
      LOG(INFO) << "skip no tensor " << x;
      continue;
    }

    LOG(INFO) << "comparing tensor " << x;
    auto &a_t = a_var->Get<framework::LoDTensor>();
    auto &b_t = b_var->Get<framework::LoDTensor>();
    if (!inference::CompareTensor(a_t, b_t)) {
      LOG(ERROR) << string::Sprintf("tensor %s not match in two scopes", x);
      // Remember the failure so callers asserting on the result can fail.
      all_match = false;
    }
  }
  return all_match;
}
// Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
// Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
// on the complex RNN1 model.
// on the complex RNN1 model.
TEST
(
Analyzer_rnn1
,
ZeroCopy
)
{
TEST
(
Analyzer_rnn1
,
ZeroCopy
)
{
...
@@ -307,7 +283,6 @@ TEST(Analyzer_rnn1, ZeroCopy) {
...
@@ -307,7 +283,6 @@ TEST(Analyzer_rnn1, ZeroCopy) {
config
.
use_feed_fetch_ops
=
false
;
config
.
use_feed_fetch_ops
=
false
;
PaddlePlace
place
;
PaddlePlace
place
;
int
output_size
{
0
};
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
);
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
config
);
...
@@ -353,86 +328,22 @@ TEST(Analyzer_rnn1, ZeroCopy) {
...
@@ -353,86 +328,22 @@ TEST(Analyzer_rnn1, ZeroCopy) {
Timer
timer
;
Timer
timer
;
double
total_time
{
0
};
double
total_time
{
0
};
double
native_total_time
{
0
};
double
analysis_total_time
{
0.
};
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
timer
.
tic
();
timer
.
tic
();
predictor
->
ZeroCopyRun
();
predictor
->
ZeroCopyRun
();
total_time
+=
timer
.
toc
();
total_time
+=
timer
.
toc
();
}
}
LOG
(
INFO
)
<<
"ZeroCopy output: "
<<
DescribeZeroCopyTensor
(
*
output_tensor
);
auto
*
output_data
=
output_tensor
->
data
<
float
>
(
&
place
,
&
output_size
);
ASSERT_GT
(
output_size
,
0
);
// more than one output!
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
// Run native predictor.
timer
.
tic
();
ASSERT_TRUE
(
native_predictor
->
Run
(
native_inputs
.
front
(),
&
native_outputs
));
ASSERT_TRUE
(
native_predictor
->
Run
(
native_inputs
.
front
(),
&
native_outputs
));
native_total_time
+=
timer
.
toc
();
LOG
(
INFO
)
<<
"native output "
<<
DescribeTensor
(
native_outputs
.
front
());
}
for
(
int
i
=
0
;
i
<
FLAGS_repeat
;
i
++
)
{
int
output_size
{
0
};
timer
.
tic
();
auto
*
zero_copy_data
=
output_tensor
->
data
<
float
>
(
&
place
,
&
output_size
);
ASSERT_TRUE
(
auto
*
native_data
=
static_cast
<
float
*>
(
native_outputs
.
front
().
data
.
data
());
analysis_predictor
->
Run
(
native_inputs
.
front
(),
&
analysis_outputs
));
for
(
size_t
i
=
0
;
i
<
output_size
/
sizeof
(
float
);
i
++
)
{
analysis_total_time
+=
timer
.
toc
();
EXPECT_NEAR
(
zero_copy_data
[
i
],
native_data
[
i
],
1e-3
);
}
if
(
!
FLAGS_with_precision_check
)
{
return
;
}
int
native_output_size
=
VecReduceToInt
(
native_outputs
.
front
().
shape
);
EXPECT_EQ
(
native_output_size
,
output_size
);
// Compare tensors between analysis and zerocopy
auto
*
p0
=
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
());
auto
*
p1
=
static_cast
<
AnalysisPredictor
*>
(
analysis_predictor
.
get
());
auto
*
p2
=
static_cast
<
NativePaddlePredictor
*>
(
native_predictor
.
get
());
std
::
vector
<
std
::
string
>
tensor_names
;
for
(
auto
&
var_desc
:
p0
->
program
().
Block
(
0
).
AllVars
())
{
tensor_names
.
push_back
(
var_desc
->
Name
());
}
LOG
(
INFO
)
<<
"Comparing tensors"
;
ASSERT_TRUE
(
CompareTensors
(
*
p0
->
scope
(),
*
p1
->
scope
(),
{
"final_output.tmp_1"
}));
ASSERT_TRUE
(
CompareTensors
(
*
p0
->
scope
(),
*
p2
->
scope
(),
{
"final_output.tmp_1"
}));
LOG
(
INFO
)
<<
"output1 "
<<
inference
::
LoDTensorSummary
<
float
>
(
p0
->
scope
()
->
FindVar
(
"final_output.tmp_1"
)
->
Get
<
framework
::
LoDTensor
>
());
LOG
(
INFO
)
<<
"output2 "
<<
inference
::
LoDTensorSummary
<
float
>
(
p1
->
scope
()
->
FindVar
(
"final_output.tmp_1"
)
->
Get
<
framework
::
LoDTensor
>
());
LOG
(
INFO
)
<<
"output3 "
<<
inference
::
LoDTensorSummary
<
float
>
(
p2
->
scope
()
->
FindVar
(
"final_output.tmp_1"
)
->
Get
<
framework
::
LoDTensor
>
());
for
(
int
i
=
0
;
i
<
output_size
;
i
++
)
{
LOG
(
INFO
)
<<
output_data
[
i
]
<<
" "
<<
static_cast
<
float
*>
(
native_outputs
.
front
().
data
.
data
())[
i
]
<<
" "
<<
static_cast
<
float
*>
(
analysis_outputs
.
front
().
data
.
data
())[
i
];
EXPECT_NEAR
(
output_data
[
i
],
static_cast
<
float
*>
(
native_outputs
.
front
().
data
.
data
())[
i
],
1e-3
);
}
}
LOG
(
INFO
)
<<
"batch_size: "
<<
FLAGS_batch_size
;
LOG
(
INFO
)
<<
"zero average time: "
<<
total_time
/
(
FLAGS_repeat
*
FLAGS_batch_size
);
LOG
(
INFO
)
<<
"analysis average time: "
<<
analysis_total_time
/
(
FLAGS_repeat
*
FLAGS_batch_size
);
LOG
(
INFO
)
<<
"native average time: "
<<
native_total_time
/
(
FLAGS_repeat
*
FLAGS_batch_size
);
}
}
TEST
(
Analyzer_rnn1
,
ZeroCopyMultiThread
)
{
TEST
(
Analyzer_rnn1
,
ZeroCopyMultiThread
)
{
...
...
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
浏览文件 @
ddb12035
...
@@ -108,9 +108,7 @@ TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) {
...
@@ -108,9 +108,7 @@ TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) {
AnalysisConfig
cfg
;
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
// Enable embedding_fc_lstm_fuse_pass (disabled by default)
// Enable embedding_fc_lstm_fuse_pass (disabled by default)
auto
it
=
std
::
find
(
cfg
.
ir_passes
.
begin
(),
cfg
.
ir_passes
.
end
(),
cfg
.
pass_builder
()
->
InsertPass
(
2
,
"embedding_fc_lstm_fuse_pass"
);
"embedding_fc_lstm_fuse_pass"
);
if
(
it
!=
cfg
.
ir_passes
.
end
())
cfg
.
ir_passes
.
erase
(
it
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
SetInput
(
&
input_slots_all
);
...
...
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
浏览文件 @
ddb12035
...
@@ -58,7 +58,10 @@ void SetConfig(AnalysisConfig *cfg) {
...
@@ -58,7 +58,10 @@ void SetConfig(AnalysisConfig *cfg) {
cfg
->
enable_ir_optim
=
true
;
cfg
->
enable_ir_optim
=
true
;
cfg
->
specify_input_name
=
true
;
cfg
->
specify_input_name
=
true
;
// TODO(TJ): fix fusion gru
// TODO(TJ): fix fusion gru
cfg
->
ir_passes
.
push_back
(
"fc_gru_fuse_pass"
);
cfg
->
pass_builder
()
->
DeletePass
(
"fc_gru_fuse_pass"
);
#ifdef PADDLE_WITH_MKLDNN
cfg
->
EnableMKLDNN
();
#endif
}
}
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
...
@@ -84,7 +87,9 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
...
@@ -84,7 +87,9 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void
profile
(
bool
use_mkldnn
=
false
)
{
void
profile
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
cfg
.
_use_mkldnn
=
use_mkldnn
;
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
...
@@ -125,7 +130,9 @@ TEST(Analyzer_vis, fuse_statis) {
...
@@ -125,7 +130,9 @@ TEST(Analyzer_vis, fuse_statis) {
void
compare
(
bool
use_mkldnn
=
false
)
{
void
compare
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
cfg
.
_use_mkldnn
=
use_mkldnn
;
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
SetInput
(
&
input_slots_all
);
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
ddb12035
...
@@ -20,6 +20,7 @@
...
@@ -20,6 +20,7 @@
#include <thread> // NOLINT
#include <thread> // NOLINT
#include <vector>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
...
@@ -88,22 +89,25 @@ size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
...
@@ -88,22 +89,25 @@ size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
PaddlePredictor
*
predictor
,
std
::
unordered_map
<
std
::
string
,
int
>
GetFuseStatis
(
PaddlePredictor
*
predictor
,
int
*
num_ops
)
{
int
*
num_ops
)
{
std
::
unordered_map
<
std
::
string
,
int
>
res
;
auto
*
analysis_predictor
=
static_cast
<
AnalysisPredictor
*>
(
predictor
);
auto
*
analysis_predictor
=
static_cast
<
AnalysisPredictor
*>
(
predictor
);
auto
&
fuse_statis
=
analysis_predictor
->
analysis_argument
()
auto
*
fusion_status
=
.
Get
<
std
::
unordered_map
<
std
::
string
,
int
>>
(
analysis_predictor
->
analysis_argument
().
fusion_statis_ptr
();
framework
::
ir
::
kFuseStatisAttr
);
if
(
!
fusion_status
)
{
for
(
auto
&
item
:
fuse_statis
)
{
return
res
;
}
for
(
auto
&
item
:
*
fusion_status
)
{
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
LOG
(
INFO
)
<<
"fused "
<<
item
.
first
<<
" "
<<
item
.
second
;
}
}
int
num
=
0
;
int
num
=
0
;
for
(
auto
&
node
:
for
(
auto
&
node
:
analysis_predictor
->
analysis_argument
().
main_
dfg
->
nodes
.
n
odes
())
{
analysis_predictor
->
analysis_argument
().
main_
graph
().
N
odes
())
{
if
(
node
->
Is
Function
())
{
if
(
node
->
Is
Op
())
{
++
num
;
++
num
;
}
}
}
}
*
num_ops
=
num
;
*
num_ops
=
num
;
return
fuse_stati
s
;
return
*
fusion_statu
s
;
}
}
void
SetFakeImageInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
,
void
SetFakeImageInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
,
...
@@ -161,11 +165,12 @@ void TestMultiThreadPrediction(
...
@@ -161,11 +165,12 @@ void TestMultiThreadPrediction(
int
num_times
=
FLAGS_repeat
;
int
num_times
=
FLAGS_repeat
;
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
// TODO(yanchunwei): Bug here, the analyzer phase can't be parallelized
// because AttentionLSTM's hard-coded node id will be damaged.
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
predictors
.
emplace_back
(
CreateTestPredictor
(
config
,
use_analysis
));
predictors
.
emplace_back
(
CreateTestPredictor
(
config
,
use_analysis
));
for
(
int
tid
=
1
;
tid
<
num_threads
;
++
tid
)
{
predictors
.
emplace_back
(
predictors
.
front
()
->
Clone
());
}
}
size_t
total_time
{
0
};
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
threads
.
emplace_back
([
&
,
tid
]()
{
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
...
@@ -173,17 +178,21 @@ void TestMultiThreadPrediction(
...
@@ -173,17 +178,21 @@ void TestMultiThreadPrediction(
#endif
#endif
// Each thread should have local inputs and outputs.
// Each thread should have local inputs and outputs.
// The inputs of each thread are all the same.
// The inputs of each thread are all the same.
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs_tid
=
inputs
;
std
::
vector
<
PaddleTensor
>
outputs_tid
;
std
::
vector
<
PaddleTensor
>
outputs_tid
;
auto
&
predictor
=
predictors
[
tid
];
LOG
(
INFO
)
<<
"running thread "
<<
tid
;
Timer
timer
;
Timer
timer
;
timer
.
tic
();
timer
.
tic
();
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num_times
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
inputs_tid
.
size
();
j
++
)
{
for
(
const
auto
&
input
:
inputs
)
{
predictors
[
tid
]
->
Run
(
inputs_tid
[
j
],
&
outputs_tid
);
ASSERT_TRUE
(
predictor
->
Run
(
input
,
&
outputs_tid
)
);
}
}
}
}
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
timer
.
toc
()
/
num_times
,
inputs_tid
.
size
());
auto
time
=
timer
.
toc
();
total_time
+=
time
;
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
time
/
num_times
,
inputs
.
size
());
});
});
}
}
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
...
@@ -196,7 +205,7 @@ void TestPrediction(const AnalysisConfig &config,
...
@@ -196,7 +205,7 @@ void TestPrediction(const AnalysisConfig &config,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
bool
use_analysis
=
FLAGS_use_analysis
)
{
bool
use_analysis
=
FLAGS_use_analysis
)
{
LOG
(
INFO
)
<<
"use_analysis: "
<<
use_analysis
LOG
(
INFO
)
<<
"use_analysis: "
<<
use_analysis
<<
", use_mkldnn: "
<<
config
.
_use_mkldnn
;
<<
", use_mkldnn: "
<<
config
.
use_mkldnn
()
;
if
(
num_threads
==
1
)
{
if
(
num_threads
==
1
)
{
TestOneThreadPrediction
(
config
,
inputs
,
outputs
,
use_analysis
);
TestOneThreadPrediction
(
config
,
inputs
,
outputs
,
use_analysis
);
}
else
{
}
else
{
...
@@ -208,7 +217,7 @@ void TestPrediction(const AnalysisConfig &config,
...
@@ -208,7 +217,7 @@ void TestPrediction(const AnalysisConfig &config,
void
CompareNativeAndAnalysis
(
void
CompareNativeAndAnalysis
(
const
AnalysisConfig
&
config
,
const
AnalysisConfig
&
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
)
{
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
)
{
LOG
(
INFO
)
<<
"use_mkldnn: "
<<
config
.
_use_mkldnn
;
LOG
(
INFO
)
<<
"use_mkldnn: "
<<
config
.
use_mkldnn
()
;
std
::
vector
<
PaddleTensor
>
native_outputs
,
analysis_outputs
;
std
::
vector
<
PaddleTensor
>
native_outputs
,
analysis_outputs
;
TestOneThreadPrediction
(
config
,
inputs
,
&
native_outputs
,
false
);
TestOneThreadPrediction
(
config
,
inputs
,
&
native_outputs
,
false
);
TestOneThreadPrediction
(
config
,
inputs
,
&
analysis_outputs
,
true
);
TestOneThreadPrediction
(
config
,
inputs
,
&
analysis_outputs
,
true
);
...
...
paddle/fluid/inference/tests/api/trt_models_tester.cc
浏览文件 @
ddb12035
...
@@ -16,10 +16,13 @@
...
@@ -16,10 +16,13 @@
#include <glog/logging.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
paddle
{
using
paddle
::
contrib
::
MixedRT
Config
;
using
paddle
::
contrib
::
Analysis
Config
;
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
...
@@ -27,33 +30,24 @@ NativeConfig GetConfigNative() {
...
@@ -27,33 +30,24 @@ NativeConfig GetConfigNative() {
NativeConfig
config
;
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
;
config
.
model_dir
=
FLAGS_dirname
;
// LOG(INFO) << "dirname " << config.model_dir;
// LOG(INFO) << "dirname " << config.model_dir;
config
.
fraction_of_gpu_memory
=
0.
4
5
;
config
.
fraction_of_gpu_memory
=
0.
1
5
;
config
.
use_gpu
=
true
;
config
.
use_gpu
=
true
;
config
.
device
=
0
;
config
.
device
=
0
;
return
config
;
return
config
;
}
}
MixedRTConfig
GetConfigTRT
()
{
void
PrepareTRTConfig
(
AnalysisConfig
*
config
)
{
MixedRTConfig
config
;
config
->
model_dir
=
FLAGS_dirname
+
"/"
+
"mobilenet"
;
config
.
model_dir
=
FLAGS_dirname
;
config
->
fraction_of_gpu_memory
=
0.15
;
config
.
use_gpu
=
true
;
config
->
EnableTensorRtEngine
(
1
<<
10
,
5
);
config
.
fraction_of_gpu_memory
=
0.2
;
config
->
pass_builder
()
->
DeletePass
(
"conv_bn_fuse_pass"
);
config
.
device
=
0
;
config
->
pass_builder
()
->
DeletePass
(
"fc_fuse_pass"
);
config
.
max_batch_size
=
3
;
config
->
pass_builder
()
->
TurnOnDebug
();
return
config
;
}
}
void
CompareTensorRTWithFluid
(
int
batch_size
,
std
::
string
model_dirname
)
{
void
PrepareInputs
(
std
::
vector
<
PaddleTensor
>
*
tensors
,
int
batch_size
)
{
NativeConfig
config0
=
GetConfigNative
();
PADDLE_ENFORCE_EQ
(
tensors
->
size
(),
1UL
);
config0
.
model_dir
=
model_dirname
;
auto
&
tensor
=
tensors
->
front
();
MixedRTConfig
config1
=
GetConfigTRT
();
config1
.
model_dir
=
model_dirname
;
config1
.
max_batch_size
=
batch_size
;
auto
predictor0
=
CreatePaddlePredictor
<
NativeConfig
>
(
config0
);
auto
predictor1
=
CreatePaddlePredictor
<
MixedRTConfig
>
(
config1
);
// Prepare inputs
int
height
=
224
;
int
height
=
224
;
int
width
=
224
;
int
width
=
224
;
float
*
data
=
new
float
[
batch_size
*
3
*
height
*
width
];
float
*
data
=
new
float
[
batch_size
*
3
*
height
*
width
];
...
@@ -61,25 +55,34 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
...
@@ -61,25 +55,34 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
data
[
0
]
=
1.0
f
;
data
[
0
]
=
1.0
f
;
// Prepare inputs
// Prepare inputs
PaddleTensor
tensor
;
tensor
.
name
=
"input_0"
;
tensor
.
name
=
"input_0"
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
data
=
PaddleBuf
(
static_cast
<
void
*>
(
data
),
tensor
.
data
=
PaddleBuf
(
static_cast
<
void
*>
(
data
),
sizeof
(
float
)
*
(
batch_size
*
3
*
height
*
width
));
sizeof
(
float
)
*
(
batch_size
*
3
*
height
*
width
));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
}
void
CompareTensorRTWithFluid
(
int
batch_size
,
std
::
string
model_dirname
)
{
auto
config0
=
GetConfigNative
();
config0
.
model_dir
=
model_dirname
;
AnalysisConfig
config1
(
true
);
PrepareTRTConfig
(
&
config1
);
config1
.
model_dir
=
model_dirname
;
auto
predictor0
=
CreatePaddlePredictor
<
NativeConfig
>
(
config0
);
auto
predictor1
=
CreatePaddlePredictor
(
config1
);
// Prepare inputs
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
);
PrepareInputs
(
&
paddle_tensor_feeds
,
batch_size
);
// Prepare outputs
// Prepare outputs
std
::
vector
<
PaddleTensor
>
outputs0
;
std
::
vector
<
PaddleTensor
>
outputs0
;
std
::
vector
<
PaddleTensor
>
outputs1
;
std
::
vector
<
PaddleTensor
>
outputs1
;
CHECK
(
predictor0
->
Run
(
paddle_tensor_feeds
,
&
outputs0
));
CHECK
(
predictor0
->
Run
(
paddle_tensor_feeds
,
&
outputs0
));
CHECK
(
predictor1
->
Run
(
paddle_tensor_feeds
,
&
outputs1
,
batch_size
));
CHECK
(
predictor1
->
Run
(
paddle_tensor_feeds
,
&
outputs1
,
batch_size
));
// Get output.
ASSERT_EQ
(
outputs0
.
size
(),
1UL
);
ASSERT_EQ
(
outputs1
.
size
(),
1UL
);
const
size_t
num_elements
=
outputs0
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements
=
outputs0
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements1
=
outputs1
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements1
=
outputs1
.
front
().
data
.
length
()
/
sizeof
(
float
);
EXPECT_EQ
(
num_elements
,
num_elements1
);
EXPECT_EQ
(
num_elements
,
num_elements1
);
...
@@ -94,15 +97,52 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
...
@@ -94,15 +97,52 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
}
}
TEST
(
trt_models_test
,
mobilenet
)
{
TEST
(
trt_models_test
,
mobilenet
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/mobilenet"
);
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/
"
+
"
mobilenet"
);
}
}
TEST
(
trt_models_test
,
resnet50
)
{
TEST
(
trt_models_test
,
resnet50
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/resnet50"
);
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/
"
+
"
resnet50"
);
}
}
TEST
(
trt_models_test
,
resnext50
)
{
TEST
(
trt_models_test
,
resnext50
)
{
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/resnext50"
);
CompareTensorRTWithFluid
(
1
,
FLAGS_dirname
+
"/"
+
"resnext50"
);
}
// Runs the mobilenet model through the native predictor and through an
// AnalysisConfig-based predictor with IR optimization enabled, then checks
// that both produce the same number of output elements and element-wise
// close values (tolerance 1e-3).
TEST(trt_models_test, raw_gpu) {
  std::string model_dir = FLAGS_dirname + "/" + "mobilenet";
  auto config0 = GetConfigNative();
  config0.model_dir = model_dir;
  int batch_size = 2;

  // NOTE(review): the `true` argument presumably selects GPU mode -- confirm
  // against the AnalysisConfig constructor.
  AnalysisConfig config1(true);
  config1.fraction_of_gpu_memory = 0.1;
  config1.enable_ir_optim = true;
  config1.model_dir = model_dir;

  auto predictor0 = CreatePaddlePredictor<NativeConfig>(config0);
  auto predictor1 = CreatePaddlePredictor(config1);

  // Prepare inputs
  std::vector<PaddleTensor> paddle_tensor_feeds(1);
  PrepareInputs(&paddle_tensor_feeds, batch_size);

  // Prepare outputs
  std::vector<PaddleTensor> outputs0;
  std::vector<PaddleTensor> outputs1;
  CHECK(predictor0->Run(paddle_tensor_feeds, &outputs0));
  CHECK(predictor1->Run(paddle_tensor_feeds, &outputs1, batch_size));

  // front() on an empty vector is undefined behavior, so make sure both
  // predictors actually produced an output before reading it.
  ASSERT_FALSE(outputs0.empty());
  ASSERT_FALSE(outputs1.empty());

  const size_t num_elements = outputs0.front().data.length() / sizeof(float);
  const size_t num_elements1 = outputs1.front().data.length() / sizeof(float);
  EXPECT_EQ(num_elements, num_elements1);

  auto *data0 = static_cast<float *>(outputs0.front().data.data());
  auto *data1 = static_cast<float *>(outputs1.front().data.data());

  ASSERT_GT(num_elements, 0UL);
  // Only the common prefix is compared, so a size mismatch (already reported
  // by EXPECT_EQ above) cannot cause an out-of-bounds read.
  for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) {
    EXPECT_NEAR(data0[i], data1[i], 1e-3);
  }
}
}
}
// namespace paddle
}
// namespace paddle
USE_PASS
(
tensorrt_subgraph_pass
);
paddle/fluid/memory/malloc.cc
浏览文件 @
ddb12035
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/malloc.h"
...
@@ -21,6 +22,7 @@ limitations under the License. */
...
@@ -21,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/string/printf.h"
DEFINE_bool
(
init_allocated_mem
,
false
,
DEFINE_bool
(
init_allocated_mem
,
false
,
"It is a mistake that the values of the memory allocated by "
"It is a mistake that the values of the memory allocated by "
...
@@ -137,12 +139,18 @@ void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
...
@@ -137,12 +139,18 @@ void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
platform
::
SetDeviceId
(
place
.
device
);
platform
::
SetDeviceId
(
place
.
device
);
size_t
avail
,
total
;
size_t
avail
,
total
;
platform
::
GpuMemoryUsage
(
&
avail
,
&
total
);
platform
::
GpuMemoryUsage
(
&
avail
,
&
total
);
LOG
(
WARNING
)
<<
"Cannot allocate "
<<
size
<<
" bytes in GPU "
LOG
(
WARNING
)
<<
"Cannot allocate "
<<
string
::
HumanReadableSize
(
size
)
<<
place
.
device
<<
", available "
<<
avail
<<
" bytes"
;
<<
" in GPU "
<<
place
.
device
<<
", available "
<<
string
::
HumanReadableSize
(
avail
);
LOG
(
WARNING
)
<<
"total "
<<
total
;
LOG
(
WARNING
)
<<
"total "
<<
total
;
LOG
(
WARNING
)
<<
"GpuMinChunkSize "
<<
buddy_allocator
->
GetMinChunkSize
();
LOG
(
WARNING
)
<<
"GpuMinChunkSize "
LOG
(
WARNING
)
<<
"GpuMaxChunkSize "
<<
buddy_allocator
->
GetMaxChunkSize
();
<<
string
::
HumanReadableSize
(
LOG
(
WARNING
)
<<
"GPU memory used: "
<<
Used
<
platform
::
CUDAPlace
>
(
place
);
buddy_allocator
->
GetMinChunkSize
());
LOG
(
WARNING
)
<<
"GpuMaxChunkSize "
<<
string
::
HumanReadableSize
(
buddy_allocator
->
GetMaxChunkSize
());
LOG
(
WARNING
)
<<
"GPU memory used: "
<<
string
::
HumanReadableSize
(
Used
<
platform
::
CUDAPlace
>
(
place
));
platform
::
SetDeviceId
(
cur_dev
);
platform
::
SetDeviceId
(
cur_dev
);
}
}
if
(
FLAGS_init_allocated_mem
)
{
if
(
FLAGS_init_allocated_mem
)
{
...
...
paddle/fluid/operators/auc_op.cc
浏览文件 @
ddb12035
...
@@ -53,7 +53,7 @@ class AucOp : public framework::OperatorWithKernel {
...
@@ -53,7 +53,7 @@ class AucOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Predict"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Predict"
)
->
type
()),
ctx
.
device_context
());
platform
::
CPUPlace
());
}
}
};
};
...
...
paddle/fluid/operators/load_op.cc
浏览文件 @
ddb12035
...
@@ -40,8 +40,9 @@ class LoadOp : public framework::OperatorBase {
...
@@ -40,8 +40,9 @@ class LoadOp : public framework::OperatorBase {
auto
out_var_name
=
Output
(
"Out"
);
auto
out_var_name
=
Output
(
"Out"
);
auto
*
out_var
=
scope
.
FindVar
(
out_var_name
);
auto
*
out_var
=
scope
.
FindVar
(
out_var_name
);
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
"Output variable %s cannot be found"
,
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
out_var_name
);
"Output variable %s cannot be found in scope %p"
,
out_var_name
,
&
scope
);
if
(
out_var
->
IsType
<
framework
::
LoDTensor
>
())
{
if
(
out_var
->
IsType
<
framework
::
LoDTensor
>
())
{
LoadLodTensor
(
fin
,
place
,
out_var
);
LoadLodTensor
(
fin
,
place
,
out_var
);
...
...
paddle/fluid/operators/lookup_sparse_table_op.cc
浏览文件 @
ddb12035
...
@@ -45,6 +45,7 @@ class LookupSparseTableOp : public framework::OperatorBase {
...
@@ -45,6 +45,7 @@ class LookupSparseTableOp : public framework::OperatorBase {
auto
out_var
=
scope
.
FindVar
(
Output
(
"Out"
));
auto
out_var
=
scope
.
FindVar
(
Output
(
"Out"
));
auto
w_var
=
scope
.
FindVar
(
Input
(
"W"
));
auto
w_var
=
scope
.
FindVar
(
Input
(
"W"
));
auto
ids_var
=
scope
.
FindVar
(
Input
(
"Ids"
));
auto
ids_var
=
scope
.
FindVar
(
Input
(
"Ids"
));
auto
is_test
=
Attr
<
bool
>
(
"is_test"
);
PADDLE_ENFORCE
(
out_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE
(
out_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The type of Out var should be LodTensor."
);
"The type of Out var should be LodTensor."
);
...
@@ -65,7 +66,7 @@ class LookupSparseTableOp : public framework::OperatorBase {
...
@@ -65,7 +66,7 @@ class LookupSparseTableOp : public framework::OperatorBase {
PADDLE_ENFORCE_EQ
(
framework
::
ToDataType
(
w_t
->
value
().
type
()),
PADDLE_ENFORCE_EQ
(
framework
::
ToDataType
(
w_t
->
value
().
type
()),
framework
::
proto
::
VarType
::
FP32
,
framework
::
proto
::
VarType
::
FP32
,
"The sparse table only support FP32"
);
"The sparse table only support FP32"
);
w_t
->
Get
(
ids_t
,
out_t
,
true
);
w_t
->
Get
(
ids_t
,
out_t
,
true
,
is_test
);
}
}
};
};
...
@@ -91,6 +92,10 @@ class LookupSparseTableOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -91,6 +92,10 @@ class LookupSparseTableOpMaker : public framework::OpProtoAndCheckerMaker {
"(bool default false)"
"(bool default false)"
"Whether create new value if for nonexistent key."
)
"Whether create new value if for nonexistent key."
)
.
SetDefault
(
true
);
.
SetDefault
(
true
);
AddAttr
<
bool
>
(
"is_test"
,
"In test mode, lookup_sparse_table will "
"return a 0 for unknown id"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Lookup Sprase Tablel Operator.
Lookup Sprase Tablel Operator.
...
...
paddle/fluid/operators/lrn_op.cc
浏览文件 @
ddb12035
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/lrn_op.h"
#include "paddle/fluid/operators/lrn_op.h"
#include <string>
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#endif
...
@@ -29,34 +30,43 @@ struct LRNFunctor<platform::CPUDeviceContext, T> {
...
@@ -29,34 +30,43 @@ struct LRNFunctor<platform::CPUDeviceContext, T> {
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
out
,
const
framework
::
Tensor
&
input
,
framework
::
Tensor
*
out
,
framework
::
Tensor
*
mid
,
int
N
,
int
C
,
int
H
,
int
W
,
int
n
,
framework
::
Tensor
*
mid
,
int
N
,
int
C
,
int
H
,
int
W
,
int
n
,
T
k
,
T
alpha
,
T
beta
)
{
T
k
,
T
alpha
,
T
beta
)
{
auto
x_v
=
framework
::
EigenVector
<
T
>::
Flatten
(
input
);
const
T
*
idata
=
input
.
data
<
T
>
();
auto
place
=
ctx
.
GetPlace
();
const
int
start
=
-
(
n
-
1
)
/
2
;
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
ctx
);
const
int
end
=
start
+
n
;
T
*
odata
=
out
->
mutable_data
<
T
>
(
place
);
T
*
mdata
=
mid
->
mutable_data
<
T
>
(
place
);
auto
e_mid
=
framework
::
EigenTensor
<
T
,
4
>::
From
(
*
mid
);
Tensor
squared
;
e_mid
=
e_mid
.
constant
(
k
);
T
*
sdata
=
squared
.
mutable_data
<
T
>
({
1
,
C
+
n
-
1
,
H
,
W
},
place
);
std
::
memset
(
sdata
,
0
,
sizeof
(
T
)
*
squared
.
numel
());
auto
e_x
=
framework
::
EigenTensor
<
T
,
4
>::
From
(
input
);
for
(
int
i
=
0
;
i
<
mid
->
numel
();
++
i
)
{
for
(
int
m
=
0
;
m
<
N
;
m
++
)
{
mdata
[
i
]
=
k
;
for
(
int
i
=
0
;
i
<
C
;
i
++
)
{
for
(
int
c
=
start
;
c
<
end
;
c
++
)
{
int
ch
=
i
+
c
;
if
(
ch
>=
0
&&
ch
<
C
)
{
auto
s
=
e_mid
.
slice
(
Eigen
::
array
<
int
,
4
>
({{
m
,
i
,
0
,
0
}}),
Eigen
::
array
<
int
,
4
>
({{
1
,
1
,
H
,
W
}}));
auto
r
=
e_x
.
slice
(
Eigen
::
array
<
int
,
4
>
({{
m
,
ch
,
0
,
0
}}),
Eigen
::
array
<
int
,
4
>
({{
1
,
1
,
H
,
W
}}));
s
+=
alpha
*
r
.
square
();
}
}
int
img_size
=
H
*
W
;
int
fea_size
=
C
*
img_size
;
int
pre_pad
=
(
n
-
1
)
/
2
;
// compute batches one by one
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
blas
.
VSQR
(
fea_size
,
idata
+
i
*
fea_size
,
sdata
+
pre_pad
*
img_size
);
// init the first channel of mid
for
(
int
c
=
0
;
c
<
n
;
++
c
)
{
blas
.
AXPY
(
img_size
,
alpha
,
sdata
+
c
*
img_size
,
mdata
+
i
*
fea_size
);
}
}
for
(
int
c
=
1
;
c
<
C
;
++
c
)
{
// copy previous scale
int
mid_offset
=
i
*
fea_size
+
c
*
img_size
;
std
::
memcpy
(
mdata
+
mid_offset
,
mdata
+
mid_offset
-
img_size
,
img_size
*
sizeof
(
T
));
// add last
blas
.
AXPY
(
img_size
,
alpha
,
sdata
+
(
c
+
n
-
1
)
*
img_size
,
mdata
+
mid_offset
);
// sub rest
blas
.
AXPY
(
img_size
,
-
alpha
,
sdata
+
(
c
-
1
)
*
img_size
,
mdata
+
mid_offset
);
}
}
}
}
// compute the final output
auto
out_e
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
out
);
blas
.
VPOW
(
mid
->
numel
(),
mdata
,
-
beta
,
odata
);
out_e
=
x_v
*
e_mid
.
reshape
(
Eigen
::
DSizes
<
int
,
1
>
(
e_mid
.
size
())).
pow
(
-
be
ta
);
blas
.
VMUL
(
mid
->
numel
(),
odata
,
idata
,
oda
ta
);
}
}
};
};
template
struct
LRNFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
LRNFunctor
<
platform
::
CPUDeviceContext
,
float
>;
...
@@ -156,6 +166,9 @@ class LRNOp : public framework::OperatorWithKernel {
...
@@ -156,6 +166,9 @@ class LRNOp : public framework::OperatorWithKernel {
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_EQ
(
x_dim
.
size
(),
4
,
"Input(X)'rank of LRNOp should be 4."
);
PADDLE_ENFORCE_EQ
(
x_dim
.
size
(),
4
,
"Input(X)'rank of LRNOp should be 4."
);
int
n
=
ctx
->
Attrs
().
Get
<
int
>
(
"n"
);
PADDLE_ENFORCE
(
n
>
0
&&
n
%
2
==
1
,
"n should be positive odd value"
);
ctx
->
SetOutputDim
(
"Out"
,
x_dim
);
ctx
->
SetOutputDim
(
"Out"
,
x_dim
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
ctx
->
SetOutputDim
(
"MidOut"
,
x_dim
);
ctx
->
SetOutputDim
(
"MidOut"
,
x_dim
);
...
...
paddle/fluid/operators/lrn_op.h
浏览文件 @
ddb12035
...
@@ -60,7 +60,6 @@ class LRNKernel : public framework::OpKernel<T> {
...
@@ -60,7 +60,6 @@ class LRNKernel : public framework::OpKernel<T> {
T
beta
=
ctx
.
Attr
<
float
>
(
"beta"
);
T
beta
=
ctx
.
Attr
<
float
>
(
"beta"
);
T
k
=
ctx
.
Attr
<
float
>
(
"k"
);
T
k
=
ctx
.
Attr
<
float
>
(
"k"
);
PADDLE_ENFORCE
(
n
>
0
,
"n should >= 0"
);
PADDLE_ENFORCE
(
alpha
>=
0.0
,
"alpha should >= 0.0"
);
PADDLE_ENFORCE
(
alpha
>=
0.0
,
"alpha should >= 0.0"
);
PADDLE_ENFORCE
(
beta
>=
0.0
,
"beta should >= 0.0"
);
PADDLE_ENFORCE
(
beta
>=
0.0
,
"beta should >= 0.0"
);
PADDLE_ENFORCE
(
k
>=
0.0
,
"k should >= 0.0"
);
PADDLE_ENFORCE
(
k
>=
0.0
,
"k should >= 0.0"
);
...
...
paddle/fluid/operators/math/blas.h
浏览文件 @
ddb12035
...
@@ -152,6 +152,12 @@ class Blas {
...
@@ -152,6 +152,12 @@ class Blas {
template
<
typename
T
>
template
<
typename
T
>
void
VEXP
(
int
n
,
const
T
*
x
,
T
*
y
)
const
;
void
VEXP
(
int
n
,
const
T
*
x
,
T
*
y
)
const
;
template
<
typename
T
>
void
VSQR
(
int
n
,
const
T
*
x
,
T
*
y
)
const
;
template
<
typename
T
>
void
VPOW
(
int
n
,
const
T
*
x
,
T
alpha
,
T
*
y
)
const
;
template
<
typename
T
>
template
<
typename
T
>
void
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
void
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
T
*
C
)
const
;
T
*
C
)
const
;
...
@@ -238,6 +244,16 @@ class BlasT : private Blas<DeviceContext> {
...
@@ -238,6 +244,16 @@ class BlasT : private Blas<DeviceContext> {
Base
()
->
template
VEXP
<
T
>(
args
...);
Base
()
->
template
VEXP
<
T
>(
args
...);
}
}
template
<
typename
...
ARGS
>
void
VSQR
(
ARGS
...
args
)
const
{
Base
()
->
template
VSQR
<
T
>(
args
...);
}
template
<
typename
...
ARGS
>
void
VPOW
(
ARGS
...
args
)
const
{
Base
()
->
template
VPOW
<
T
>(
args
...);
}
template
<
typename
...
ARGS
>
template
<
typename
...
ARGS
>
void
GEMV
(
ARGS
...
args
)
const
{
void
GEMV
(
ARGS
...
args
)
const
{
Base
()
->
template
GEMV
<
T
>(
args
...);
Base
()
->
template
GEMV
<
T
>(
args
...);
...
...
paddle/fluid/operators/math/blas_impl.h
浏览文件 @
ddb12035
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#pragma once
#include <cmath>
#include <limits>
#include <limits>
#include <vector>
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/math_function.h"
...
@@ -102,6 +103,16 @@ struct CBlas<float> {
...
@@ -102,6 +103,16 @@ struct CBlas<float> {
static
void
VEXP
(
ARGS
...
args
)
{
static
void
VEXP
(
ARGS
...
args
)
{
platform
::
dynload
::
vsExp
(
args
...);
platform
::
dynload
::
vsExp
(
args
...);
}
}
// Single-precision VSQR: hands every argument, unchanged, to the dynamically
// loaded MKLML routine vsSqr.
template <typename... ARGS>
static void VSQR(ARGS... call_args) {
  platform::dynload::vsSqr(call_args...);
}
// Single-precision VPOW: hands every argument, unchanged, to the dynamically
// loaded MKLML routine vsPowx.
template <typename... ARGS>
static void VPOW(ARGS... call_args) {
  platform::dynload::vsPowx(call_args...);
}
};
};
template
<
>
template
<
>
...
@@ -182,6 +193,16 @@ struct CBlas<double> {
...
@@ -182,6 +193,16 @@ struct CBlas<double> {
static
void
VEXP
(
ARGS
...
args
)
{
static
void
VEXP
(
ARGS
...
args
)
{
platform
::
dynload
::
vdExp
(
args
...);
platform
::
dynload
::
vdExp
(
args
...);
}
}
// Double-precision VSQR: hands every argument, unchanged, to the dynamically
// loaded MKLML routine vdSqr.
template <typename... ARGS>
static void VSQR(ARGS... call_args) {
  platform::dynload::vdSqr(call_args...);
}
// Double-precision VPOW: hands every argument, unchanged, to the dynamically
// loaded MKLML routine vdPowx.
template <typename... ARGS>
static void VPOW(ARGS... call_args) {
  platform::dynload::vdPowx(call_args...);
}
};
};
#else
#else
...
@@ -241,6 +262,8 @@ struct CBlas<platform::float16> {
...
@@ -241,6 +262,8 @@ struct CBlas<platform::float16> {
}
}
static
void
VMUL
(...)
{
PADDLE_THROW
(
"float16 VMUL not supported on CPU"
);
}
static
void
VMUL
(...)
{
PADDLE_THROW
(
"float16 VMUL not supported on CPU"
);
}
static
void
VEXP
(...)
{
PADDLE_THROW
(
"float16 VEXP not supported on CPU"
);
}
static
void
VEXP
(...)
{
PADDLE_THROW
(
"float16 VEXP not supported on CPU"
);
}
// float16 has no CPU VSQR implementation; any call is rejected via
// PADDLE_THROW regardless of the arguments passed.
static void VSQR(...) {
  PADDLE_THROW("float16 VSQR not supported on CPU");
}
// float16 has no CPU VPOW implementation; any call is rejected via
// PADDLE_THROW regardless of the arguments passed.
static void VPOW(...) {
  PADDLE_THROW("float16 VPOW not supported on CPU");
}
static
void
DOT
(...)
{
PADDLE_THROW
(
"float16 DOT not supported on CPU"
);
};
static
void
DOT
(...)
{
PADDLE_THROW
(
"float16 DOT not supported on CPU"
);
};
static
void
SCAL
(...)
{
PADDLE_THROW
(
"float16 SCAL not supported on CPU"
);
};
static
void
SCAL
(...)
{
PADDLE_THROW
(
"float16 SCAL not supported on CPU"
);
};
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
...
@@ -398,6 +421,31 @@ void Blas<platform::CPUDeviceContext>::VEXP(int n, const T *x, T *y) const {
...
@@ -398,6 +421,31 @@ void Blas<platform::CPUDeviceContext>::VEXP(int n, const T *x, T *y) const {
#endif
#endif
}
}
// Element-wise square on CPU: y[i] = x[i] * x[i] for every i in [0, n).
//
//   n  number of elements to process
//   x  input array, read only
//   y  output array, receives the squares
//
// With MKLML the work is delegated to CBlas<T>::VSQR (the v?Sqr routines).
// The portable fallback must produce the same thing: it previously took the
// square ROOT, which silently gave wrong results in builds without MKLML
// (e.g. the LRN CPU functor relies on VSQR to fill its "squared" buffer).
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::VSQR(int n, const T *x, T *y) const {
#ifdef PADDLE_WITH_MKLML
  CBlas<T>::VSQR(n, x, y);
#else
  for (int i = 0; i < n; ++i) {
    y[i] = x[i] * x[i];
  }
#endif
}
// Element-wise power on CPU: y[k] = x[k] raised to the exponent a, for every
// k in [0, n).
//
//   n  number of elements to process
//   x  input array of bases, read only
//   a  exponent applied to every element
//   y  output array
//
// Delegates to CBlas<T>::VPOW when built with MKLML; otherwise evaluates
// std::pow one element at a time.
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::VPOW(int n, const T *x, T a,
                                            T *y) const {
#ifdef PADDLE_WITH_MKLML
  CBlas<T>::VPOW(n, x, a, y);
#else
  int k = 0;
  while (k < n) {
    y[k] = std::pow(x[k], a);
    ++k;
  }
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
T
Blas
<
platform
::
CPUDeviceContext
>::
DOT
(
int
n
,
const
T
*
x
,
const
T
*
y
)
const
{
T
Blas
<
platform
::
CPUDeviceContext
>::
DOT
(
int
n
,
const
T
*
x
,
const
T
*
y
)
const
{
...
...
paddle/fluid/operators/mul_op.cc
浏览文件 @
ddb12035
...
@@ -56,7 +56,8 @@ class MulOp : public framework::OperatorWithKernel {
...
@@ -56,7 +56,8 @@ class MulOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ
(
x_mat_dims
[
1
],
y_mat_dims
[
0
],
PADDLE_ENFORCE_EQ
(
x_mat_dims
[
1
],
y_mat_dims
[
0
],
"First matrix's width must be equal with second matrix's "
"First matrix's width must be equal with second matrix's "
"height. %s, %s"
);
"height. %s, %s"
,
x_mat_dims
[
1
],
y_mat_dims
[
0
]);
std
::
vector
<
int64_t
>
output_dims
;
std
::
vector
<
int64_t
>
output_dims
;
output_dims
.
reserve
(
output_dims
.
reserve
(
static_cast
<
size_t
>
(
x_num_col_dims
+
y_dims
.
size
()
-
y_num_col_dims
));
static_cast
<
size_t
>
(
x_num_col_dims
+
y_dims
.
size
()
-
y_num_col_dims
));
...
...
paddle/fluid/operators/nce_op.cc
浏览文件 @
ddb12035
...
@@ -69,7 +69,7 @@ class NCEOp : public framework::OperatorWithKernel {
...
@@ -69,7 +69,7 @@ class NCEOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
Get
Place
());
platform
::
CPU
Place
());
}
}
};
};
...
@@ -174,7 +174,7 @@ class NCEOpGrad : public framework::OperatorWithKernel {
...
@@ -174,7 +174,7 @@ class NCEOpGrad : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
Get
Place
());
platform
::
CPU
Place
());
}
}
};
};
...
...
paddle/fluid/operators/sgd_op.h
浏览文件 @
ddb12035
...
@@ -109,8 +109,6 @@ class SGDOpKernel : public framework::OpKernel<T> {
...
@@ -109,8 +109,6 @@ class SGDOpKernel : public framework::OpKernel<T> {
const
auto
*
grad_data
=
grad
.
value
().
data
<
T
>
();
const
auto
*
grad_data
=
grad
.
value
().
data
<
T
>
();
auto
*
out_data
=
param_out
->
mutable_value
()
->
data
<
T
>
();
auto
*
out_data
=
param_out
->
mutable_value
()
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
grad
.
rows
().
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
grad
.
rows
().
size
();
i
++
)
{
PADDLE_ENFORCE
(
grad
.
rows
()[
i
]
<
grad
.
height
(),
"Input rows index should less than height"
);
int64_t
id_index
=
param_out
->
AutoGrownIndex
(
grad
.
rows
()[
i
],
false
);
int64_t
id_index
=
param_out
->
AutoGrownIndex
(
grad
.
rows
()[
i
],
false
);
PADDLE_ENFORCE_GE
(
id_index
,
static_cast
<
int64_t
>
(
0
),
PADDLE_ENFORCE_GE
(
id_index
,
static_cast
<
int64_t
>
(
0
),
"id should be in the table"
);
"id should be in the table"
);
...
...
paddle/fluid/platform/dynload/mklml.h
浏览文件 @
ddb12035
...
@@ -76,6 +76,10 @@ extern void* mklml_dso_handle;
...
@@ -76,6 +76,10 @@ extern void* mklml_dso_handle;
__macro(vdMul); \
__macro(vdMul); \
__macro(vsExp); \
__macro(vsExp); \
__macro(vdExp); \
__macro(vdExp); \
__macro(vsSqr); \
__macro(vdSqr); \
__macro(vsPowx); \
__macro(vdPowx); \
__macro(MKL_Set_Num_Threads)
__macro(MKL_Set_Num_Threads)
MKLML_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_MKLML_WRAP
);
MKLML_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_MKLML_WRAP
);
...
...
paddle/fluid/string/printf.h
浏览文件 @
ddb12035
...
@@ -72,6 +72,7 @@
...
@@ -72,6 +72,7 @@
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include <string>
#include <string>
#include <vector>
#include "tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
#include "tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat
...
@@ -102,5 +103,22 @@ void Printf(const char* fmt, const Args&... args) {
...
@@ -102,5 +103,22 @@ void Printf(const char* fmt, const Args&... args) {
Fprintf
(
std
::
cout
,
fmt
,
args
...);
Fprintf
(
std
::
cout
,
fmt
,
args
...);
}
}
// Renders a byte count as a short human-readable string, e.g. 1536 becomes
// "1.500000kB".
//
// The value is converted to double and divided by 1024 until it falls below
// 1024; the number of divisions picks the unit suffix (B, kB, MB, GB, TB, PB,
// EB, ZB, YB). The number itself is printed with "%f". If the value is too
// large for the unit table, the original count is printed in plain bytes.
//
//   size  the byte count; T must be convertible to double via static_cast.
template <typename T>
std::string HumanReadableSize(T size) {
  // String literals in a static table: nothing is allocated per call.
  static const char* const kUnits[] = {"B",  "kB", "MB", "GB", "TB",
                                       "PB", "EB", "ZB", "YB"};
  constexpr size_t kNumUnits = sizeof(kUnits) / sizeof(kUnits[0]);

  const double orig = static_cast<double>(size);
  double f_size = orig;
  size_t i = 0;
  // ">=" so that exactly 1024 bytes reads as 1kB rather than 1024B. The bound
  // on i keeps the loop finite even for an infinite input; values that run
  // past the table still end up in the plain-bytes branch below.
  while (f_size >= 1024 && i < kNumUnits) {
    f_size /= 1024;
    ++i;
  }
  if (i >= kNumUnits) {
    return Sprintf("%fB", orig);
  }
  return Sprintf("%f%s", f_size, kUnits[i]);
}
}
// namespace string
}
// namespace string
}
// namespace paddle
}
// namespace paddle
python/paddle/fluid/tests/unittests/test_lookup_sparse_table_op.py
浏览文件 @
ddb12035
...
@@ -80,6 +80,33 @@ class TestLookupSpraseTable(OpTest):
...
@@ -80,6 +80,33 @@ class TestLookupSpraseTable(OpTest):
assert
(
result_array2
[
3
]
==
w_array
[
6
]).
all
()
assert
(
result_array2
[
3
]
==
w_array
[
6
]).
all
()
assert
(
result_array2
[
4
]
==
w_array
[
7
]).
all
()
assert
(
result_array2
[
4
]
==
w_array
[
7
]).
all
()
# create and run lookup_table operator
test_lookup_table
=
Operator
(
"lookup_sparse_table"
,
W
=
'W'
,
Ids
=
'Ids'
,
Out
=
'Out'
,
min
=-
5.0
,
max
=
10.0
,
seed
=
10
,
is_test
=
True
)
ids
=
scope
.
var
(
"Ids"
).
get_tensor
()
unknown_id
=
[
44
,
22
,
33
]
ids_array2
=
np
.
array
([
4
,
2
,
3
,
7
,
100000
]
+
unknown_id
).
astype
(
"int64"
)
ids
.
set
(
ids_array2
,
place
)
test_lookup_table
.
run
(
scope
,
place
)
result_array2
=
np
.
array
(
out_tensor
)
assert
(
result_array2
[
0
]
==
w_array
[
5
]).
all
()
assert
(
result_array2
[
1
]
==
w_array
[
1
]).
all
()
assert
(
result_array2
[
2
]
==
w_array
[
2
]).
all
()
assert
(
result_array2
[
3
]
==
w_array
[
6
]).
all
()
assert
(
result_array2
[
4
]
==
w_array
[
7
]).
all
()
for
i
in
[
5
,
6
,
7
]:
assert
np
.
all
(
result_array2
[
i
]
==
0
)
def
test_w_is_selected_rows
(
self
):
def
test_w_is_selected_rows
(
self
):
places
=
[
core
.
CPUPlace
()]
places
=
[
core
.
CPUPlace
()]
# currently only support CPU
# currently only support CPU
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录