PaddlePaddle / Paddle
Commit ad0dfb17 (unverified)
Authored by 石晓伟 on Jan 09, 2020; committed via GitHub on Jan 09, 2020.
[Feature] Lite subgraph (#22114)
Parent: 7d10edc5

Showing 39 changed files with 1,909 additions and 11 deletions (+1909 −11)
CMakeLists.txt                                                          +1    −0
cmake/external/lite.cmake                                               +87   −0
cmake/flags.cmake                                                       +0    −5
cmake/system.cmake                                                      +5    −0
cmake/third_party.cmake                                                 +4    −0
paddle/fluid/inference/CMakeLists.txt                                   +4    −0
paddle/fluid/inference/analysis/argument.h                              +6    −0
paddle/fluid/inference/analysis/ir_pass_manager.cc                      +11   −0
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt                +9    −0
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc         +336  −0
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h          +45   −0
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc  +59   −0
paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc              +51   −1
paddle/fluid/inference/analysis/ir_passes/subgraph_util.h               +13   −2
paddle/fluid/inference/api/analysis_config.cc                           +33   −0
paddle/fluid/inference/api/analysis_predictor.cc                        +7    −0
paddle/fluid/inference/api/paddle_analysis_config.h                     +14   −0
paddle/fluid/inference/api/paddle_pass_builder.cc                       +7    −1
paddle/fluid/inference/api/paddle_pass_builder.h                        +1    −0
paddle/fluid/inference/lite/CMakeLists.txt                              +5    −0
paddle/fluid/inference/lite/engine.cc                                   +64   −0
paddle/fluid/inference/lite/engine.h                                    +55   −0
paddle/fluid/inference/lite/op_teller.cc                                +92   −0
paddle/fluid/inference/lite/op_teller.h                                 +70   −0
paddle/fluid/inference/lite/tensor_utils.cc                             +181  −0
paddle/fluid/inference/lite/tensor_utils.h                              +33   −0
paddle/fluid/inference/lite/test_engine.cc                              +132  −0
paddle/fluid/inference/lite/test_tensor_utils.cc                        +116  −0
paddle/fluid/inference/tests/api/CMakeLists.txt                         +6    −1
paddle/fluid/inference/tests/api/lite_mul_model_test.cc                 +58   −0
paddle/fluid/operators/CMakeLists.txt                                   +5    −0
paddle/fluid/operators/lite/CMakeLists.txt                              +2    −0
paddle/fluid/operators/lite/lite_engine_op.cc                           +44   −0
paddle/fluid/operators/lite/lite_engine_op.h                            +110  −0
paddle/fluid/operators/lite/lite_engine_op_test.cc                      +115  −0
paddle/fluid/operators/lite/ut_helper.h                                 +111  −0
paddle/fluid/platform/gpu_info.cc                                       +11   −0
paddle/fluid/platform/gpu_info.h                                        +3    −0
paddle/scripts/paddle_build.sh                                          +3    −1
CMakeLists.txt
...
@@ -86,6 +86,7 @@ option(WITH_HIGH_LEVEL_API_TEST "Test fluid python high-level api interface"
option(PY_VERSION       "Compile PaddlePaddle with python3 support"     ${PY_VERSION})
option(WITH_DGC         "Use DGC(Deep Gradient Compression) or not"     ${WITH_DISTRIBUTE})
option(SANITIZER_TYPE   "Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined" OFF)
option(WITH_LITE        "Compile Paddle Fluid with Lite Engine" OFF)

# PY_VERSION
if(NOT PY_VERSION)
...
cmake/external/lite.cmake
new file mode 100644
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT LINUX OR NOT WITH_MKL)
  message("Paddle-lite will not build because the required Linux and MKL do not exist.")
  set(WITH_LITE OFF)
  return()
endif()

if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
  include(ExternalProject)
  set(LITE_PROJECT extern_lite)
  set(LITE_SOURCES_DIR ${THIRD_PARTY_PATH}/lite)
  set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite)

  # No quotes, so cmake can resolve it as a command with arguments.
  set(LITE_BUILD_COMMAND $(MAKE) -j)
  set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON
                         -DLITE_WITH_CUDA=${WITH_GPU}
                         -DWITH_MKLDNN=OFF
                         -DLITE_WITH_X86=ON
                         -DLITE_WITH_PROFILE=OFF
                         -DWITH_LITE=OFF
                         -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
                         -DWITH_PYTHON=OFF
                         -DWITH_TESTING=ON
                         -DLITE_BUILD_EXTRA=ON
                         -DCUDNN_ROOT=${CUDNN_ROOT}
                         -DLITE_WITH_ARM=OFF)

  ExternalProject_Add(
    ${LITE_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY      "https://github.com/PaddlePaddle/Paddle-Lite.git"
    GIT_TAG             947cda26637d46dc23f4e39d2b52e7d9a1fa6eef
    PREFIX              ${LITE_SOURCES_DIR}
    UPDATE_COMMAND      ""
    BUILD_COMMAND       ${LITE_BUILD_COMMAND}
    INSTALL_COMMAND     ""
    CMAKE_ARGS          -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                        -DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS}
                        -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
                        -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
                        -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
                        -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
                        -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
                        -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                        -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
                        ${EXTERNAL_OPTIONAL_ARGS}
                        ${LITE_OPTIONAL_ARGS})

  ExternalProject_Get_property(${LITE_PROJECT} BINARY_DIR)
  ExternalProject_Get_property(${LITE_PROJECT} SOURCE_DIR)
  set(LITE_BINARY_DIR ${BINARY_DIR})
  set(LITE_SOURCE_DIR ${SOURCE_DIR})
endif()

message(STATUS "Paddle-lite BINARY_DIR: ${LITE_BINARY_DIR}")
message(STATUS "Paddle-lite SOURCE_DIR: ${LITE_SOURCE_DIR}")
include_directories(${LITE_SOURCE_DIR})
include_directories(${LITE_BINARY_DIR})

function(external_lite_static_libs alias path)
  add_library(${alias} STATIC IMPORTED GLOBAL)
  SET_PROPERTY(TARGET ${alias} PROPERTY IMPORTED_LOCATION ${path})
  if(LITE_PROJECT)
    add_dependencies(${alias} ${LITE_PROJECT})
  endif()
endfunction()

external_lite_static_libs(lite_full_static ${LITE_BINARY_DIR}/lite/api/libapi_full_static.a)

add_definitions(-DPADDLE_WITH_LITE)
cmake/flags.cmake
...
@@ -207,11 +207,6 @@ if(LINUX)
        ${GPU_COMMON_FLAGS})
endif(LINUX)

if(UNIX AND NOT APPLE)
  # except apple from nix*Os family
  set(LINUX TRUE)
endif(UNIX AND NOT APPLE)

foreach(flag ${COMMON_FLAGS})
  safe_set_cflag(CMAKE_C_FLAGS ${flag})
  safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
...
cmake/system.cmake
...
@@ -20,6 +20,11 @@
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.

if(UNIX AND NOT APPLE)
  # except apple from nix*Os family
  set(LINUX TRUE)
endif(UNIX AND NOT APPLE)

IF(WIN32)
    SET(HOST_SYSTEM "win32")
ELSE(WIN32)
...
cmake/third_party.cmake
...
@@ -284,4 +284,8 @@ if(WITH_DGC)
    list(APPEND third_party_deps extern_dgc)
endif()

if (WITH_LITE)
    include(external/lite)
endif (WITH_LITE)

add_custom_target(third_party DEPENDS ${third_party_deps})
paddle/fluid/inference/CMakeLists.txt
...
@@ -21,6 +21,10 @@ if (ANAKIN_SUBGRAPH)
  add_subdirectory(anakin)
endif()

if (WITH_LITE)
  add_subdirectory(lite)
endif()

get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
get_property(fluid_third_partys GLOBAL PROPERTY FLUID_THRID_PARTYS)
...
paddle/fluid/inference/analysis/argument.h
...
@@ -197,6 +197,12 @@ struct Argument {
  DECL_ARGUMENT_FIELD(anakin_ops_filter, AnakinOpsFilter,
                      std::vector<std::string>);

  DECL_ARGUMENT_FIELD(lite_passes_filter, LitePassesFilter,
                      std::vector<std::string>);
  DECL_ARGUMENT_FIELD(lite_ops_filter, LiteOpsFilter,
                      std::vector<std::string>);
  DECL_ARGUMENT_FIELD(lite_precision_mode, LitePrecisionMode,
                      AnalysisConfig::Precision);

  // Memory optimized related.
  DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
...
@@ -128,6 +128,17 @@ void IRPassManager::CreatePasses(Argument *argument,
      pass->Set("program",
                new framework::ProgramDesc *(&argument->main_program()));
    }
    if (pass_name == "lite_subgraph_pass") {
      bool enable_int8 =
          argument->lite_precision_mode() == AnalysisConfig::Precision::kInt8;
      pass->Set("program",
                new framework::ProgramDesc *(&argument->main_program()));
      pass->Set("lite_ops_filter",
                new std::vector<std::string>(argument->lite_ops_filter()));
      pass->Set("predictor_id", new int(argument->predictor_id()));
      pass->Set("enable_int8", new bool(enable_int8));
      pass->Set("use_gpu", new bool(argument->use_gpu()));
    }
    if (pass_name == "anakin_subgraph_pass") {
      pass->Set("program",
                new framework::ProgramDesc *(&argument->main_program()));
...
paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
...
@@ -23,3 +23,12 @@ if (ANAKIN_SUBGRAPH)
  file(APPEND ${pass_file} "USE_PASS(anakin_subgraph_pass);\n")
  set(INFER_IR_PASSES ${INFER_IR_PASSES} anakin_subgraph_pass CACHE INTERNAL "")
endif()

if (WITH_LITE)
  cc_library(lite_subgraph_pass SRCS lite_subgraph_pass.cc DEPS ${analysis_deps} subgraph_util lite_op_teller)
  set(analysis_deps ${analysis_deps} subgraph_util lite_subgraph_pass CACHE INTERNAL "")
  set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h)
  file(APPEND ${pass_file} "USE_PASS(lite_subgraph_pass);\n")
  set(INFER_IR_PASSES ${INFER_IR_PASSES} lite_subgraph_pass CACHE INTERNAL "")
  cc_test(lite_subgraph_pass_tester SRCS lite_subgraph_pass_tester.cc DEPS lite_subgraph_pass gtest glog)
endif()
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
new file mode 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <fstream>
#include <iostream>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/lite/op_teller.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/subgraph_detector.h"
#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h"
#include "paddle/fluid/string/pretty_log.h"
#include "paddle/fluid/inference/lite/engine.h"
namespace paddle {
namespace inference {
namespace analysis {

using framework::ir::Node;
using framework::ir::Agent;
using framework::ir::SubGraphFuser;
using framework::ir::Graph;

namespace lite {

std::string UniqueKey(const std::vector<std::string>& engine_inputs,
                      const std::vector<std::string>& engine_outputs,
                      const std::string& id) {
  std::string engine_hash_key = "";
  for (auto name : engine_inputs) {
    engine_hash_key += name;
  }
  for (auto name : engine_outputs) {
    engine_hash_key += name;
  }
  engine_hash_key += id;
  auto engine_key = std::to_string(std::hash<std::string>()(engine_hash_key));
  return engine_key;
}

std::vector<std::string> IOVarsFilter(const std::vector<Node*>& nodes) {
  std::set<std::string> names;
  for (const auto& node : nodes) {
    if (node->IsVar() && !node->Var()->Persistable()) {
      names.insert(node->Name());
    }
  }
  return std::vector<std::string>(names.begin(), names.end());
}

void StrToBinaryFile(const std::string& path, const std::string& str) {
  std::ofstream file(path.c_str(), std::ios::binary);
  file.write(str.c_str(), str.size());
  file.close();
}

void ModifyHostSubgraphOps(
    framework::ProgramDesc* host_program, framework::BlockDesc* host_sub_block,
    const std::vector<framework::OpDesc*>& subgraph_ops) {
  for (auto* op_desc : subgraph_ops) {
    auto* sub_block_op = host_sub_block->AppendOp();
    sub_block_op->CopyFrom(*op_desc);
    if (op_desc->HasAttr("sub_block")) {
      int32_t global_sub_id = host_sub_block->ID();
      auto* op_sub_block =
          host_program->MutableBlock(op_desc->GetBlockAttrId("sub_block"));
      op_sub_block->Proto()->set_parent_idx(global_sub_id);
    }
  }
}

void ModifyHostProgram(framework::ProgramDesc* host_program,
                       framework::BlockDesc* host_sub_block,
                       const std::unordered_set<Node*>& io_var_nodes,
                       const std::vector<framework::OpDesc*>& subgraph_ops) {
  for (auto* var_node : io_var_nodes) {
    auto* sub_block_var = host_sub_block->Var(var_node->Name());
    sub_block_var->Proto()->CopyFrom(*var_node->Var()->Proto());
  }
  ModifyHostSubgraphOps(host_program, host_sub_block, subgraph_ops);
}

void AppendLiteSubBlocks(const std::vector<framework::OpDesc*>& subgraph_ops,
                         framework::ProgramDesc* engine_program,
                         framework::ProgramDesc* host_program,
                         const int32_t host_sub_id) {
  std::unordered_map<int32_t, int32_t> sub_blocks_map;
  std::unordered_set<int32_t> copied_host_ids;
  sub_blocks_map[host_sub_id] = framework::kRootBlockIndex;
  std::function<void(const std::vector<framework::OpDesc*>&)> append_sub_blocks;
  append_sub_blocks = [&](const std::vector<framework::OpDesc*>& ops) {
    for (auto* op_desc : ops) {
      if (op_desc->HasAttr("sub_block")) {
        int32_t host_op_sub_id = op_desc->GetBlockAttrId("sub_block");
        if (copied_host_ids.count(host_op_sub_id)) continue;
        size_t engine_block_size = engine_program->Size();
        auto* host_op_sub_block = host_program->MutableBlock(host_op_sub_id);
        auto* engine_op_sub_block =
            engine_program->AppendBlock(*(op_desc->Block()));
        for (auto* var : host_op_sub_block->AllVars()) {
          auto* engine_var = engine_op_sub_block->Var(var->Name());
          engine_var->Proto()->CopyFrom(*var->Proto());
        }
        for (auto* op : host_op_sub_block->AllOps()) {
          auto* engine_op = engine_op_sub_block->AppendOp();
          engine_op->Proto()->CopyFrom(*op->Proto());
        }
        sub_blocks_map[host_op_sub_id] = engine_block_size;
        append_sub_blocks(host_op_sub_block->AllOps());
      }
    }
  };
  append_sub_blocks(subgraph_ops);
  for (size_t i = 0; i < engine_program->Size(); i++) {
    for (auto* op_desc : engine_program->Block(i).AllOps()) {
      if (op_desc->HasAttr("sub_block")) {
        int32_t id = op_desc->GetBlockAttrId("sub_block");
        op_desc->SetAttr("sub_block", sub_blocks_map[id]);
      }
    }
  }
}

// The modification of pass should be a process of framework::desc
// (initial) -> proto::desc (flush) -> framework::desc (final).
// Ir::Graph is limited to changing the main block, so the sub block
// needs to be processed here.
void ModifyEngineProgram(Node* merged_node,
                         framework::ProgramDesc* host_program,
                         framework::ProgramDesc* engine_program,
                         const int32_t host_sub_block_id,
                         const std::unordered_set<Node*>& io_var_nodes,
                         const std::vector<framework::OpDesc*>& subgraph_ops) {
  // 1. Fill the main block of lite program.
  framework::BlockDesc* engine_global_block =
      engine_program->MutableBlock(framework::kRootBlockIndex);
  PrependFeedOps(engine_global_block, IOVarsFilter(merged_node->inputs));
  for (auto* var_node : io_var_nodes) {
    framework::VarDesc* sub_block_var =
        engine_global_block->Var(var_node->Name());
    sub_block_var->Proto()->CopyFrom(*var_node->Var()->Proto());
  }
  for (auto* op_desc : subgraph_ops) {
    auto* sub_block_op = engine_global_block->AppendOp();
    sub_block_op->CopyFrom(*op_desc);
  }
  PrependFetchOps(engine_global_block, IOVarsFilter(merged_node->outputs));

  // 2. Append sub blocks in the lite program.
  AppendLiteSubBlocks(subgraph_ops, engine_program, host_program,
                      host_sub_block_id);
}

void OrganizeProgram(Node* merged_node, framework::ProgramDesc* host_program,
                     framework::ProgramDesc* engine_program,
                     std::vector<std::string>* repetitive_params) {
  std::vector<framework::ir::Node*>& subgraph = *Agent(merged_node).subgraph();
  PADDLE_ENFORCE_EQ(subgraph.empty(), false,
                    platform::errors::NotFound(
                        "No subgraph found in lite subgraph pass. Please use "
                        "the full model call from Analysis Predictor."));

  const framework::BlockDesc& host_global_block =
      host_program->Block(framework::kRootBlockIndex);
  framework::BlockDesc* host_sub_block =
      host_program->AppendBlock(host_global_block);

  string::PrettyLogDetail("--- detect a sub-graph with %d nodes",
                          subgraph.size());

  std::unordered_set<Node*> io_var_nodes = GetRelatedIOVarNodes(subgraph);
  for (const auto* node : io_var_nodes) {
    VLOG(3) << "IO Variable Name: " << node->Name();
  }

  std::vector<framework::OpDesc*> subgraph_ops;
  for (auto* op_node : subgraph) {
    subgraph_ops.push_back(op_node->Op());
  }

  ModifyHostProgram(host_program, host_sub_block, io_var_nodes, subgraph_ops);
  ModifyEngineProgram(merged_node, host_program, engine_program,
                      host_sub_block->ID(), io_var_nodes, subgraph_ops);
  *repetitive_params = ExtractParameters(io_var_nodes, true);
  for (const auto& param : *repetitive_params) {
    VLOG(3) << "Repetitive param: " << param;
  }

  host_program->Flush();
  engine_program->Flush();
}
}  // namespace lite

void LiteSubgraphPass::SetUpEngine(
    framework::ProgramDesc* program,
    const std::vector<std::string>& repetitive_params,
    const std::string& unique_key, bool dump_model) const {
  inference::lite::EngineConfig config;
  auto* scope = param_scope();

  // When the pass is started, only the persistent variables of the
  // main block are read. Fluid seems to allow persistence variables
  // in the sub block, but they are controlled by context, so the
  // support is suspended here.
  auto serialize_params = [](std::string* str, framework::Scope* scope,
                             const std::vector<std::string>& params) {
    std::ostringstream os;
    platform::CPUDeviceContext ctx;
    for (const auto& param : params) {
      VLOG(3) << "Serialize param: " << param;
      PADDLE_ENFORCE_NOT_NULL(
          scope->FindVar(param),
          platform::errors::NotFound(
              "Block should already have a '%s' variable", param));
      auto* tensor = scope->FindVar(param)->GetMutable<framework::LoDTensor>();
      framework::SerializeToStream(os, *tensor, ctx);
    }
    *str = os.str();
  };

  bool use_gpu = Get<bool>("use_gpu");
  bool enable_int8 = Get<bool>("enable_int8");
  lite_api::TargetType target_type = use_gpu ? TARGET(kCUDA) : TARGET(kX86);
  paddle::lite_api::PrecisionType precision_type =
      enable_int8 ? PRECISION(kInt8) : PRECISION(kInt64);
  serialize_params(&config.param, scope, repetitive_params);
  config.model = program->Proto()->SerializeAsString();
  config.valid_places = {
      paddle::lite::Place({target_type, precision_type}),
      paddle::lite::Place({target_type, PRECISION(kFloat)}),
      paddle::lite::Place({TARGET(kHost), PRECISION(kFloat)}),
  };
  if (dump_model) {
    lite::StrToBinaryFile("./model.bin", config.model);
    lite::StrToBinaryFile("./param.bin", config.param);
  }
  inference::Singleton<inference::lite::EngineManager>::Global().Create(
      unique_key, config);
}

void LiteSubgraphPass::BuildOperator(
    Node* merged_node, framework::ProgramDesc* global_program,
    std::vector<std::string>* repetitive_params) const {
  framework::ProgramDesc engine_program;

  const std::string id = std::to_string(Get<int>("predictor_id"));
  const std::vector<std::string> input_names =
      lite::IOVarsFilter(merged_node->inputs);
  const std::vector<std::string> output_names =
      lite::IOVarsFilter(merged_node->outputs);
  const std::string unique_key = lite::UniqueKey(input_names, output_names, id);

  lite::OrganizeProgram(merged_node, global_program, &engine_program,
                        repetitive_params);
  SetUpEngine(&engine_program, *repetitive_params, unique_key);

  auto* op_desc = merged_node->Op();
  op_desc->SetInput("Xs", input_names);
  op_desc->SetOutput("Ys", output_names);
  op_desc->SetType("lite_engine");
  op_desc->SetAttr("engine_key", unique_key);
  op_desc->SetAttr("enable_int8", Get<bool>("enable_int8"));
  op_desc->SetAttr("use_gpu", Get<bool>("use_gpu"));
}

void LiteSubgraphPass::ApplyImpl(framework::ir::Graph* graph) const {
  framework::ir::FusePassBase::Init("lite_subgraph_pass", graph);
  framework::ProgramDesc* global_program =
      Get<framework::ProgramDesc*>("program");

  auto& lite_ops_filter = Get<std::vector<std::string>>("lite_ops_filter");

  auto teller = [&lite_ops_filter](const Node* node) {
    if (!node->IsOp() || !node->Op())
      return false;
    else if (node->Op()->Type() == "feed" || node->Op()->Type() == "fetch")
      return false;
    else if (std::find(lite_ops_filter.begin(), lite_ops_filter.end(),
                       node->Op()->Type()) != lite_ops_filter.end())
      return false;
    return inference::lite::OpTeller::Global().Tell(node->Op()->Type(),
                                                    *node->Op());
  };

  SubGraphFuser fuser(graph, teller, 0 /* min_subgraph_size */, "lite_engine");
  fuser();

  std::vector<std::string> repetitive_params;
  for (auto* node : graph->Nodes()) {
    if (node->IsOp() && !Agent(node).subgraph()->empty()) {
      BuildOperator(node, global_program, &repetitive_params);
      std::unordered_set<const Node*> nodes2remove(
          Agent(node).subgraph()->begin(), Agent(node).subgraph()->end());
      framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);
    }
  }

  std::unordered_set<const Node*> nodes2remove;
  for (auto* node : graph->Nodes()) {
    if (node->IsOp() && Agent(node).deleted()) {
      nodes2remove.insert(node);
    }
  }
  framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);
  graph->Set(framework::ir::kRepetitiveParamAttr,
             new std::vector<std::string>(repetitive_params));
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle

REGISTER_PASS(lite_subgraph_pass,
              paddle::inference::analysis::LiteSubgraphPass);
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h
new file mode 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <paddle/fluid/framework/ir/fuse_pass_base.h>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
namespace paddle {
namespace inference {
namespace analysis {

class LiteSubgraphPass : public framework::ir::FusePassBase {
 public:
  void ApplyImpl(framework::ir::Graph* graph) const override;

 private:
  void BuildOperator(framework::ir::Node* merged_node,
                     framework::ProgramDesc* global_program,
                     std::vector<std::string>* repetitive_params) const;

  void SetUpEngine(framework::ProgramDesc* program,
                   const std::vector<std::string>& repetitive_params,
                   const std::string& unique_key,
                   bool dump_model = false) const;
};

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc
new file mode 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h"
#include <gtest/gtest.h>
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/lite/op_teller.h"
namespace paddle {
namespace inference {
namespace analysis {

namespace lite {
void StrToBinaryFile(const std::string& path, const std::string& str);
void ModifyHostSubgraphOps(framework::ProgramDesc* host_program,
                           framework::BlockDesc* host_sub_block,
                           const std::vector<framework::OpDesc*>& subgraph_ops);
void AppendLiteSubBlocks(const std::vector<framework::OpDesc*>& subgraph_ops,
                         framework::ProgramDesc* engine_program,
                         framework::ProgramDesc* host_program,
                         const int32_t host_sub_id);
}

TEST(LiteSubgraphPass, basic) {
  framework::ProgramDesc host_program;
  framework::ProgramDesc engine_program;
  framework::BlockDesc* host_main_block = host_program.MutableBlock(0);
  framework::BlockDesc* host_sub_block =
      host_program.AppendBlock(*host_main_block);
  framework::OpDesc* host_while_op = host_main_block->AppendOp();
  host_main_block->Var("var_main");
  host_sub_block->Var("var_sub");
  host_while_op->SetType("while");
  host_while_op->SetAttr("sub_block", host_sub_block);
  framework::OpDesc* host_sub_block_op = host_sub_block->AppendOp();
  host_sub_block_op->SetType("leaky_relu");

  CHECK(inference::lite::OpTeller::Global().Tell("while", *host_while_op))
      << "Lite operator teller test failed.";

  lite::AppendLiteSubBlocks({host_while_op}, &engine_program, &host_program,
                            host_sub_block->ID());
  lite::ModifyHostSubgraphOps(&host_program, host_sub_block, {host_while_op});
  lite::StrToBinaryFile("./", "test");
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc
...
@@ -26,7 +26,7 @@ namespace analysis {
using framework::ir::Node;

std::vector<std::string> ExtractParameters(
    const std::unordered_set<Node *> &nodes, bool sorted) {
  // We can judge whether a variable is a parameter by
  // its presistable property, but sometimes the presistable
  // of the feed op output is true, so we have to identify it.
...
@@ -50,9 +50,59 @@ std::vector<std::string> ExtractParameters(
      parameters.push_back(node->Name());
    }
  }
  if (sorted) {
    std::sort(parameters.begin(), parameters.end());
    parameters.erase(std::unique(parameters.begin(), parameters.end()),
                     parameters.end());
  }
  return parameters;
}

std::unordered_set<Node *> GetRelatedIOVarNodes(
    const std::vector<Node *> &nodes) {
  std::unordered_set<Node *> io_nodes;
  for (const auto &node : nodes) {
    if (!node->IsOp()) continue;
    for (const auto &in : node->inputs) {
      io_nodes.insert(in);
    }
    for (const auto &out : node->outputs) {
      io_nodes.insert(out);
    }
  }
  return io_nodes;
}

void PrependFeedOps(framework::BlockDesc *global_block,
                    const std::vector<std::string> &feed_target_names,
                    std::string feed_holder_name) {
  framework::VarDesc *feed_var = global_block->Var(feed_holder_name);
  feed_var->SetType(paddle::framework::proto::VarType::FEED_MINIBATCH);
  feed_var->SetPersistable(true);
  for (size_t i = 0; i < feed_target_names.size(); i++) {
    framework::OpDesc *feed_op = global_block->AppendOp();
    feed_op->SetType("feed");
    feed_op->SetInput("X", {feed_holder_name});
    feed_op->SetOutput("Out", {feed_target_names[i]});
    feed_op->SetAttr("col", static_cast<int>(i));
  }
}

void PrependFetchOps(framework::BlockDesc *global_block,
                     const std::vector<std::string> &fetch_target_names,
                     std::string fetch_holder_name) {
  framework::VarDesc *fetch_var = global_block->Var(fetch_holder_name);
  fetch_var->SetType(paddle::framework::proto::VarType::FETCH_LIST);
  fetch_var->SetPersistable(true);
  for (size_t i = 0; i < fetch_target_names.size(); i++) {
    framework::OpDesc *fetch_op = global_block->AppendOp();
    fetch_op->SetType("fetch");
    fetch_op->SetInput("X", {fetch_target_names[i]});
    fetch_op->SetOutput("Out", {fetch_holder_name});
    fetch_op->SetAttr("col", static_cast<int>(i));
  }
}

void RenameAndGetOutputs(
    const std::vector<framework::ir::Node *> &subgraph_nodes,
    framework::BlockDesc *block_desc,
...
paddle/fluid/inference/analysis/ir_passes/subgraph_util.h
...
@@ -30,10 +30,21 @@ limitations under the License. */
namespace paddle {
namespace inference {
namespace analysis {
using framework::ir::Node;

std::vector<std::string> ExtractParameters(
    const std::unordered_set<framework::ir::Node *> &nodes,
    bool sorted = false);

std::unordered_set<framework::ir::Node *> GetRelatedIOVarNodes(
    const std::vector<framework::ir::Node *> &nodes);

void PrependFeedOps(framework::BlockDesc *global_block,
                    const std::vector<std::string> &feed_target_names,
                    std::string feed_holder_name = "feed");

void PrependFetchOps(framework::BlockDesc *global_block,
                     const std::vector<std::string> &fetch_target_names,
                     std::string fetch_holder_name = "fetch");

void RenameAndGetOutputs(
    const std::vector<framework::ir::Node *> &subgraph_nodes,
...
paddle/fluid/inference/api/analysis_config.cc
...
@@ -23,6 +23,7 @@
namespace paddle {
extern const std::vector<std::string> kTRTSubgraphPasses;
extern const std::vector<std::string> kAnakinSubgraphPasses;
extern const std::vector<std::string> kLiteSubgraphPasses;

PassStrategy *AnalysisConfig::pass_builder() const {
  if (!pass_builder_.get()) {
...
@@ -128,6 +129,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
  CP_MEMBER(anakin_passes_filter_);
  CP_MEMBER(anakin_ops_filter_);

  CP_MEMBER(use_lite_);
  CP_MEMBER(lite_precision_mode_);
  CP_MEMBER(lite_passes_filter_);
  CP_MEMBER(lite_ops_filter_);

  // profile related.
  CP_MEMBER(with_profile_);
...
@@ -351,6 +357,20 @@ void AnalysisConfig::Update() {
    }
  }

  if (use_lite_) {
#ifndef PADDLE_WITH_LITE
    LOG(WARNING) << "You tried to enable the lite subgraph "
                    "but did not have the option -DWITH_LITE compiled.";
#endif
    pass_builder()->ClearPasses();
    for (const auto &pass : kLiteSubgraphPasses) {
      if (std::find(lite_passes_filter_.begin(), lite_passes_filter_.end(),
                    pass) == lite_passes_filter_.end()) {
        pass_builder()->AppendPass(pass);
      }
    }
  }

  if (ir_debug_) {
    pass_builder()->TurnOnDebug();
  }
...
@@ -395,6 +415,8 @@ std::string AnalysisConfig::SerializeInfoCache() {
  ss << cpu_math_library_num_threads_;

  ss << use_anakin_;
  ss << anakin_min_subgraph_size_;

  ss << use_lite_;

  return ss.str();
}
...
@@ -484,6 +506,17 @@ void AnalysisConfig::EnableAnakinEngine(
  Update();
}

void AnalysisConfig::EnableLiteEngine(
    AnalysisConfig::Precision precision_mode,
    const std::vector<std::string> &passes_filter,
    const std::vector<std::string> &ops_filter) {
  use_lite_ = true;
  lite_precision_mode_ = precision_mode;
  lite_passes_filter_ = passes_filter;
  lite_ops_filter_ = ops_filter;
  Update();
}

void AnalysisConfig::PartiallyRelease() {
  prog_file_.clear();
  prog_file_.shrink_to_fit();
...
paddle/fluid/inference/api/analysis_predictor.cc
...
@@ -428,6 +428,13 @@ void AnalysisPredictor::PrepareArgument() {
    LOG(INFO) << "Anakin subgraph engine is enabled";
  }

  if (config_.lite_engine_enabled()) {
    argument_.SetLitePrecisionMode(config_.lite_precision_mode_);
    argument_.SetLitePassesFilter(config_.lite_passes_filter_);
    argument_.SetLiteOpsFilter(config_.lite_ops_filter_);
    LOG(INFO) << "Lite subgraph engine is enabled";
  }

  if (config_.use_mkldnn_) {
    LOG(INFO) << "MKLDNN is enabled";
    argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_);
...
paddle/fluid/inference/api/paddle_analysis_config.h
...
@@ -171,10 +171,19 @@ struct AnalysisConfig {
      std::vector<std::string> passes_filter = {},
      std::vector<std::string> ops_filter = {});

  void EnableLiteEngine(
      AnalysisConfig::Precision precision_mode = Precision::kFloat32,
      const std::vector<std::string>& passes_filter = {},
      const std::vector<std::string>& ops_filter = {});

  /** A boolean state indicating whether the Anakin sub-graph engine is used.
   */
  bool anakin_engine_enabled() const { return use_anakin_; }

  /** A boolean state indicating whether the Lite sub-graph engine is used.
   */
  bool lite_engine_enabled() const { return use_lite_; }

  /** \brief Control whether to debug IR graph analysis phase.
   *
   * This will generate DOT files for visualizing the computation graph after
...
@@ -350,6 +359,11 @@ struct AnalysisConfig {
  std::vector<std::string> anakin_passes_filter_;
  std::vector<std::string> anakin_ops_filter_;

  bool use_lite_{false};
  std::vector<std::string> lite_passes_filter_;
  std::vector<std::string> lite_ops_filter_;
  Precision lite_precision_mode_;

  // mkldnn related.
  int mkldnn_cache_capacity_{0};
  bool use_mkldnn_quantizer_{false};
...
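For orientation only (not part of the patch): a minimal sketch of how a caller might use the new EnableLiteEngine API above. The model directory, tensor name, and shape are hypothetical placeholders; only the config calls mirror this diff.

// Hedged usage sketch: enable the Lite subgraph engine on an AnalysisConfig.
// "./mul_model", the input name "x", and the shape {1, 4} are illustrative.
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void RunWithLiteSubgraph() {
  paddle::AnalysisConfig config;
  config.SetModel("./mul_model");  // hypothetical model directory
  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
  auto predictor = paddle::CreatePaddlePredictor(config);

  paddle::PaddleTensor input;
  input.name = "x";
  input.shape = {1, 4};
  input.data.Resize(4 * sizeof(float));
  input.dtype = paddle::PaddleDType::FLOAT32;

  std::vector<paddle::PaddleTensor> outputs;
  predictor->Run({input}, &outputs);  // ops told by OpTeller run inside Lite
}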
paddle/fluid/inference/api/paddle_pass_builder.cc
...
@@ -105,6 +105,12 @@ const std::vector<std::string> kAnakinSubgraphPasses({
    "fc_gru_fuse_pass",                         //
});

const std::vector<std::string> kLiteSubgraphPasses({
#ifdef PADDLE_WITH_LITE
    "lite_subgraph_pass",
#endif
});

GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
  passes_.assign({
    //   "identity_scale_op_clean_pass",        //
...
@@ -123,7 +129,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
    "conv_elementwise_add2_act_fuse_pass",      //
    "conv_elementwise_add_fuse_pass",           //
#endif                                          //
    "transpose_flatten_concat_fuse_pass",       //
    // following pass should be located in the last, since it will
    // work on all fused ops.
    "runtime_context_cache_pass"
...
paddle/fluid/inference/api/paddle_pass_builder.h
...
@@ -163,5 +163,6 @@ class GpuPassStrategy : public PassStrategy {
extern const std::vector<std::string> kTRTSubgraphPasses;
extern const std::vector<std::string> kAnakinSubgraphPasses;
extern const std::vector<std::string> kLiteSubgraphPasses;

}  // namespace paddle
paddle/fluid/inference/lite/CMakeLists.txt
new file mode 100644

cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto)
cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost)
cc_test(test_lite_engine SRCS test_engine.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)
paddle/fluid/inference/lite/engine.cc
new file mode 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_CUDA
#define LITE_WITH_CUDA 1
#endif
#include "paddle/fluid/inference/lite/engine.h"
#include "lite/core/context.h"
#include "lite/core/device_info.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
namespace paddle {
namespace inference {
namespace lite {

bool EngineManager::Empty() const { return engines_.size() == 0; }

bool EngineManager::Has(const std::string& name) const {
  if (engines_.count(name) == 0) {
    return false;
  }
  return engines_.at(name).get() != nullptr;
}

paddle::lite::Predictor* EngineManager::Get(const std::string& name) const {
  return engines_.at(name).get();
}

paddle::lite::Predictor* EngineManager::Create(const std::string& name,
                                               const EngineConfig& cfg) {
  auto* p = new paddle::lite::Predictor();
#ifdef PADDLE_WITH_CUDA
  paddle::lite::Env<TARGET(kCUDA)>::Init();
#endif
  p->Build("", cfg.model, cfg.param, cfg.valid_places, cfg.neglected_passes,
           cfg.model_type, cfg.model_from_memory);
  engines_[name].reset(p);
  return p;
}

void EngineManager::DeleteAll() {
  for (auto& item : engines_) {
    item.second.reset(nullptr);
  }
}

}  // namespace lite
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/lite/engine.h
new file mode 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "lite/api/cxx_api.h"
namespace paddle {
namespace inference {
namespace lite {

struct EngineConfig {
  std::string model;
  std::string param;
  paddle::lite::Place prefer_place;
  std::vector<paddle::lite::Place> valid_places;
  std::vector<std::string> neglected_passes;
  lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf};
  bool model_from_memory{true};
};

class EngineManager {
 public:
  bool Empty() const;
  bool Has(const std::string& name) const;
  paddle::lite::Predictor* Get(const std::string& name) const;
  paddle::lite::Predictor* Create(const std::string& name,
                                  const EngineConfig& cfg);
  void DeleteAll();

 private:
  std::unordered_map<std::string, std::unique_ptr<paddle::lite::Predictor>>
      engines_;
};

}  // namespace lite
}  // namespace inference
}  // namespace paddle
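As a reading aid (not part of the patch): the EngineManager above is accessed through the inference::Singleton wrapper, exactly as lite_subgraph_pass.cc and test_engine.cc do. A condensed sketch, with the engine key and config contents as placeholders:

// Condensed, hypothetical usage of EngineManager via inference::Singleton.
#include <string>
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

void BuildAndQueryEngine(const std::string& model, const std::string& param) {
  paddle::inference::lite::EngineConfig config;
  config.model = model;  // serialized ProgramDesc, as produced by SetUpEngine
  config.param = param;  // serialized persistable tensors
  config.valid_places = {
      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)})};

  auto& manager = paddle::inference::Singleton<
      paddle::inference::lite::EngineManager>::Global();
  manager.Create("engine_0", config);  // builds a paddle::lite::Predictor
  paddle::lite::Predictor* engine = manager.Get("engine_0");
  (void)engine;  // LiteEngineOp runs the predictor at execution time
  manager.DeleteAll();
}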
paddle/fluid/inference/lite/op_teller.cc
new file mode 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/lite/op_teller.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace inference {
namespace lite {

// Just tell by the op_types.
struct SimpleOpTeller : public Teller {
  SimpleOpTeller() {
    const std::map<std::string, std::string>& op2path =
        OpKernelInfoCollector::Global().GetOp2PathDict();
    auto is_non_inst = [](const std::string& op) -> bool {
      const std::vector<std::string> ops = {"feed", "fetch", "while"};
      return std::find(ops.begin(), ops.end(), op) != ops.end();
    };
    for (const auto& op : op2path) {
      if (!is_non_inst(op.first)) {
        ops_.insert(op.first);
      }
    }
  }

  bool operator()(const std::string& op_type,
                  const framework::OpDesc& op_desc) override {
    return ops_.count(op_type);
  }

 private:
  std::unordered_set<std::string> ops_{};
};

struct SingleBlockOpTeller : public Teller {
  SingleBlockOpTeller() { ops_.insert("while"); }

  bool operator()(const std::string& op_type,
                  const framework::OpDesc& op_desc) override {
    if (ops_.count(op_type)) {
      SimpleOpTeller supported;
      const int id = op_desc.GetBlockAttrId("sub_block");
      const framework::BlockDesc& block_desc =
          op_desc.Block()->Program()->Block(id);
      const std::vector<framework::OpDesc*>& ops_sub_block =
          block_desc.AllOps();
      for (auto* op : ops_sub_block) {
        if (!supported(op->Type(), *op) &&
            !this->operator()(op->Type(), *op)) {
          return false;
        }
      }
      return true;
    }
    return false;
  }

 private:
  std::unordered_set<std::string> ops_;
};

bool OpTeller::Tell(const std::string& op_type,
                    const framework::OpDesc& desc) {
  for (auto& teller : tellers_) {
    if ((*teller)(op_type, desc)) return true;
  }
  return false;
}

OpTeller::OpTeller() {
  tellers_.emplace_back(new SimpleOpTeller);
  tellers_.emplace_back(new SingleBlockOpTeller);
}

}  // namespace lite
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/lite/op_teller.h
new file mode 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"
namespace paddle {
namespace inference {
namespace lite {

/*
 * Single Op teller definition.
 * One can override this and define a more complex tell logic, considerring
 * more issues such as op_desc.
 */
struct Teller {
  virtual bool operator()(const std::string& op_type,
                          const framework::OpDesc& desc) = 0;

  virtual ~Teller() = default;
};

/*
 * A real example:
 *
 * struct SomeTeller : public Teller {
 *   bool operator()(const std::string& op_type,
 *                   const framework::OpDesc& desc) override {
 *     return op_type == "fc" && desc.Inputs().size() == 2;
 *   }
 * };
 */

/*
 * class OpTeller helps to tell whether a fluid
 * operator can be transformed to a TensorRT layer.
 */
class OpTeller {
 public:
  static OpTeller& Global() {
    static std::unique_ptr<OpTeller> x(new OpTeller);
    return *x;
  }

  bool Tell(const std::string& op_type, const framework::OpDesc& desc);

 private:
  OpTeller();

 private:
  std::vector<std::unique_ptr<Teller>> tellers_;
};

}  // namespace lite
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/lite/tensor_utils.cc
new file mode 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include <map>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/inference/lite/engine.h"
namespace paddle {
namespace inference {
namespace lite {
namespace utils {

using paddle::lite_api::TargetType;
using paddle::lite_api::PrecisionType;
using paddle::lite_api::DataLayoutType;

template <typename DstLoD, typename SrcLoD>
void SetLoD(DstLoD* dst, const SrcLoD& src) {
  dst->reserve(src.size());
  dst->clear();
  for (auto&& v : src) {
    dst->emplace_back(v);
  }
}
template void SetLoD<paddle::lite::LoD, framework::LoD>(
    paddle::lite::LoD* dst, const framework::LoD& src);
template void SetLoD<framework::LoD, paddle::lite::LoD>(
    framework::LoD* dst, const paddle::lite::LoD& src);

platform::Place GetNativePlace(const TargetType& type, int id = 0) {
  switch (type) {
    case TargetType::kHost:
    case TargetType::kX86:
      return platform::CPUPlace();
    case TargetType::kCUDA:
      return platform::CUDAPlace(id);
    default:
      LOG(FATAL) << "Error target type.";
      return platform::Place();
  }
}

TargetType GetLiteTargetType(const platform::Place& place) {
  if (platform::is_cpu_place(place)) {
    return TargetType::kHost;
  }
  return TargetType::kCUDA;
}

PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type) {
  switch (type) {
    case framework::proto::VarType_Type_FP32:
      return PrecisionType::kFloat;
    case framework::proto::VarType_Type_INT8:
      return PrecisionType::kInt8;
    case framework::proto::VarType_Type_INT32:
      return PrecisionType::kInt32;
    case framework::proto::VarType_Type_INT64:
      return PrecisionType::kInt64;
    default:
      LOG(FATAL) << "Error precision type.";
      return PrecisionType::kUnk;
  }
}

framework::proto::VarType::Type GetNativePrecisionType(
    const PrecisionType& type) {
  switch (type) {
    case PrecisionType::kFloat:
      return framework::proto::VarType_Type_FP32;
    case PrecisionType::kInt8:
      return framework::proto::VarType_Type_INT8;
    case PrecisionType::kInt32:
      return framework::proto::VarType_Type_INT32;
    case PrecisionType::kInt64:
      return framework::proto::VarType_Type_INT64;
    default:
      LOG(FATAL) << "Error precision type.";
      return static_cast<framework::proto::VarType::Type>(-1);
  }
}

framework::DataLayout GetNativeLayoutType(const DataLayoutType& type) {
  switch (type) {
    case DataLayoutType::kNCHW:
      return framework::DataLayout::kNCHW;
    default:
      LOG(FATAL) << "Error layout type.";
      return static_cast<framework::DataLayout>(-1);
  }
}

void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
                     const platform::Place& src_place, const void* src_data,
                     const size_t size, const platform::DeviceContext& ctx) {
  const platform::CPUPlace cpu_place;
  if (platform::is_cpu_place(dst_place) && platform::is_cpu_place(src_place)) {
    memory::Copy(cpu_place, dst_data, cpu_place, src_data, size);
  } else {
#ifdef PADDLE_WITH_CUDA
    if (platform::is_cpu_place(dst_place) &&
        platform::is_gpu_place(src_place)) {
      LOG(FATAL) << "lite::MemoryCopy GPU->CPU is not yet implemented.";
    } else if (platform::is_gpu_place(dst_place) &&
               platform::is_cpu_place(src_place)) {
      LOG(FATAL) << "lite::MemoryCopy CPU->GPU is not yet implemented.";
    } else if (platform::is_gpu_place(dst_place) &&
               platform::is_gpu_place(src_place)) {
      auto gpu_place = boost::get<platform::CUDAPlace>(src_place);
      memory::Copy(
          gpu_place, dst_data, gpu_place, src_data, size,
          static_cast<const platform::CUDADeviceContext&>(ctx).stream());
    }
#else
    LOG(FATAL) << "You must define PADDLE_WITH_CUDA for using CUDAPlace.";
#endif
  }
}

void InitDstTensor(paddle::lite::Tensor* dst, const framework::LoDTensor& src) {
  // Currently, Lite needs to explicitly specify the target type of
  // the input tensor.
  constexpr int empty_size = 0;
  dst->mutable_data(GetLiteTargetType(src.place()), empty_size);
  dst->set_precision(GetLitePrecisionType(src.type()));
  SetLoD(dst->mutable_lod(), src.lod());
}

void InitDstTensor(framework::LoDTensor* dst, const paddle::lite::Tensor& src) {
  constexpr framework::proto::VarType::Type dtype =
      framework::proto::VarType_Type_FP32;
  dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()),
                    dtype);
  SetLoD(dst->mutable_lod(), src.lod());
}

template <>
void TensorCopyAsync(paddle::lite::Tensor* dst, const framework::LoDTensor& src,
                     const platform::DeviceContext& ctx) {
  InitDstTensor(dst, src);
  const platform::Place& src_place = src.place();
  const platform::Place& dst_place = GetNativePlace(dst->target());
  const size_t bytes =
      static_cast<size_t>(src.numel()) * framework::SizeOfType(src.type());
  dst->Resize(framework::vectorize(src.dims()));
  const void* src_data = src.data<void>();
  void* dst_data = dst->mutable_data(bytes);
  MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
}

template <>
void TensorCopyAsync(framework::LoDTensor* dst, const paddle::lite::Tensor& src,
                     const platform::DeviceContext& ctx) {
  InitDstTensor(dst, src);
  const platform::Place& src_place = GetNativePlace(src.target());
  const platform::Place& dst_place = dst->place();
  dst->Resize(paddle::framework::make_ddim(src.dims().Vectorize()));
  const size_t bytes =
      static_cast<size_t>(src.numel()) * framework::SizeOfType(dst->type());
  const void* src_data = src.raw_data();
  // When Lite is ready, the source type needs to be modified here.
  void* dst_data = dst->mutable_data(dst_place, dst->type());
  MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx);
}

}  // namespace utils
}  // namespace lite
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/lite/tensor_utils.h
new file mode 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/api/paddle_place.h"
#include "lite/core/tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
namespace paddle {
namespace inference {
namespace lite {
namespace utils {

template <typename DstTensor, typename SrcTensor>
void TensorCopyAsync(DstTensor* dst, const SrcTensor& src,
                     const platform::DeviceContext& ctx);

}  // namespace utils
}  // namespace lite
}  // namespace inference
}  // namespace paddle
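A small sketch (not from the patch) of how the two TensorCopyAsync specializations defined in tensor_utils.cc can be driven on CPU; the shape and values are arbitrary placeholders.

// Illustrative only: copies a fluid LoDTensor into a lite Tensor on CPU.
#include "lite/core/tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include "paddle/fluid/platform/device_context.h"

void CopyFluidToLiteOnCpu() {
  paddle::framework::LoDTensor fluid_tensor;
  fluid_tensor.Resize(paddle::framework::make_ddim({2, 4}));
  float* data = fluid_tensor.mutable_data<float>(paddle::platform::CPUPlace());
  for (int i = 0; i < 8; ++i) data[i] = static_cast<float>(i);

  paddle::lite::Tensor lite_tensor;
  paddle::platform::CPUDeviceContext ctx;
  // Initializes the destination (target, precision, LoD) and copies the bytes.
  paddle::inference::lite::utils::TensorCopyAsync(&lite_tensor, fluid_tensor,
                                                  ctx);
}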
paddle/fluid/inference/lite/test_engine.cc
new file mode 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/operators/lite/ut_helper.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
namespace
paddle
{
namespace
inference
{
namespace
lite
{
using
inference
::
lite
::
AddTensorToBlockDesc
;
using
inference
::
lite
::
CreateTensor
;
using
inference
::
lite
::
serialize_params
;
void make_fake_model(std::string* model, std::string* param) {
  framework::ProgramDesc program;
  LOG(INFO) << "program.block size is " << program.Size();
  auto* block_ = program.Proto()->mutable_blocks(0);
  LOG(INFO) << "create block desc";
  framework::BlockDesc block_desc(&program, block_);

  auto* feed0 = block_desc.AppendOp();
  feed0->SetType("feed");
  feed0->SetInput("X", {"feed"});
  feed0->SetOutput("Out", {"x"});
  feed0->SetAttr("col", 0);
  auto* feed1 = block_desc.AppendOp();
  feed1->SetType("feed");
  feed1->SetInput("X", {"feed"});
  feed1->SetOutput("Out", {"y"});
  feed1->SetAttr("col", 1);

  LOG(INFO) << "create elementwise_add op";
  auto* elt_add = block_desc.AppendOp();
  elt_add->SetType("elementwise_add");
  elt_add->SetInput("X", std::vector<std::string>({"x"}));
  elt_add->SetInput("Y", std::vector<std::string>({"y"}));
  elt_add->SetOutput("Out", std::vector<std::string>({"z"}));
  elt_add->SetAttr("axis", -1);

  LOG(INFO) << "create fetch op";
  auto* fetch = block_desc.AppendOp();
  fetch->SetType("fetch");
  fetch->SetInput("X", std::vector<std::string>({"z"}));
  fetch->SetOutput("Out", std::vector<std::string>({"out"}));
  fetch->SetAttr("col", 0);

  // Set inputs' variable shape in BlockDesc
  AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4}), true);
  AddTensorToBlockDesc(block_, "y", std::vector<int64_t>({2, 4}), true);
  AddTensorToBlockDesc(block_, "z", std::vector<int64_t>({2, 4}), false);
  AddTensorToBlockDesc(block_, "out", std::vector<int64_t>({2, 4}), false);

  *block_->add_ops() = *feed0->Proto();
  *block_->add_ops() = *feed1->Proto();
  *block_->add_ops() = *elt_add->Proto();
  *block_->add_ops() = *fetch->Proto();

  framework::Scope scope;
#ifdef PADDLE_WITH_CUDA
  platform::CUDAPlace place;
  platform::CUDADeviceContext ctx(place);
#else
  platform::CPUPlace place;
  platform::CPUDeviceContext ctx(place);
#endif
  // Prepare variables.
  std::vector<std::string> repetitive_params{"x", "y"};
  CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}));
  CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}));
  ASSERT_EQ(block_->ops_size(), 4);

  *model = program.Proto()->SerializeAsString();
  serialize_params(param, &scope, repetitive_params);
}
TEST(EngineManager, engine) {
  ASSERT_EQ(
      inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
      true);

  inference::lite::EngineConfig config;
  make_fake_model(&(config.model), &(config.param));
  LOG(INFO) << "prepare config";

  const std::string unique_key("engine_0");
  config.model_from_memory = true;
  config.valid_places = {
#ifdef PADDLE_WITH_CUDA
      paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif
      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
      paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
  };

  LOG(INFO) << "Create EngineManager";
  inference::Singleton<inference::lite::EngineManager>::Global().Create(
      unique_key, config);
  LOG(INFO) << "Create EngineManager done";
  ASSERT_EQ(
      inference::Singleton<inference::lite::EngineManager>::Global().Empty(),
      false);
  ASSERT_EQ(inference::Singleton<inference::lite::EngineManager>::Global().Has(
                unique_key),
            true);
  paddle::lite::Predictor* engine_0 =
      inference::Singleton<inference::lite::EngineManager>::Global().Get(
          unique_key);
  CHECK_NOTNULL(engine_0);
  inference::Singleton<inference::lite::EngineManager>::Global().DeleteAll();
  CHECK(inference::Singleton<inference::lite::EngineManager>::Global().Get(
            unique_key) == nullptr)
      << "the engine_0 should be nullptr";
}
}  // namespace lite
}  // namespace inference
}  // namespace paddle
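Once an engine has been registered under a key as in the test above, it is retrieved and driven by the lite_engine op. Below is a condensed sketch of that data flow, distilled from lite_engine_op.h later in this change; the key "engine_0", the single input/output slot, and the function name are illustrative, not part of the patch:

// Sketch only: feed one input, run the Lite predictor, read one output back.
void RunLiteEngineSketch(const paddle::framework::LoDTensor& fluid_in,
                         paddle::framework::LoDTensor* fluid_out,
                         const paddle::platform::DeviceContext& ctx) {
  namespace inf = paddle::inference;
  paddle::lite::Predictor* engine =
      inf::Singleton<inf::lite::EngineManager>::Global().Get("engine_0");
  // fluid -> lite for input slot 0.
  inf::lite::utils::TensorCopyAsync(engine->GetInput(0), fluid_in, ctx);
  engine->Run();
  // lite -> fluid for output slot 0.
  inf::lite::utils::TensorCopyAsync(fluid_out, *engine->GetOutput(0), ctx);
}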
paddle/fluid/inference/lite/test_tensor_utils.cc
0 → 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
namespace paddle {
namespace inference {
namespace lite {
namespace utils {

using paddle::lite_api::TargetType;
using paddle::lite_api::PrecisionType;
using paddle::lite_api::DataLayoutType;
TEST(LiteEngineOp, GetNativePlace) {
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
  platform::Place GetNativePlace(const TargetType& type, int id = 0);
  EXPECT_TRUE(platform::is_cpu_place(GetNativePlace(TargetType::kHost)));
  EXPECT_TRUE(platform::is_gpu_place(GetNativePlace(TargetType::kCUDA)));
  ASSERT_DEATH(GetNativePlace(TargetType::kUnk), "");
}
TEST(LiteEngineOp, GetLiteTargetType) {
  TargetType GetLiteTargetType(const platform::Place& place);
  ASSERT_EQ(GetLiteTargetType(platform::CPUPlace()), TargetType::kHost);
  ASSERT_EQ(GetLiteTargetType(platform::CUDAPlace(0)), TargetType::kCUDA);
}
TEST(LiteEngineOp, GetLitePrecisionType) {
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
  PrecisionType GetLitePrecisionType(framework::proto::VarType::Type type);
  ASSERT_EQ(GetLitePrecisionType(framework::proto::VarType_Type_FP32),
            PrecisionType::kFloat);
  ASSERT_EQ(GetLitePrecisionType(framework::proto::VarType_Type_INT8),
            PrecisionType::kInt8);
  ASSERT_EQ(GetLitePrecisionType(framework::proto::VarType_Type_INT32),
            PrecisionType::kInt32);
  ASSERT_DEATH(
      GetLitePrecisionType(framework::proto::VarType_Type_SELECTED_ROWS), "");
}
TEST(LiteEngineOp, GetNativePrecisionType) {
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
  framework::proto::VarType::Type GetNativePrecisionType(
      const PrecisionType& type);
  ASSERT_EQ(GetNativePrecisionType(PrecisionType::kFloat),
            framework::proto::VarType_Type_FP32);
  ASSERT_EQ(GetNativePrecisionType(PrecisionType::kInt8),
            framework::proto::VarType_Type_INT8);
  ASSERT_EQ(GetNativePrecisionType(PrecisionType::kInt32),
            framework::proto::VarType_Type_INT32);
  ASSERT_DEATH(GetNativePrecisionType(PrecisionType::kUnk), "");
}
TEST(LiteEngineOp, GetNativeLayoutType) {
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
  framework::DataLayout GetNativeLayoutType(const DataLayoutType& type);
  ASSERT_EQ(GetNativeLayoutType(DataLayoutType::kNCHW),
            framework::DataLayout::kNCHW);
  ASSERT_DEATH(GetNativeLayoutType(DataLayoutType::kNHWC), "");
}
void test_tensor_copy(const platform::DeviceContext& ctx) {
  // Create LoDTensor.
  std::vector<float> vector({1, 2, 3, 4});
  framework::LoDTensor lod_tensor;
  framework::TensorFromVector(vector, &lod_tensor);
  framework::LoD lod({{0, 2, 4}});
  lod_tensor.Resize({4, 1});
  lod_tensor.set_lod(lod);
  // Create lite::Tensor and copy.
  paddle::lite::Tensor lite_tensor;
  TensorCopyAsync(&lite_tensor, lod_tensor, ctx);
  // Copy to LoDTensor.
  framework::LoDTensor lod_tensor_n;
  TensorCopyAsync(&lod_tensor_n, lite_tensor, ctx);
#ifdef PADDLE_WITH_CUDA
  if (platform::is_gpu_place(ctx.GetPlace())) {
    platform::GpuStreamSync(
        static_cast<const platform::CUDADeviceContext&>(ctx).stream());
  }
#endif
  std::vector<float> result;
  TensorToVector(lod_tensor_n, &result);
  ASSERT_EQ(result, vector);
  ASSERT_EQ(lod_tensor_n.lod(), lod_tensor.lod());
}
TEST(LiteEngineOp, TensorCopyAsync) {
  auto* ctx_cpu =
      platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
  test_tensor_copy(*ctx_cpu);
#ifdef PADDLE_WITH_CUDA
  auto* ctx_gpu =
      platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0));
  test_tensor_copy(*ctx_gpu);
#endif
}
}  // namespace utils
}  // namespace lite
}  // namespace inference
}  // namespace paddle
paddle/fluid/inference/tests/api/CMakeLists.txt
...
...
@@ -352,7 +352,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
  inference_analysis_test(test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
              EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
              ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)

  set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/quant_small_model")
  if (NOT EXISTS ${TRT_MODEL_QUANT_RESNET_DIR})
    inference_download_and_uncompress(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "quant_small_model.tar.gz")
...
...
@@ -362,6 +361,12 @@ if(WITH_GPU AND TENSORRT_FOUND)
              ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR})
endif()

set(LITE_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lite")
download_data(${LITE_MODEL_INSTALL_DIR} "mul_model_fp32.tgz")

inference_analysis_test(lite_mul_model_test SRCS lite_mul_model_test.cc
  EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
  ARGS --infer_model=${LITE_MODEL_INSTALL_DIR})

inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
  EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
  ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
...
...
paddle/fluid/inference/tests/api/lite_mul_model_test.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <cmath>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
TEST(AnalysisPredictor, use_gpu) {
  std::string model_dir = FLAGS_infer_model + "/" + "mul_model";
  AnalysisConfig config;
  config.EnableUseGpu(100, 0);
  config.SetModel(model_dir);
  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);

  std::vector<PaddleTensor> inputs;
  auto predictor = CreatePaddlePredictor(config);
  std::vector<float> input({1});

  PaddleTensor in;
  in.shape = {1, 1};
  in.data = PaddleBuf(static_cast<void*>(input.data()), 1 * sizeof(float));
  in.dtype = PaddleDType::FLOAT32;
  inputs.emplace_back(in);

  std::vector<PaddleTensor> outputs;
  ASSERT_TRUE(predictor->Run(inputs, &outputs));

  const std::vector<float> truth_values = {
      -0.00621776, -0.00620937, 0.00990623,  -0.0039817, -0.00074315,
      0.61229795,  -0.00491806, -0.00068755, 0.18409646, 0.30090684};

  const size_t expected_size = 1;
  EXPECT_EQ(outputs.size(), expected_size);
  float* data_o = static_cast<float*>(outputs[0].data.data());
  for (size_t j = 0; j < outputs[0].data.length() / sizeof(float); ++j) {
    EXPECT_LT(std::abs(data_o[j] - truth_values[j]), 10e-6);
  }
}
}  // namespace inference
}  // namespace paddle
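Stripped of the gtest assertions, the configuration pattern this test exercises is roughly the following sketch; the model path, GPU memory pool size, the single 1x1 input, and the function name are illustrative, while EnableLiteEngine is called exactly as in the test above:

// Sketch only: run a model through the analysis predictor with the
// Lite subgraph engine enabled on GPU device 0.
std::vector<paddle::PaddleTensor> RunWithLiteSubgraph(const std::string& model_dir) {
  paddle::AnalysisConfig config;
  config.EnableUseGpu(100, 0);  // 100 MB initial GPU memory pool, device 0
  config.SetModel(model_dir);
  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);

  auto predictor = paddle::CreatePaddlePredictor(config);

  std::vector<float> input({1});
  paddle::PaddleTensor in;
  in.shape = {1, 1};
  in.data = paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
  in.dtype = paddle::PaddleDType::FLOAT32;

  std::vector<paddle::PaddleTensor> outputs;
  predictor->Run({in}, &outputs);
  return outputs;
}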
paddle/fluid/operators/CMakeLists.txt
...
...
@@ -38,7 +38,12 @@ if (ANAKIN_SUBGRAPH)
    add_subdirectory(anakin)
endif()

if (WITH_LITE)
    add_subdirectory(lite)
endif()

SET(OP_HEADER_DEPS xxhash executor)
if (WITH_GPU)
    SET(OP_HEADER_DEPS ${OP_HEADER_DEPS} cub)
endif()
...
...
paddle/fluid/operators/lite/CMakeLists.txt
0 → 100644
op_library(lite_engine_op DEPS lite_engine lite_tensor_utils)
cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis)
paddle/fluid/operators/lite/lite_engine_op.cc
0 → 100644
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lite/lite_engine_op.h"
#include <string>
#include <vector>
namespace paddle {
namespace operators {

class LiteEngineOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Xs", "A list of inputs.").AsDuplicable();
    AddOutput("Ys", "A list of outputs.").AsDuplicable();
    AddAttr<std::string>(
        "engine_key",
        "The engine_key here is used to distinguish different Lite Engines");
    AddComment("Lite engine operator.");
  }
};

class LiteInferVarType : public framework::VarTypeInference {
 public:
  void operator()(framework::InferVarTypeContext* ctx) const override {}
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(lite_engine, ops::LiteEngineOp, ops::LiteEngineOpMaker);
paddle/fluid/operators/lite/lite_engine_op.h
0 → 100644
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/inference/lite/engine.h"
#include "paddle/fluid/inference/lite/tensor_utils.h"
#include "paddle/fluid/inference/utils/singleton.h"
namespace paddle {
namespace operators {

class LiteEngineOp : public framework::OperatorBase {
 private:
  std::vector<std::string> in_names_;
  std::vector<std::string> out_names_;
  paddle::lite::Predictor* engine_;
  framework::proto::VarType::Type precision_;
  bool use_gpu_;

 public:
  LiteEngineOp(const std::string& type,
               const framework::VariableNameMap& inputs,
               const framework::VariableNameMap& outputs,
               const framework::AttributeMap& attrs)
      : framework::OperatorBase(type, inputs, outputs, attrs) {
    in_names_ = Inputs("Xs");
    out_names_ = Outputs("Ys");
    engine_ =
        inference::Singleton<inference::lite::EngineManager>::Global().Get(
            Attr<std::string>("engine_key"));
    if (Attr<bool>("enable_int8")) {
      precision_ = framework::proto::VarType_Type_INT8;
    } else {
      precision_ = framework::proto::VarType_Type_FP32;
    }
    use_gpu_ = Attr<bool>("use_gpu");
  }

 protected:
  void RunImpl(const framework::Scope& scope,
               const platform::Place& dev_place) const override {
    Execute(scope, dev_place);
  }

  void Execute(const framework::Scope& scope,
               const platform::Place& dev_place) const {
    const platform::DeviceContext* ctx =
        platform::DeviceContextPool::Instance().Get(dev_place);
    for (size_t i = 0; i < in_names_.size(); i++) {
      const framework::LoDTensor& src_t =
          inference::analysis::GetFromScope<framework::LoDTensor>(
              scope, in_names_[i]);
      paddle::lite::Tensor* dst_t = engine_->GetInput(i);
      VLOG(3) << "fluid -> lite: " << in_names_[i];
      inference::lite::utils::TensorCopyAsync(dst_t, src_t, *ctx);
    }
#ifdef PADDLE_WITH_CUDA
    if (platform::is_gpu_place(dev_place)) {
      platform::GpuStreamSync(
          static_cast<const platform::CUDADeviceContext*>(ctx)->stream());
    }
#endif
    VLOG(3) << "lite engine run";
    engine_->Run();
    VLOG(3) << "lite engine run done";
    for (size_t i = 0; i < out_names_.size(); i++) {
      const paddle::lite::Tensor& src_t = *(engine_->GetOutput(i));
      framework::LoDTensor* dst_t =
          &inference::analysis::GetFromScope<framework::LoDTensor>(
              scope, out_names_[i]);
      VLOG(3) << "lite -> fluid: " << out_names_[i];
      inference::lite::utils::TensorCopyAsync(dst_t, src_t, *ctx);
    }
#ifdef PADDLE_WITH_CUDA
    if (platform::is_gpu_place(dev_place)) {
      platform::GpuStreamSync(
          static_cast<const platform::CUDADeviceContext*>(ctx)->stream());
    }
#endif
  }
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/lite/lite_engine_op_test.cc
0 → 100644
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/operators/lite/lite_engine_op.h"
#include "paddle/fluid/operators/lite/ut_helper.h"
USE_NO_KERNEL_OP(lite_engine)

using paddle::inference::lite::AddTensorToBlockDesc;
using paddle::inference::lite::CreateTensor;
using paddle::inference::lite::serialize_params;

namespace paddle {
namespace operators {
TEST(LiteEngineOp, engine_op) {
  framework::ProgramDesc program;
  auto* block_ = program.Proto()->mutable_blocks(0);
  framework::BlockDesc block_desc(&program, block_);

  auto* feed0 = block_desc.AppendOp();
  feed0->SetType("feed");
  feed0->SetInput("X", {"feed"});
  feed0->SetOutput("Out", {"x"});
  feed0->SetAttr("col", 0);
  auto* feed1 = block_desc.AppendOp();
  feed1->SetType("feed");
  feed1->SetInput("X", {"feed"});
  feed1->SetOutput("Out", {"y"});
  feed1->SetAttr("col", 1);

  LOG(INFO) << "create elementwise_add op";
  auto* elt_add = block_desc.AppendOp();
  elt_add->SetType("elementwise_add");
  elt_add->SetInput("X", std::vector<std::string>({"x"}));
  elt_add->SetInput("Y", std::vector<std::string>({"y"}));
  elt_add->SetOutput("Out", std::vector<std::string>({"z"}));
  elt_add->SetAttr("axis", -1);

  LOG(INFO) << "create fetch op";
  auto* fetch = block_desc.AppendOp();
  fetch->SetType("fetch");
  fetch->SetInput("X", std::vector<std::string>({"z"}));
  fetch->SetOutput("Out", std::vector<std::string>({"out"}));
  fetch->SetAttr("col", 0);

  // Set inputs' variable shape in BlockDesc
  AddTensorToBlockDesc(block_, "x", std::vector<int64_t>({2, 4}), true);
  AddTensorToBlockDesc(block_, "y", std::vector<int64_t>({2, 4}), true);
  AddTensorToBlockDesc(block_, "z", std::vector<int64_t>({2, 4}), false);
  AddTensorToBlockDesc(block_, "out", std::vector<int64_t>({2, 4}), false);

  *block_->add_ops() = *feed1->Proto();
  *block_->add_ops() = *feed0->Proto();
  *block_->add_ops() = *elt_add->Proto();
  *block_->add_ops() = *fetch->Proto();

  framework::Scope scope;
#ifdef PADDLE_WITH_CUDA
  platform::CUDAPlace place;
  platform::CUDADeviceContext ctx(place);
#else
  platform::CPUPlace place;
  platform::CPUDeviceContext ctx(place);
#endif
  // Prepare variables.
  CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}), false);
  CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}), false);
  CreateTensor(&scope, "out", std::vector<int64_t>({2, 4}), false);
  ASSERT_EQ(block_->ops_size(), 4);

  std::vector<std::string> repetitive_params{"x", "y"};
  inference::lite::EngineConfig config;
  config.valid_places = {
#ifdef PADDLE_WITH_CUDA
      paddle::lite::Place({TARGET(kCUDA), PRECISION(kFloat)}),
#endif
      paddle::lite::Place({TARGET(kHost), PRECISION(kAny)}),
      paddle::lite::Place({TARGET(kX86), PRECISION(kFloat)}),
  };
  serialize_params(&(config.param), &scope, repetitive_params);
  config.model = program.Proto()->SerializeAsString();

  LOG(INFO) << "create lite_engine desc";
  framework::OpDesc engine_op_desc(nullptr);
  engine_op_desc.SetType("lite_engine");
  engine_op_desc.SetInput("Xs", std::vector<std::string>({"x", "y"}));
  engine_op_desc.SetOutput("Ys", std::vector<std::string>({"out"}));
  std::string engine_key = "engine_0";
  engine_op_desc.SetAttr("engine_key", engine_key);
  engine_op_desc.SetAttr("enable_int8", false);
  engine_op_desc.SetAttr("use_gpu", true);
  engine_op_desc.SetBlockAttr("sub_block", &block_desc);
  inference::Singleton<inference::lite::EngineManager>::Global().Create(
      engine_key, config);
  LOG(INFO) << "create engine op";
  auto engine_op = framework::OpRegistry::CreateOp(engine_op_desc);
  LOG(INFO) << "engine_op " << engine_op.get();
  // Execute them.
  LOG(INFO) << "engine_op run";
  engine_op->Run(scope, place);
  LOG(INFO) << "done";
}
}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/lite/ut_helper.h
0 → 100644
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <gtest/gtest.h>
#include <random>
#include <string>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace paddle {
namespace inference {
namespace lite {
void AddTensorToBlockDesc(framework::proto::BlockDesc* block,
                          const std::string& name,
                          const std::vector<int64_t>& shape,
                          bool persistable = false) {
  using framework::proto::VarType;
  auto* var = block->add_vars();
  framework::VarDesc desc(name);
  desc.SetType(VarType::LOD_TENSOR);
  desc.SetDataType(VarType::FP32);
  desc.SetShape(shape);
  desc.SetPersistable(persistable);
  *var = *desc.Proto();
}
void serialize_params(std::string* str, framework::Scope* scope,
                      const std::vector<std::string>& params) {
  std::ostringstream os;
#ifdef PADDLE_WITH_CUDA
  platform::CUDAPlace place;
  platform::CUDADeviceContext ctx(place);
#else
  platform::CPUDeviceContext ctx;
#endif
  for (const auto& param : params) {
    PADDLE_ENFORCE_NOT_NULL(
        scope->FindVar(param),
        platform::errors::NotFound("Block should already have a '%s' variable",
                                   param));
    auto* tensor = scope->FindVar(param)->GetMutable<framework::LoDTensor>();
    framework::SerializeToStream(os, *tensor, ctx);
  }
  *str = os.str();
}
/*
 * Get a random float value between [low, high]
 */
float random(float low, float high) {
  // static std::random_device rd;
  static std::mt19937 mt(100);
  std::uniform_real_distribution<double> dist(low, high);
  return dist(mt);
}
void RandomizeTensor(framework::LoDTensor* tensor,
                     const platform::Place& place) {
  auto dims = tensor->dims();
  size_t num_elements = analysis::AccuDims(dims, dims.size());
  PADDLE_ENFORCE_GT(num_elements, 0,
                    platform::errors::InvalidArgument(
                        "The input tensor dimension of the randomized tensor "
                        "function should be greater than zero."));
  platform::CPUPlace cpu_place;
  framework::LoDTensor temp_tensor;
  temp_tensor.Resize(dims);
  auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
  for (size_t i = 0; i < num_elements; i++) {
    *(temp_data + i) = random(0., 1.);
  }
  TensorCopySync(temp_tensor, place, tensor);
}
void CreateTensor(framework::Scope* scope, const std::string& name,
                  const std::vector<int64_t>& shape, bool in_cuda = true) {
  auto* var = scope->Var(name);
  auto* tensor = var->GetMutable<framework::LoDTensor>();
  auto dims = framework::make_ddim(shape);
  tensor->Resize(dims);
  platform::Place place;
  if (in_cuda) {
#ifdef PADDLE_WITH_CUDA
    place = platform::CUDAPlace(0);
#else
    LOG(FATAL) << "You must define PADDLE_WITH_CUDA for using CUDAPlace.";
#endif
  } else {
    place = platform::CPUPlace();
  }
  RandomizeTensor(tensor, place);
}
}  // namespace lite
}  // namespace inference
}  // namespace paddle
paddle/fluid/platform/gpu_info.cc
...
...
@@ -349,6 +349,16 @@ void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
                              error_code, CudaErrorWebsite());
}

void GpuStreamSync(cudaStream_t stream) {
  auto error_code = cudaStreamSynchronize(stream);
  PADDLE_ENFORCE_CUDA_SUCCESS(
      error_code,
      platform::errors::External(
          "cudaStreamSynchronize failed in paddle::platform::GpuStreamSync "
          "error code : %d, %s",
          error_code, CudaErrorWebsite()));
}

void RaiseNonOutOfMemoryError(cudaError_t *status) {
  if (*status == cudaErrorMemoryAllocation) {
    *status = cudaSuccess;
...
...
@@ -363,5 +373,6 @@ void RaiseNonOutOfMemoryError(cudaError_t *status) {
  PADDLE_ENFORCE_CUDA_SUCCESS(*status);
}

}  // namespace platform
}  // namespace paddle
paddle/fluid/platform/gpu_info.h
...
...
@@ -101,6 +101,9 @@ void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
//! Set memory dst with value count size asynchronously
void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream);

//! Blocks until stream has completed all operations.
void GpuStreamSync(cudaStream_t stream);

//! Raise error if status is not cudaSuccess or OOM, otherwise reset status.
void RaiseNonOutOfMemoryError(cudaError_t *status);
...
...
paddle/scripts/paddle_build.sh
...
...
@@ -203,6 +203,7 @@ function cmake_base() {
        -DPY_VERSION=${PY_VERSION:-2.7}
        -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build}
        -DWITH_GRPC=${grpc_flag}
        -DWITH_LITE=${WITH_LITE:-OFF}
    ========================================
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
...
...
@@ -234,7 +235,8 @@ EOF
        -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
        -DPY_VERSION=${PY_VERSION:-2.7} \
        -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} \
        -DWITH_GRPC=${grpc_flag} \
        -DWITH_LITE=${WITH_LITE:-OFF}
}
...
...