PaddlePaddle / Paddle-Lite · Commit 5f72a91b (unverified)

Authored by huzhiqiang on Oct 27, 2019; committed via GitHub on Oct 27, 2019.
Tailoring cherrypick (#2263)
* cherry-pick model-tailoring into release/v2.1.0 test=develop
Parent: 71684f98
Showing 23 changed files with 450 additions and 120 deletions (+450 -120).
CMakeLists.txt                                         +1   -0
cmake/lite.cmake                                       +26  -1
lite/api/CMakeLists.txt                                +6   -1
lite/api/cxx_api.cc                                    +91  -5
lite/api/cxx_api.h                                     +7   -4
lite/api/cxx_api_impl.cc                               +3   -2
lite/api/model_optimize_tool.cc                        +15  -1
lite/api/paddle_api.cc                                 +2   -1
lite/api/paddle_api.h                                  +2   -1
lite/api/paddle_api_test.cc                            +2   -2
lite/core/CMakeLists.txt                               +5   -0
lite/core/mir/fusion/conv_bn_fuse_pass.cc              +14  -6
lite/core/mir/fusion/conv_bn_fuser.cc                  +84  -43
lite/core/mir/fusion/conv_bn_fuser.h                   +3   -2
lite/core/mir/fusion/conv_elementwise_fuse_pass.cc     +14  -7
lite/core/mir/fusion/conv_elementwise_fuser.cc         +46  -35
lite/core/mir/fusion/conv_elementwise_fuser.h          +5   -3
lite/core/op_registry.h                                +41  -1
lite/fluid/lod.h                                       +1   -1
lite/tools/build.sh                                    +14  -0
lite/tools/cmake_tools/create_fake_kernel_registry.py  +43  -1
lite/tools/cmake_tools/parse_kernel_registry.py        +16  -2
lite/tools/cmake_tools/parse_op_registry.py            +9   -1
CMakeLists.txt (view file @ 5f72a91b)

@@ -70,6 +70,7 @@ lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
 lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
 # publish options
 lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
+lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
 # TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
 if (ANDROID OR IOS OR ARMLINUX)
cmake/lite.cmake (view file @ 5f72a91b)

@@ -241,6 +241,10 @@ set(host_kernels CACHE INTERNAL "host kernels")
 set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt")
 file(WRITE ${kernels_src_list} "") # clean
+if (LITE_BUILD_TAILOR)
+  set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list")
+  file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
+endif()
 # add a kernel for some specific device
 # device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA)
 # level: one of (basic, extra)

@@ -252,6 +256,15 @@ function(add_kernel TARGET device level)
     ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  if (LITE_BUILD_TAILOR)
+    foreach(src ${args_SRCS})
+      list(FIND tailored_kernels_list ${src} _index)
+      if (${_index} EQUAL -1)
+        return()
+      endif()
+    endforeach()
+  endif()
   if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
     return()
   endif()

@@ -338,6 +351,10 @@ endfunction()
 set(ops CACHE INTERNAL "ops")
 set(ops_src_list "${CMAKE_BINARY_DIR}/ops_src_list.txt")
 file(WRITE ${ops_src_list} "") # clean
+if (LITE_BUILD_TAILOR)
+  set(tailored_ops_list_path "${LITE_OPTMODEL_DIR}/.tailored_ops_source_list")
+  file(STRINGS ${tailored_ops_list_path} tailored_ops_list)
+endif()
 # add an operator
 # level: one of (basic, extra)
 function(add_operator TARGET level)

@@ -348,16 +365,24 @@ function(add_operator TARGET level)
     ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
   if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
     return()
   endif()
-  set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
   foreach(src ${args_SRCS})
+    if (LITE_BUILD_TAILOR)
+      list(FIND tailored_ops_list ${src} _index)
+      if (${_index} EQUAL -1)
+        return()
+      endif()
+    endif()
     file(APPEND ${ops_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
   endforeach()
+  set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
   lite_cc_library(${TARGET} SRCS ${args_SRCS}
           DEPS ${args_DEPS}
           X86_DEPS ${args_X86_DEPS}
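The tailoring hook in cmake/lite.cmake is plain list membership: file(STRINGS ...) loads the hidden source list emitted by the model optimize tool, and add_kernel/add_operator return early unless every source file of the target appears in it. A minimal C++ sketch of the same filtering idea; the helper functions here are hypothetical illustrations, only the dot-file name comes from the diff:

```cpp
#include <fstream>
#include <set>
#include <string>
#include <vector>

// Load the tailored source list (one file name per line), mirroring
// what file(STRINGS ...) does in cmake/lite.cmake.
std::set<std::string> LoadTailoredList(const std::string& model_dir) {
  std::set<std::string> srcs;
  std::ifstream in(model_dir + "/.tailored_kernels_source_list");
  for (std::string line; std::getline(in, line);) srcs.insert(line);
  return srcs;
}

// A target survives tailoring only if every one of its sources is listed,
// mirroring the list(FIND ...) / return() loop inside add_kernel.
bool KeepTarget(const std::vector<std::string>& target_srcs,
                const std::set<std::string>& tailored) {
  for (const auto& s : target_srcs) {
    if (tailored.count(s) == 0) return false;
  }
  return true;
}
```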
lite/api/CMakeLists.txt (view file @ 5f72a91b)

@@ -9,12 +9,17 @@ if (LITE_ON_TINY_PUBLISH)
   set(CMAKE_C_FLAGS_RELEASE "-Os -DNDEBUG")
 endif()
 set(light_lib_DEPS light_api paddle_api paddle_api_light optimizer)
-if ((NOT LITE_ON_TINY_PUBLISH) AND (ARM_TARGET_OS STREQUAL "android"))
+if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_X86 OR ARM_TARGET_OS STREQUAL "android"))
     #full api dynamic library
     add_library(paddle_full_api_shared SHARED "")
     target_sources(paddle_full_api_shared PUBLIC ${__lite_cc_files} paddle_api.cc light_api.cc cxx_api.cc cxx_api_impl.cc light_api_impl.cc)
     add_dependencies(paddle_full_api_shared op_list_h kernel_list_h framework_proto)
     target_link_libraries(paddle_full_api_shared framework_proto)
+    add_dependencies(lite_compile_deps paddle_full_api_shared)
+    if (LITE_WITH_X86)
+        add_dependencies(paddle_full_api_shared xxhash)
+        target_link_libraries(paddle_full_api_shared xxhash)
+    endif()
 
     #light api dynamic library
     lite_cc_library(paddle_light_api_shared MODULE
lite/api/cxx_api.cc (view file @ 5f72a91b)

@@ -15,6 +15,7 @@
 #include "lite/api/cxx_api.h"
 #include <algorithm>
 #include <memory>
+#include <set>
 #include <string>
 #include <utility>
 #include <vector>

@@ -23,8 +24,16 @@
 namespace paddle {
 namespace lite {
 
+static const char TAILORD_OPS_SOURCE_LIST_FILENAME[] =
+    ".tailored_ops_source_list";
+static const char TAILORD_OPS_LIST_NAME[] = ".tailored_ops_list";
+static const char TAILORD_KERNELS_SOURCE_LIST_FILENAME[] =
+    ".tailored_kernels_source_list";
+static const char TAILORD_KERNELS_LIST_NAME[] = ".tailored_kernels_list";
+
 void Predictor::SaveModel(const std::string &dir,
-                          lite_api::LiteModelType model_type) {
+                          lite_api::LiteModelType model_type,
+                          bool record_info) {
   if (!program_) {
     GenRuntimeProgram();
   }

@@ -40,6 +49,83 @@ void Predictor::SaveModel(const std::string &dir,
     default:
       LOG(FATAL) << "Unknown model type";
   }
+  if (record_info) {
+    SaveOpKernelInfo(dir);
+  }
+}
+
+void Predictor::SaveOpKernelInfo(const std::string &model_dir) {
+  std::set<std::string> ops_info;
+  std::set<std::string> kernels_info;
+  const auto &instructions_ = program_->instructions();
+  for (auto &node : instructions_) {
+    // parse op type information
+    auto op = node.op()->op_info();
+    ops_info.insert(op->Type());
+    // parse kernel type information
+    std::string kernel_type_str =
+        node.kernel()->op_type() + "," + TargetRepr(node.kernel()->target()) +
+        "," + PrecisionRepr(node.kernel()->precision()) + "," +
+        DataLayoutRepr(node.kernel()->layout()) + "," + node.kernel()->alias();
+    kernels_info.insert(kernel_type_str);
+  }
+
+  // get source_file name from op type and kernel type
+  auto op2pathmap = OpKernelInfoCollector::Global().GetOp2PathDict();
+  auto kernel2pathmap = OpKernelInfoCollector::Global().GetKernel2PathDict();
+
+  // write used op and kernel info into files
+  std::string opf_path = model_dir + "/" + TAILORD_OPS_LIST_NAME;
+  std::string opf_source_path =
+      model_dir + "/" + TAILORD_OPS_SOURCE_LIST_FILENAME;
+  std::string kpf_path = model_dir + "/" + TAILORD_KERNELS_LIST_NAME;
+  std::string kpf_source_path =
+      model_dir + "/" + TAILORD_KERNELS_SOURCE_LIST_FILENAME;
+  std::map<std::string, std::string> op2path;
+
+  std::FILE *opf = std::fopen(opf_path.c_str(), "w");
+  std::FILE *opf_source = std::fopen(opf_source_path.c_str(), "w");
+  std::FILE *kpf = std::fopen(kpf_path.c_str(), "w");
+  std::FILE *kpf_source = std::fopen(kpf_source_path.c_str(), "w");
+  std::vector<std::string> opcompile;
+  std::vector<std::string> kernelcompile;
+
+  if (nullptr == opf || nullptr == opf_source || nullptr == kpf ||
+      nullptr == kpf_source) {
+    LOG(FATAL) << "failed to create info file into: " << model_dir;
+  }
+  for (auto op_info = ops_info.begin(); op_info != ops_info.end(); op_info++) {
+    fputs(op_info->c_str(), opf);
+    fputc('\n', opf);
+    std::string op_path = op2pathmap[*op_info];
+    fputs(op_path.c_str(), opf_source);
+    fputc('\n', opf_source);
+  }
+  std::fclose(opf_source);
+  std::fclose(opf);
+  LOG(INFO) << "operators information of tailored model is stored into: "
+            << opf_path;
+
+  // write kernel_type and kernel_path into files
+  for (auto kernel_info = kernels_info.begin();
+       kernel_info != kernels_info.end();
+       kernel_info++) {
+    fputs(kernel_info->c_str(), kpf);
+    fputc('\n', kpf);
+    std::string kernel_path = kernel2pathmap[*kernel_info];
+    fputs(kernel_path.c_str(), kpf_source);
+    fputc('\n', kpf_source);
+    if (kernel_path == "conv_compute.cc") {
+      fputs(
+          "conv_depthwise.cc\nconv_direct.cc\nconv_gemmlike.cc\nconv_"
+          "winograd.cc\n",
+          kpf_source);
+    }
+  }
+  std::fclose(kpf_source);
+  std::fclose(kpf);
+  LOG(INFO) << "kernels information of tailored model is stored into: "
+            << kpf_path;
+}
 
 lite::Tensor *Predictor::GetInput(size_t offset) {

@@ -61,7 +147,7 @@ void Predictor::PrepareFeedFetch() {
   auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
   std::vector<cpp::OpDesc *> feeds;
   std::vector<cpp::OpDesc *> fetchs;
-  for (int i = 0; i < current_block->OpsSize(); i++) {
+  for (size_t i = 0; i < current_block->OpsSize(); i++) {
     auto op = current_block->GetOp<cpp::OpDesc>(i);
     if (op->Type() == "feed") {
       feeds.push_back(op);

@@ -71,11 +157,11 @@ void Predictor::PrepareFeedFetch() {
   }
   input_names_.resize(feeds.size());
   output_names_.resize(fetchs.size());
-  for (int i = 0; i < feeds.size(); i++) {
+  for (size_t i = 0; i < feeds.size(); i++) {
     input_names_[feeds[i]->GetAttr<int>("col")] =
         feeds[i]->Output("Out").front();
   }
-  for (int i = 0; i < fetchs.size(); i++) {
+  for (size_t i = 0; i < fetchs.size(); i++) {
     output_names_[fetchs[i]->GetAttr<int>("col")] =
         fetchs[i]->Input("X").front();
   }

@@ -191,7 +277,7 @@ lite::Tensor *Predictor::GetInputByName(const std::string &name) {
   if (element == input_names_.end()) {
    LOG(ERROR) << "Model do not have input named with: [" << name
               << "], model's inputs include:";
-    for (int i = 0; i < input_names_.size(); i++) {
+    for (size_t i = 0; i < input_names_.size(); i++) {
      LOG(ERROR) << "[" << input_names_[i] << "]";
    }
    return nullptr;
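With record_info enabled, SaveModel now also drops four hidden files next to the optimized model: .tailored_ops_list, .tailored_ops_source_list, .tailored_kernels_list, and .tailored_kernels_source_list. A minimal usage sketch of the new signature; the output path is a placeholder:

```cpp
#include "lite/api/cxx_api.h"

// Sketch: save an optimized model and record the op/kernel information
// that a later LITE_BUILD_TAILOR=ON build consumes.
void SaveTailoredModel(paddle::lite::Predictor* predictor) {
  predictor->SaveModel("/tmp/mobilenet_v1_opt",  // placeholder directory
                       paddle::lite_api::LiteModelType::kNaiveBuffer,
                       /*record_info=*/true);
}
```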
lite/api/cxx_api.h (view file @ 5f72a91b)

@@ -89,7 +89,9 @@ class LITE_API Predictor {
   // This method is disabled in mobile, for unnecessary dependencies required.
   void SaveModel(
       const std::string &dir,
-      lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf);
+      lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
+      bool record_info = false);
+  void SaveOpKernelInfo(const std::string &model_dir);
 
 #ifdef LITE_WITH_TRAIN
   void Run(const std::vector<framework::Tensor> &tensors) {

@@ -137,9 +139,10 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
   std::unique_ptr<lite_api::Tensor> GetInputByName(
       const std::string &name) override;
 
-  void SaveOptimizedModel(const std::string &model_dir,
-                          lite_api::LiteModelType model_type =
-                              lite_api::LiteModelType::kProtobuf) override;
+  void SaveOptimizedModel(const std::string &model_dir,
+                          lite_api::LiteModelType model_type =
+                              lite_api::LiteModelType::kProtobuf,
+                          bool record_info = false) override;
 
  private:
  Predictor raw_predictor_;
lite/api/cxx_api_impl.cc (view file @ 5f72a91b)

@@ -65,8 +65,9 @@ std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInputByName(
 }
 
 void CxxPaddleApiImpl::SaveOptimizedModel(const std::string &model_dir,
-                                          lite_api::LiteModelType model_type) {
-  raw_predictor_.SaveModel(model_dir, model_type);
+                                          lite_api::LiteModelType model_type,
+                                          bool record_info) {
+  raw_predictor_.SaveModel(model_dir, model_type, record_info);
 }
 
 }  // namespace lite
lite/api/model_optimize_tool.cc (view file @ 5f72a91b)

@@ -16,7 +16,10 @@
 #ifdef PADDLE_WITH_TESTING
 #include <gtest/gtest.h>
 #endif
+// "all_kernel_faked.cc" and "kernel_src_map.h" are created automatically
+// while model_optimize_tool is being compiled
 #include "all_kernel_faked.cc"  // NOLINT
+#include "kernel_src_map.h"     // NOLINT
 #include "lite/api/paddle_api.h"
 #include "lite/api/paddle_use_ops.h"
 #include "lite/api/paddle_use_passes.h"

@@ -35,6 +38,11 @@ DEFINE_string(
     "protobuf",
     "store type of the output optimized model. protobuf/naive_buffer");
 DEFINE_bool(display_kernels, false, "Display kernel information");
+DEFINE_bool(record_tailoring_info,
+            false,
+            "Record the kernel and operator information of the optimized "
+            "model for tailored compiling; the information is stored in the "
+            "optimized model path as hidden files");
 DEFINE_string(optimize_out, "", "path of the output optimized model");
 DEFINE_string(valid_targets,
               "arm",

@@ -104,8 +112,14 @@ void Main() {
   } else {
     LOG(FATAL) << "Unsupported Model type :" << FLAGS_optimize_out_type;
   }
 
-  predictor->SaveOptimizedModel(FLAGS_optimize_out, model_type);
+  OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map);
+  predictor->SaveOptimizedModel(
+      FLAGS_optimize_out, model_type, FLAGS_record_tailoring_info);
+  if (FLAGS_record_tailoring_info) {
+    LOG(INFO) << "Recorded the tailoring information of the model into: "
+              << FLAGS_optimize_out;
+  }
 }
 
 }  // namespace lite_api
lite/api/paddle_api.cc (view file @ 5f72a91b)

@@ -145,7 +145,8 @@ lod_t Tensor::lod() const { return ctensor(raw_tensor_)->lod(); }
 void Tensor::SetLoD(const lod_t &lod) { tensor(raw_tensor_)->set_lod(lod); }
 
 void PaddlePredictor::SaveOptimizedModel(const std::string &model_dir,
-                                         LiteModelType model_type) {
+                                         LiteModelType model_type,
+                                         bool record_info) {
   LOG(FATAL)
       << "The SaveOptimizedModel API is only supported by CxxConfig predictor.";
 }
lite/api/paddle_api.h (view file @ 5f72a91b)

@@ -97,7 +97,8 @@ class LITE_API PaddlePredictor {
   /// CxxConfig, and the persisted model can be reused for MobileConfig.
   virtual void SaveOptimizedModel(
       const std::string &model_dir,
-      LiteModelType model_type = LiteModelType::kProtobuf);
+      LiteModelType model_type = LiteModelType::kProtobuf,
+      bool record_info = false);
 
   virtual ~PaddlePredictor() = default;
 };
lite/api/paddle_api_test.cc (view file @ 5f72a91b)

@@ -64,8 +64,8 @@ TEST(CxxApi, run) {
   EXPECT_NEAR(out[1], -28.8729, 1e-3);
 
   predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2");
-  predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2.naive",
-                                LiteModelType::kNaiveBuffer);
+  predictor->SaveOptimizedModel(
+      FLAGS_model_dir + ".opt2.naive", LiteModelType::kNaiveBuffer, true);
 }
 
 // Demo1 for Mobile Devices: Load model from file and run
lite/core/CMakeLists.txt (view file @ 5f72a91b)

@@ -71,6 +71,8 @@ add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
     ${kernels_src_list}
     ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h
+    "${LITE_OPTMODEL_DIR}/.tailored_kernels_list"
+    ${LITE_BUILD_TAILOR}
   OUTPUT kernels.h # not a real path to the output to force it execute every time.
   )
 # A trick to generate the paddle_use_ops.h

@@ -78,6 +80,8 @@ add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py
     ${ops_src_list}
    ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
+    "${LITE_OPTMODEL_DIR}/.tailored_ops_list"
+    ${LITE_BUILD_TAILOR}
   OUTPUT ops.h # not a real path to the output to force it execute every time.
   )
 # generate fake kernels for memory_optimize_tool

@@ -85,6 +89,7 @@ add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py
     ${kernels_src_list}
     ${CMAKE_BINARY_DIR}/all_kernel_faked.cc
+    ${CMAKE_BINARY_DIR}/kernel_src_map.h
   OUTPUT all_kernel_faked.cc # not a real path to the output to force it execute every time.
   )
 add_custom_target(op_list_h DEPENDS ops.h)
lite/core/mir/fusion/conv_bn_fuse_pass.cc (view file @ 5f72a91b)

@@ -16,6 +16,7 @@
 #include <memory>
 #include <vector>
 #include "lite/core/mir/fusion/conv_bn_fuser.h"
+#include "lite/core/mir/graph_visualize_pass.h"
 #include "lite/core/mir/pass_registry.h"
 
 namespace paddle {

@@ -23,11 +24,19 @@ namespace lite {
 namespace mir {
 
 void ConvBNFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
-  fusion::ConvBNFuser fuser("conv2d");
-  fuser(graph.get());
+  // initialize fuser params
+  std::vector<bool> conv_has_bias_cases{true, false};
+  std::vector<std::string> conv_type_cases{"conv2d", "depthwise_conv2d"};
 
-  fusion::ConvBNFuser fuser2("depthwise_conv2d");
-  fuser2(graph.get());
+  // start fusing using the params
+  for (auto conv_has_bias : conv_has_bias_cases) {
+    for (auto conv_type : conv_type_cases) {
+      VLOG(4) << "conv_has_bias:" << conv_has_bias
+              << " conv_type:" << conv_type;
+      fusion::ConvBNFuser fuser(conv_type, conv_has_bias);
+      fuser(graph.get());
+    }
+  }
 }
 
 }  // namespace mir

@@ -35,5 +44,4 @@ void ConvBNFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
 }  // namespace paddle
 
 REGISTER_MIR_PASS(lite_conv_bn_fuse_pass, paddle::lite::mir::ConvBNFusePass)
-    .BindTargets({TARGET(kAny)})
-    .BindKernel("elementwise_add");
+    .BindTargets({TARGET(kAny)});
lite/core/mir/fusion/conv_bn_fuser.cc (view file @ 5f72a91b)

@@ -14,6 +14,7 @@
 #include "lite/core/mir/fusion/conv_bn_fuser.h"
 #include <memory>
+#include <unordered_set>
 #include <vector>
 
 namespace paddle {

@@ -30,7 +31,8 @@ void ConvBNFuser::BuildPattern() {
   auto* conv = OpNode("conv2d", conv_type_)->assert_is_op(conv_type_);
   auto* conv_out = VarNode("conv_out")
                        ->assert_is_op_output(conv_type_, "Output")
-                       ->assert_is_op_input("batch_norm", "X");
+                       ->assert_is_op_input("batch_norm", "X")
+                       ->AsIntermediate();
   auto* bn_scale = VarNode("bn_scale")
                        ->assert_is_op_input("batch_norm", "Scale")

@@ -61,34 +63,30 @@ void ConvBNFuser::BuildPattern() {
                            ->assert_is_op_output("batch_norm", "SavedVariance")
                            ->AsIntermediate();
 
-  conv->LinksFrom({conv_input, conv_weight}).LinksTo({conv_out});
+  if (conv_has_bias_) {
+    auto* conv_bias = VarNode("conv_bias")
+                          ->assert_is_op_input(conv_type_, "Bias")
+                          ->AsInput()
+                          ->AsIntermediate();
+    conv->LinksFrom({conv_input, conv_weight, conv_bias}).LinksTo({conv_out});
+  } else {
+    conv->LinksFrom({conv_input, conv_weight}).LinksTo({conv_out});
+  }
 
   bn->LinksFrom({conv_out, bn_scale, bn_bias, bn_mean, bn_var})
       .LinksTo({bn_out, bn_mean_out, bn_saved_mean, bn_saved_var, bn_var_out});
 }
 
 void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
-  auto op_desc = GenOpDesc(matched);
-  auto eltwise_op = LiteOpRegistry::Global().Create("elementwise_add");
   auto conv_instruct = matched.at("conv2d")->stmt();
+  auto conv_op_desc = conv_instruct->mutable_op_info();
   auto conv = conv_instruct->op();
   auto* scope = conv->scope();
-  auto& valid_places = conv->valid_places();
 
-  auto conv_weight_t = scope->FindVar(matched.at("conv_weight")->arg()->name)
-                           ->GetMutable<lite::Tensor>();
-  auto conv_weight_dims = conv_weight_t->dims();
-  size_t weight_num = conv_weight_t->data_size();
-
   // bn
   auto bn_scale_t = scope->FindVar(matched.at("bn_scale")->arg()->name)
                         ->GetMutable<lite::Tensor>();
-  size_t bias_size = bn_scale_t->data_size();
   auto bn_scale_d = bn_scale_t->mutable_data<float>();
-  CHECK_EQ(bias_size, static_cast<size_t>(conv_weight_dims[0]))
-      << "The BN bias's size should be equal to the size of the first "
-      << "dim size of the conv weights";
-
   auto bn_mean_t = scope->FindVar(matched.at("bn_mean")->arg()->name)
                        ->GetMutable<lite::Tensor>();
   auto bn_mean_d = bn_mean_t->mutable_data<float>();

@@ -102,59 +100,102 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
   auto bn_bias_d = bn_bias_t->mutable_data<float>();
 
   auto eps = matched.at("bn")->stmt()->op_info()->GetAttr<float>("epsilon");
-  auto conv_op_desc = conv_instruct->mutable_op_info();
+
+  // conv
+  auto conv_weight_t = scope->FindVar(matched.at("conv_weight")->arg()->name)
+                           ->GetMutable<lite::Tensor>();
+  CHECK_EQ(static_cast<size_t>(bn_scale_t->data_size()),
+           static_cast<size_t>(conv_weight_t->dims()[0]))
+      << "The BN bias's size should be equal to the size of the first "
+      << "dim size of the conv weights";
+  size_t weight_num = conv_weight_t->data_size();
   bool enable_int8 = conv_op_desc->HasAttr("enable_int8") ? true : false;
 
+  // compute BN alpha and beta
   Tensor alpha_tensor, beta_tensor;
   alpha_tensor.CopyDataFrom(*bn_bias_t);
   beta_tensor.CopyDataFrom(*bn_bias_t);
   auto alpha_data = alpha_tensor.mutable_data<float>();
   auto beta_data = beta_tensor.mutable_data<float>();
 
-  int h = bias_size;
-  int w = weight_num / bias_size;
+  int h = bn_scale_t->data_size();  // h == bias_size == out channel num of
+                                    // conv weight
+  int w = weight_num /
+          (bn_scale_t->data_size());  // w = `conv_weight_num` / bias_size = in
+                                      // channel num of conv weight
   ComputeAlphaAndBeta(
       bn_scale_d, bn_mean_d, bn_var_d, alpha_data, beta_data, eps, h, w);
 
+  ///////////////////////////////////////////////////////////////////////////
+  // Compute ConvBNFuser
+  // Before fusion:
+  //
+  //   conv(x) = kx + z = y
+  //   bn(y) = ay + b
+  //
+  // Note: `alpha_data` is a, `beta_data` is b from `ComputeAlphaAndBeta`
+  //
+  // After fusion:
+  //
+  //   bn(conv(x)) = a(kx + z) + b = akx + az + b
+  //
+  // Note: h == bias_size == out channel num of conv weight
+  //       w = `conv_weight_num` / bias_size = in channel num of conv weight
+  //       there is a small difference for int8
+  ///////////////////////////////////////////////////////////////////////////
   if (enable_int8) {
     PADDLE_ENFORCE(conv_op_desc->HasAttr("weight_scale"),
                    "INT8 mode: Conv should have a weight_scale attr");
+    auto conv_weight_d = conv_weight_t->mutable_data<int8_t>();
     // compute new conv_weight for int8
     auto weight_scale =
         conv_op_desc->GetAttr<std::vector<float>>("weight_scale");
-    for (int i = 0; i < h; i++) {
-      weight_scale[i] *= alpha_data[i];
+    for (unsigned int i = 0; i < h; ++i) {
+      weight_scale[i] *= fabsf(alpha_data[i]);
+      if (alpha_data[i] < 0.f) {
+        auto ptr_row = conv_weight_d + i * w;
+        for (unsigned int j = 0; j < w; ++j) {
+          ptr_row[j] *= -1;
+        }
+      }
     }
     // Interface like this should be abandoned.
     conv_op_desc->SetAttr("weight_scale", weight_scale);
   } else {
     // compute new conv_weight
     auto conv_weight_d = conv_weight_t->mutable_data<float>();
-    for (int i = 0; i < h; i++) {
-      for (int j = 0; j < w; j++) {
+    for (unsigned int i = 0; i < h; ++i) {    // h: conv2d output channels
+      for (unsigned int j = 0; j < w; ++j) {  // w: conv2d input channels
         conv_weight_d[i * w + j] *= alpha_data[i];
       }
     }
   }
-  for (int i = 0; i < bias_size; i++) {
+
+  // compute new conv_bias
+  if (conv_has_bias_) {
+    auto conv_bias_t = scope->FindVar(matched.at("conv_bias")->arg()->name)
+                           ->GetMutable<lite::Tensor>();
+    auto conv_bias_d = conv_bias_t->data<float>();
+    for (unsigned int i = 0; i < bn_bias_t->data_size();
+         ++i) {  // bias_size == h == conv2d output channels
+      bn_bias_d[i] += alpha_data[i] * conv_bias_d[i];
+    }
+  }
+  for (unsigned int i = 0; i < bn_bias_t->data_size(); ++i) {
     bn_bias_d[i] += beta_data[i];
   }
 
-  eltwise_op->Attach(op_desc, scope);
-  auto* new_op_node = graph->GraphCreateInstructNode(eltwise_op, valid_places);
-
-  IR_NODE_LINK_TO(matched.at("conv_out"), new_op_node);
-  IR_NODE_LINK_TO(matched.at("bn_bias"), new_op_node);
-  IR_NODE_LINK_TO(new_op_node, matched.at("bn_out"));
-}
-
-cpp::OpDesc ConvBNFuser::GenOpDesc(const key2nodes_t& matched) {
-  cpp::OpDesc op_desc;
-  op_desc.SetType("elementwise_add");
-  op_desc.SetInput("X", {matched.at("conv_out")->arg()->name});
-  op_desc.SetInput("Y", {matched.at("bn_bias")->arg()->name});
-  op_desc.SetOutput("Out", {matched.at("bn_out")->arg()->name});
-  op_desc.SetAttr("axis", 1);
-  return op_desc;
+  conv_op_desc->SetType(conv_type_);
+  conv_op_desc->SetInput("Input", {matched.at("conv_input")->arg()->name});
+  conv_op_desc->SetInput("Filter", {matched.at("conv_weight")->arg()->name});
+  conv_op_desc->SetOutput("Output", {matched.at("bn_out")->arg()->name});
+  conv_op_desc->SetInput("Bias",
+                         {matched.at("bn_bias")->arg()->name});  // conv_bias
+  auto update_conv_desc = *conv_instruct->mutable_op_info();
+  conv_instruct->ResetOp(update_conv_desc, graph->valid_places());
+
+  IR_NODE_LINK_TO(matched.at("bn_bias"), matched.at("conv2d"));
+  IR_OP_VAR_LINK(matched.at("conv2d"), matched.at("bn_out"));
 }
 
 }  // namespace fusion
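The arithmetic behind ComputeAlphaAndBeta is the standard batch-norm folding. Written per output channel i, with gamma and beta the BN scale and bias, mu and sigma^2 the running statistics, and epsilon the `epsilon` attribute, `alpha_data[i]` and `beta_data[i]` in the diff correspond to a_i and b_i below:

```latex
% bn(y) is affine in y per channel:
%   bn(y)_i = \gamma_i (y_i - \mu_i) / \sqrt{\sigma_i^2 + \varepsilon} + \beta_i
\[
  a_i = \frac{\gamma_i}{\sqrt{\sigma_i^2 + \varepsilon}}, \qquad
  b_i = \beta_i - \frac{\gamma_i \mu_i}{\sqrt{\sigma_i^2 + \varepsilon}}
\]
% Folding into conv(x) = Wx + z rescales the weights and bias in place:
\[
  \operatorname{bn}(\operatorname{conv}(x))_i
    = a_i (W_i x + z_i) + b_i
    = (a_i W_i)\,x + (a_i z_i + b_i)
\]
```

This is also why the INT8 branch folds only |a_i| into `weight_scale` and handles a negative a_i by flipping the sign of the corresponding int8 weight row.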
lite/core/mir/fusion/conv_bn_fuser.h (view file @ 5f72a91b)

@@ -27,12 +27,12 @@ namespace fusion {
 class ConvBNFuser : public FuseBase {
  public:
-  explicit ConvBNFuser(const std::string& conv_type) : conv_type_(conv_type) {}
+  explicit ConvBNFuser(const std::string& conv_type, const bool conv_has_bias)
+      : conv_type_(conv_type), conv_has_bias_(conv_has_bias) {}
 
   void BuildPattern() override;
   void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override;
 
  private:
   cpp::OpDesc GenOpDesc(const key2nodes_t& matched) override;
   void ComputeAlphaAndBeta(float* scale_d,
                            float* mean_d,
                            float* var_d,

@@ -51,6 +51,7 @@ class ConvBNFuser : public FuseBase {
  private:
   std::string conv_type_{"conv2d"};
+  bool conv_has_bias_{false};
 };
 
 }  // namespace fusion
lite/core/mir/fusion/conv_elementwise_fuse_pass.cc (view file @ 5f72a91b)

@@ -23,14 +23,21 @@ namespace lite {
 namespace mir {
 
 void ConvElementwiseFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
-  fusion::ConvElementwiseFuser fuser("conv2d");
-  fuser(graph.get());
+  // initialize fuser params
+  // note: the `true` conv_has_bias case must be the first pattern to match
+  std::vector<bool> conv_has_bias_cases{true, false};
+  std::vector<std::string> conv_type_cases{
+      "conv2d", "depthwise_conv2d", "conv2d_transpose"};
 
-  fusion::ConvElementwiseFuser depthwise_fuser("depthwise_conv2d");
-  depthwise_fuser(graph.get());
-
-  fusion::ConvElementwiseFuser conv2d_transpose_fuser("conv2d_transpose");
-  conv2d_transpose_fuser(graph.get());
+  // start fusing using the params
+  for (auto conv_has_bias : conv_has_bias_cases) {
+    for (auto conv_type : conv_type_cases) {
+      VLOG(4) << "conv_has_bias:" << conv_has_bias
+              << " conv_type:" << conv_type;
+      fusion::ConvElementwiseFuser fuser(conv_type, conv_has_bias);
+      fuser(graph.get());
+    }
+  }
 }
 
 }  // namespace mir
lite/core/mir/fusion/conv_elementwise_fuser.cc (view file @ 5f72a91b)

@@ -33,8 +33,7 @@ void ConvElementwiseFuser::BuildPattern() {
                     ->assert_is_persistable_var();
 
   // create op nodes
-  auto* conv2d = OpNode("conv2d", conv_type_)->assert_is_op(conv_type_)
-                     ->AsIntermediate();
+  auto* conv2d = OpNode("conv2d", conv_type_)->assert_is_op(conv_type_);
   auto* add = OpNode("add", "elementwise_add")
                   ->assert_is_op("elementwise_add")
                   ->AsIntermediate();

@@ -51,6 +50,13 @@ void ConvElementwiseFuser::BuildPattern() {
   // create topology.
   std::vector<PMNode*> conv2d_inputs{filter, input};
+  // consider a special case: conv with bias
+  if (conv_has_bias_) {
+    PMNode* conv_bias = VarNode("conv_bias")
+                            ->assert_is_op_input(conv_type_, "Bias")
+                            ->AsIntermediate();
+    conv2d_inputs.emplace_back(conv_bias);
+  }
   std::vector<PMNode*> add_inputs{conv2d_out, bias};
   conv2d_inputs >> *conv2d >> *conv2d_out;
   add_inputs >> *add >> *add_out;

@@ -58,44 +64,49 @@
 void ConvElementwiseFuser::InsertNewNode(SSAGraph* graph,
                                          const key2nodes_t& matched) {
-  auto op_desc = GenOpDesc(matched);
-  auto conv_op = LiteOpRegistry::Global().Create(conv_type_);
-  auto conv_old = matched.at("conv2d")->stmt()->op();
-  auto* scope = conv_old->scope();
-  auto& valid_places = conv_old->valid_places();
-  conv_op->Attach(op_desc, scope);
-
-  auto* new_op_node = graph->GraphCreateInstructNode(conv_op, valid_places);
-
-  IR_NODE_LINK_TO(matched.at("input"), new_op_node);
-  IR_NODE_LINK_TO(matched.at("filter"), new_op_node);
-  IR_NODE_LINK_TO(matched.at("bias"), new_op_node);
-  IR_NODE_LINK_TO(new_op_node, matched.at("output"));
-}
-
-cpp::OpDesc ConvElementwiseFuser::GenOpDesc(const key2nodes_t& matched) {
-  auto* desc = matched.at("conv2d")->stmt()->op_info();
-
-  cpp::OpDesc op_desc = *desc;
-  op_desc.SetType(conv_type_);
-  op_desc.SetInput("Input", {matched.at("input")->arg()->name});
-  op_desc.SetInput("Filter", {matched.at("filter")->arg()->name});
-  op_desc.SetInput("Bias", {matched.at("bias")->arg()->name});
-  op_desc.SetOutput("Output", {matched.at("output")->arg()->name});
-  // Other inputs. See operators/conv_op.h
-  std::vector<std::string> input_arg_names = desc->InputArgumentNames();
-
-  if (std::find(input_arg_names.begin(), input_arg_names.end(),
-                "ResidualData") != input_arg_names.end()) {
-    op_desc.SetInput("ResidualData", desc->Input("ResidualData"));
-  }
-  // Only consider strides, padding, groups, dilations for now
-  op_desc.SetAttr("strides", desc->GetAttr<std::vector<int>>("strides"));
-  op_desc.SetAttr("paddings", desc->GetAttr<std::vector<int>>("paddings"));
-  op_desc.SetAttr("groups", desc->GetAttr<int>("groups"));
-  op_desc.SetAttr("dilations", desc->GetAttr<std::vector<int>>("dilations"));
-  return op_desc;
+  auto conv_instruct = matched.at("conv2d")->stmt();
+  auto conv_op_desc = conv_instruct->mutable_op_info();
+  auto* scope = conv_instruct->op()->scope();
+
+  ///////////////////////////////////////////////////////////////////////////
+  // ConvElementwiseFuser
+  //   if `conv_bias` exists, fold the old `conv_bias` into the
+  //   `elementwise_add` bias, which becomes the new conv bias.
+  //   if `conv_bias` does not exist, the `elementwise_add` bias is used
+  //   directly as the new conv bias.
+  ///////////////////////////////////////////////////////////////////////////
+  if (conv_has_bias_ == true && conv_op_desc->HasInput("Bias") &&
+      conv_op_desc->Input("Bias").size() > 0) {
+    auto conv_bias_var = scope->FindVar(conv_op_desc->Input("Bias").front());
+    if (conv_bias_var != nullptr) {
+      // conv bias
+      auto conv_bias_t = &(conv_bias_var->Get<lite::Tensor>());
+      auto conv_bias_d = conv_bias_t->data<float>();
+
+      // elementwise_add bias
+      auto elementwise_add_bias_t =
+          scope->FindVar(matched.at("bias")->arg()->name)
+              ->GetMutable<lite::Tensor>();
+      auto elementwise_add_bias_d =
+          elementwise_add_bias_t->mutable_data<float>();
+
+      for (unsigned int i = 0; i < conv_bias_t->data_size(); ++i) {
+        elementwise_add_bias_d[i] += conv_bias_d[i];
+      }
+    }
+  }
+
+  conv_op_desc->SetType(conv_type_);
+  conv_op_desc->SetInput("Input", {matched.at("input")->arg()->name});
+  conv_op_desc->SetInput("Filter", {matched.at("filter")->arg()->name});
+  conv_op_desc->SetOutput("Output", {matched.at("output")->arg()->name});
+  conv_op_desc->SetInput("Bias", {matched.at("bias")->arg()->name});
+  auto update_conv_desc = *conv_instruct->mutable_op_info();
+  conv_instruct->ResetOp(update_conv_desc, graph->valid_places());
+
+  IR_NODE_LINK_TO(matched.at("bias"), matched.at("conv2d"));
+  IR_OP_VAR_LINK(matched.at("conv2d"), matched.at("output"));
 }
 
 }  // namespace fusion
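Stripped of the graph plumbing, the fuser's bias handling is a single per-channel vector addition: any existing conv bias is folded into the elementwise_add bias, which then becomes the fused conv's Bias input. A standalone sketch with hypothetical buffers (not Paddle-Lite API):

```cpp
#include <vector>

// Fold an existing conv bias into the elementwise_add bias, per output
// channel, mirroring what ConvElementwiseFuser::InsertNewNode does in place.
std::vector<float> FoldConvBias(const std::vector<float>& conv_bias,
                                std::vector<float> elementwise_add_bias) {
  for (size_t i = 0; i < conv_bias.size(); ++i) {
    elementwise_add_bias[i] += conv_bias[i];
  }
  return elementwise_add_bias;  // becomes the fused conv's "Bias" input
}
```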
lite/core/mir/fusion/conv_elementwise_fuser.h (view file @ 5f72a91b)

@@ -25,16 +25,18 @@ namespace fusion {
 class ConvElementwiseFuser : public FuseBase {
  public:
-  explicit ConvElementwiseFuser(const std::string& conv_type) {
+  explicit ConvElementwiseFuser(const std::string& conv_type,
+                                const bool conv_has_bias) {
     conv_type_ = conv_type;
+    conv_has_bias_ = conv_has_bias;
   }
 
   void BuildPattern() override;
   void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override;
 
  private:
   cpp::OpDesc GenOpDesc(const key2nodes_t& matched) override;
-  std::string conv_type_;
+  std::string conv_type_{"conv2d"};
+  bool conv_has_bias_{false};
 };
 
 }  // namespace fusion
lite/core/op_registry.h (view file @ 5f72a91b)

@@ -32,6 +32,43 @@
 using LiteType = paddle::lite::Type;
 
+class OpKernelInfoCollector {
+ public:
+  static OpKernelInfoCollector& Global() {
+    static auto* x = new OpKernelInfoCollector;
+    return *x;
+  }
+
+  void AddOp2path(const std::string& op_name, const std::string& op_path) {
+    size_t index = op_path.find_last_of('/');
+    if (index != std::string::npos) {
+      op2path_.insert(std::pair<std::string, std::string>(
+          op_name, op_path.substr(index + 1)));
+    }
+  }
+  void AddKernel2path(const std::string& kernel_name,
+                      const std::string& kernel_path) {
+    size_t index = kernel_path.find_last_of('/');
+    if (index != std::string::npos) {
+      kernel2path_.insert(std::pair<std::string, std::string>(
+          kernel_name, kernel_path.substr(index + 1)));
+    }
+  }
+  void SetKernel2path(
+      const std::map<std::string, std::string>& kernel2path_map) {
+    kernel2path_ = kernel2path_map;
+  }
+
+  const std::map<std::string, std::string>& GetOp2PathDict() {
+    return op2path_;
+  }
+  const std::map<std::string, std::string>& GetKernel2PathDict() {
+    return kernel2path_;
+  }
+
+ private:
+  std::map<std::string, std::string> op2path_;
+  std::map<std::string, std::string> kernel2path_;
+};
+
 namespace paddle {
 namespace lite {

@@ -59,7 +96,6 @@ class OpLiteRegistor : public Registor<OpClass> {
       });
     }) {}
 };
-
 template <TargetType Target, PrecisionType Precision, DataLayoutType Layout>
 using KernelRegistryForTarget =
     Factory<KernelLite<Target, Precision, Layout>, std::unique_ptr<KernelBase>>;

@@ -287,6 +323,7 @@ class KernelRegistor : public lite::Registor<KernelType> {
   static paddle::lite::OpLiteRegistor<OpClass> LITE_OP_REGISTER_INSTANCE( \
       op_type__)(#op_type__);                                             \
   int touch_op_##op_type__() {                                            \
+    OpKernelInfoCollector::Global().AddOp2path(#op_type__, __FILE__);     \
     return LITE_OP_REGISTER_INSTANCE(op_type__).Touch();                  \
   }

@@ -312,6 +349,9 @@ class KernelRegistor : public lite::Registor<KernelType> {
   static KernelClass LITE_KERNEL_INSTANCE(                                    \
       op_type__, target__, precision__, layout__, alias__);                   \
   int touch_##op_type__##target__##precision__##layout__##alias__() {         \
+    OpKernelInfoCollector::Global().AddKernel2path(                           \
+        #op_type__ "," #target__ "," #precision__ "," #layout__ "," #alias__, \
+        __FILE__);                                                            \
     LITE_KERNEL_INSTANCE(op_type__, target__, precision__, layout__, alias__) \
         .Touch();                                                             \
     return 0;                                                                 \
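Because every registration macro now reports its own __FILE__, the collector can answer, after static initialization, which source file implements a given op or kernel. A hedged query sketch; "conv2d" is only an example key, and whether it is present depends on what was linked into the binary:

```cpp
#include <iostream>
#include "lite/core/op_registry.h"

// Sketch: look up the source file recorded for an op at registration time.
void PrintOpSourceFile() {
  const auto& op2path = OpKernelInfoCollector::Global().GetOp2PathDict();
  auto it = op2path.find("conv2d");  // example op name
  if (it != op2path.end()) {
    std::cout << it->first << " -> " << it->second << "\n";
  }
}
```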
lite/fluid/lod.h (view file @ 5f72a91b)

@@ -21,7 +21,7 @@ namespace lite {
 namespace fluid {
 
 using LoD = std::vector<std::vector<size_t>>;
 
-LoD ToAbsOffset(const LoD& in) {
+static LoD ToAbsOffset(const LoD& in) {
   // the lowest level stores relative offsets
   if (in.empty() || in.size() == 1) return in;
   LoD result = in;
lite/tools/build.sh (view file @ 5f72a91b)

@@ -17,6 +17,8 @@ BUILD_EXTRA=OFF
 BUILD_JAVA=ON
 BUILD_PYTHON=OFF
 BUILD_DIR=$(pwd)
+OPTMODEL_DIR=""
+BUILD_TAILOR=OFF
 
 readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz

@@ -94,6 +96,8 @@ function make_tiny_publish_so {
       -DLITE_ON_TINY_PUBLISH=ON \
      -DANDROID_STL_TYPE=$android_stl \
      -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
+      -DLITE_BUILD_TAILOR=$BUILD_TAILOR \
+      -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
      -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
 
   make publish_inference -j$NUM_PROC

@@ -133,6 +137,8 @@ function make_full_publish_so {
       -DLITE_SHUTDOWN_LOG=ON \
      -DANDROID_STL_TYPE=$android_stl \
      -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
+      -DLITE_BUILD_TAILOR=$BUILD_TAILOR \
+      -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
      -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
 
   make publish_inference -j4

@@ -317,6 +323,14 @@ function main {
             BUILD_DIR="${i#*=}"
             shift
             ;;
+        --opt_model_dir=*)
+            OPTMODEL_DIR="${i#*=}"
+            shift
+            ;;
+        --build_tailor=*)
+            BUILD_TAILOR="${i#*=}"
+            shift
+            ;;
         tiny_publish)
             make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL
             shift
lite/tools/cmake_tools/create_fake_kernel_registry.py (view file @ 5f72a91b)

@@ -20,6 +20,7 @@ from utils import *
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
+kernelmap_path = sys.argv[3]
 
 out_lines = [
     '#pragma once',

@@ -47,6 +48,31 @@ class %s : public KernelLite<TARGET(%s), PRECISION(%s), DATALAYOUT(%s)> {
 }  // namespace paddle
 '''
 
+# create a .h file to store the kernel & source-file relationship
+kernel_src_map_lines = [
+    '''
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <map>
+// ATTENTION: this header may only be included from a single .cc file.
+const std::map<std::string, std::string> kernel2path_map{
+'''
+]
+
 with open(ops_list_path) as f:

@@ -99,7 +125,23 @@ with open(ops_list_path) as f:
             out_lines.append("")
             out_lines.append(gen_use_kernel_statement(k.op_type, k.target,
                                                       k.precision, k.data_layout, k.alias))
+            index = path.rindex('/')
+            filename = path[index + 1:]
+            map_element = '  {"%s,%s,%s,%s,%s", "%s"},' % (
+                k.op_type, k.target, k.precision, k.data_layout, k.alias,
+                filename.strip())
+            kernel_src_map_lines.append(map_element)
 
 with open(dest_path, 'w') as f:
     logging.info("write kernel list to %s" % dest_path)
     f.write('\n'.join(out_lines))
+
+with open(kernelmap_path, 'w') as fd:
+    logging.info("write kernel map to %s" % kernelmap_path)
+    kernel_src_map_lines.append('  {" ", " "}')
+    kernel_src_map_lines.append('};')
+    fd.write('\n'.join(kernel_src_map_lines))
lite/tools/cmake_tools/parse_kernel_registry.py (view file @ 5f72a91b)

@@ -18,14 +18,19 @@ from ast import RegisterLiteKernelParser
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
+minkernels_list_path = sys.argv[3]
+tailored = sys.argv[4]
 
 out_lines = [
     '#pragma once',
     '#include "paddle_lite_factory_helper.h"',
     '',
 ]
 
+minlines = set()
+if tailored == "ON":
+    with open(minkernels_list_path) as fd:
+        for line in fd:
+            minlines.add(line.strip())
+
 with open(ops_list_path) as f:
     paths = set([path for path in f])
     for path in paths:

@@ -35,6 +40,15 @@ with open(ops_list_path) as f:
         kernel_parser.parse()
 
         for k in kernel_parser.kernels:
+            kernel = "%s, %s, %s, %s, %s" % (
+                k.op_type,
+                k.target,
+                k.precision,
+                k.data_layout,
+                k.alias,
+            )
+            if tailored == "ON":
+                if kernel not in minlines:
+                    continue
             key = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % (
                 k.op_type,
                 k.target,
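The emitted header is just a flat list of USE_LITE_KERNEL declarations; with tailoring on, only kernels whose tuple appears in .tailored_kernels_list survive the filter. A hypothetical excerpt of a tailored paddle_use_kernels.h (the specific kernels listed are illustrative):

```cpp
// Hypothetical excerpt: with LITE_BUILD_TAILOR=ON, only the kernels used by
// the optimized model remain declared here.
#pragma once
#include "paddle_lite_factory_helper.h"

USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
```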
lite/tools/cmake_tools/parse_op_registry.py (view file @ 5f72a91b)

@@ -19,7 +19,8 @@ from ast import RegisterLiteOpParser
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
+minops_list_path = sys.argv[3]
+tailored = sys.argv[4]
 
 out_lines = [
     '#pragma once',
     '#include "paddle_lite_factory_helper.h"',

@@ -30,6 +31,11 @@ paths = set()
 for line in open(ops_list_path):
     paths.add(line.strip())
 
+if tailored == "ON":
+    minlines = set()
+    with open(minops_list_path) as fd:
+        for line in fd:
+            minlines.add(line.strip())
+
 for path in paths:
     str_info = open(path.strip()).read()
     op_parser = RegisterLiteOpParser(str_info)

@@ -37,6 +43,8 @@ for path in paths:
     for op in ops:
         if "_grad" in op:
             continue
+        if tailored == "ON":
+            if op not in minlines:
+                continue
         out = "USE_LITE_OP(%s);" % op
         out_lines.append(out)