Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c353397d
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2301
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c353397d
编写于
6月 12, 2019
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix and merge from github
b8572aa3
上级
d1904d11
变更
54
隐藏空白更改
内联
并排
Showing
54 changed file
with
480 addition
and
215 deletion
+480
-215
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+1
-0
paddle/fluid/lite/CMakeLists.txt
paddle/fluid/lite/CMakeLists.txt
+45
-11
paddle/fluid/lite/api/cxx_api_test.cc
paddle/fluid/lite/api/cxx_api_test.cc
+7
-0
paddle/fluid/lite/api/light_api.h
paddle/fluid/lite/api/light_api.h
+4
-4
paddle/fluid/lite/arm/CMakeLists.txt
paddle/fluid/lite/arm/CMakeLists.txt
+0
-1
paddle/fluid/lite/arm/math/CMakeLists.txt
paddle/fluid/lite/arm/math/CMakeLists.txt
+3
-0
paddle/fluid/lite/core/CMakeLists.txt
paddle/fluid/lite/core/CMakeLists.txt
+4
-6
paddle/fluid/lite/core/context.h
paddle/fluid/lite/core/context.h
+5
-0
paddle/fluid/lite/core/hvy_tensor.h
paddle/fluid/lite/core/hvy_tensor.h
+12
-1
paddle/fluid/lite/core/kernel.h
paddle/fluid/lite/core/kernel.h
+1
-1
paddle/fluid/lite/core/lite_tensor.h
paddle/fluid/lite/core/lite_tensor.h
+8
-0
paddle/fluid/lite/core/mir/CMakeLists.txt
paddle/fluid/lite/core/mir/CMakeLists.txt
+30
-24
paddle/fluid/lite/core/mir/generate_program_pass.h
paddle/fluid/lite/core/mir/generate_program_pass.h
+1
-1
paddle/fluid/lite/core/mir/pass_manager.cc
paddle/fluid/lite/core/mir/pass_manager.cc
+1
-5
paddle/fluid/lite/core/mir/pass_manager.h
paddle/fluid/lite/core/mir/pass_manager.h
+1
-1
paddle/fluid/lite/core/mir/pattern_matcher.cc
paddle/fluid/lite/core/mir/pattern_matcher.cc
+49
-3
paddle/fluid/lite/core/mir/pattern_matcher.h
paddle/fluid/lite/core/mir/pattern_matcher.h
+28
-0
paddle/fluid/lite/core/mir/ssa_graph.cc
paddle/fluid/lite/core/mir/ssa_graph.cc
+12
-6
paddle/fluid/lite/core/mir/ssa_graph.h
paddle/fluid/lite/core/mir/ssa_graph.h
+5
-4
paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc
paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc
+2
-0
paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc
...fluid/lite/core/mir/variable_place_inference_pass_test.cc
+27
-0
paddle/fluid/lite/core/op_lite.cc
paddle/fluid/lite/core/op_lite.cc
+12
-4
paddle/fluid/lite/core/op_lite.h
paddle/fluid/lite/core/op_lite.h
+1
-1
paddle/fluid/lite/core/optimizer.h
paddle/fluid/lite/core/optimizer.h
+1
-1
paddle/fluid/lite/core/profile/CMakeLists.txt
paddle/fluid/lite/core/profile/CMakeLists.txt
+0
-1
paddle/fluid/lite/core/program.cc
paddle/fluid/lite/core/program.cc
+40
-0
paddle/fluid/lite/core/program.h
paddle/fluid/lite/core/program.h
+35
-60
paddle/fluid/lite/core/program_fake_utils.h
paddle/fluid/lite/core/program_fake_utils.h
+10
-10
paddle/fluid/lite/core/scope.cc
paddle/fluid/lite/core/scope.cc
+7
-1
paddle/fluid/lite/core/target_wrapper.h
paddle/fluid/lite/core/target_wrapper.h
+40
-1
paddle/fluid/lite/core/tensor.h
paddle/fluid/lite/core/tensor.h
+20
-11
paddle/fluid/lite/core/variable.h
paddle/fluid/lite/core/variable.h
+1
-1
paddle/fluid/lite/cuda/CMakeLists.txt
paddle/fluid/lite/cuda/CMakeLists.txt
+0
-1
paddle/fluid/lite/host/CMakeLists.txt
paddle/fluid/lite/host/CMakeLists.txt
+0
-1
paddle/fluid/lite/kernels/CMakeLists.txt
paddle/fluid/lite/kernels/CMakeLists.txt
+0
-1
paddle/fluid/lite/kernels/arm/conv_compute.cc
paddle/fluid/lite/kernels/arm/conv_compute.cc
+5
-6
paddle/fluid/lite/kernels/arm/fc_compute.cc
paddle/fluid/lite/kernels/arm/fc_compute.cc
+1
-1
paddle/fluid/lite/kernels/cuda/CMakeLists.txt
paddle/fluid/lite/kernels/cuda/CMakeLists.txt
+0
-1
paddle/fluid/lite/kernels/host/CMakeLists.txt
paddle/fluid/lite/kernels/host/CMakeLists.txt
+1
-2
paddle/fluid/lite/kernels/x86/CMakeLists.txt
paddle/fluid/lite/kernels/x86/CMakeLists.txt
+5
-2
paddle/fluid/lite/model_parser/compatible_pb.cc
paddle/fluid/lite/model_parser/compatible_pb.cc
+2
-1
paddle/fluid/lite/model_parser/cpp/op_desc.cc
paddle/fluid/lite/model_parser/cpp/op_desc.cc
+8
-6
paddle/fluid/lite/model_parser/pb/op_desc.cc
paddle/fluid/lite/model_parser/pb/op_desc.cc
+1
-1
paddle/fluid/lite/operators/CMakeLists.txt
paddle/fluid/lite/operators/CMakeLists.txt
+6
-5
paddle/fluid/lite/operators/conv_op.cc
paddle/fluid/lite/operators/conv_op.cc
+2
-1
paddle/fluid/lite/operators/conv_op.h
paddle/fluid/lite/operators/conv_op.h
+6
-6
paddle/fluid/lite/operators/feed_op.cc
paddle/fluid/lite/operators/feed_op.cc
+2
-2
paddle/fluid/lite/operators/mul_op.h
paddle/fluid/lite/operators/mul_op.h
+4
-3
paddle/fluid/lite/operators/pool_op.cc
paddle/fluid/lite/operators/pool_op.cc
+1
-1
paddle/fluid/lite/operators/pool_op.h
paddle/fluid/lite/operators/pool_op.h
+0
-8
paddle/fluid/lite/tools/Dockerfile.mobile
paddle/fluid/lite/tools/Dockerfile.mobile
+1
-1
paddle/fluid/lite/tools/build.sh
paddle/fluid/lite/tools/build.sh
+21
-3
paddle/fluid/lite/utils/CMakeLists.txt
paddle/fluid/lite/utils/CMakeLists.txt
+1
-2
paddle/fluid/lite/x86/CMakeLists.txt
paddle/fluid/lite/x86/CMakeLists.txt
+0
-1
未找到文件。
paddle/fluid/framework/op_desc.cc
浏览文件 @
c353397d
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <mutex> // NOLINT
#include <string>
#include <unordered_map>
#include <utility>
#include "glog/logging.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_proto_maker.h"
...
...
paddle/fluid/lite/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -34,7 +34,7 @@ endfunction()
function
(
lite_deps TARGET
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS
)
set
(
multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS
ARGS
)
cmake_parse_arguments
(
lite_deps
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
deps
${
lite_deps_DEPS
}
)
...
...
@@ -63,14 +63,39 @@ function (lite_deps TARGET)
endforeach
(
var
)
endif
()
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
foreach
(
var
${
lite_deps_LIGHT_DEPS
}
)
set
(
deps
${
deps
}
${
var
}
)
endforeach
(
var
)
endif
()
if
(
NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
foreach
(
var
${
lite_deps_HVY_DEPS
}
)
set
(
deps
${
deps
}
${
var
}
)
endforeach
(
var
)
endif
()
set
(
${
TARGET
}
${
deps
}
PARENT_SCOPE
)
endfunction
()
# Add names for lite libraries for latter compile. We use this name list to avoid compiling
# the whole fluid project to accelerate the compile speed.
set
(
offline_lib_registry_file
"
${
CMAKE_BINARY_DIR
}
/lite_libs.txt"
)
file
(
WRITE
${
offline_lib_registry_file
}
""
)
# clean
# cc_library with branch support.
# The branches:
# X86_DEPS: works only when LITE_WITH_X86 is ON.
# CUDA_DEPS: LITE_WITH_CUDA
# ARM_DEPS: LITE_WITH_ARM
# PROFILE_DEPS: LITE_WITH_PROFILE
# LIGHT_DEPS: LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
# HVY_DEPS: NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
function
(
lite_cc_library TARGET
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS
)
set
(
multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS ARGS
)
cmake_parse_arguments
(
args
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
deps
""
)
...
...
@@ -79,15 +104,22 @@ function(lite_cc_library TARGET)
X86_DEPS
${
args_X86_DEPS
}
CUDA_DEPS
${
args_CUDA_DEPS
}
ARM_DEPS
${
args_ARM_DEPS
}
PROFILE_DEPS
${
args_PROFILE_DEPS
}
)
PROFILE_DEPS
${
args_PROFILE_DEPS
}
LIGHT_DEPS
${
args_LIGHT_DEPS
}
HVY_DEPS
${
args_HVY_DEPS
}
)
cc_library
(
${
TARGET
}
SRCS
${
args_SRCS
}
DEPS
${
deps
}
${
args_DEPS
}
)
# register a library name.
file
(
APPEND
${
offline_lib_registry_file
}
"
${
TARGET
}
\n
"
)
endfunction
()
function
(
lite_cc_binary TARGET
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS
)
set
(
multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS ARGS
)
cmake_parse_arguments
(
args
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
deps
""
)
...
...
@@ -97,6 +129,8 @@ function(lite_cc_binary TARGET)
CUDA_DEPS
${
args_CUDA_DEPS
}
ARM_DEPS
${
args_ARM_DEPS
}
PROFILE_DEPS
${
args_PROFILE_DEPS
}
LIGHT_DEPS
${
args_LIGHT_DEPS
}
HVY_DEPS
${
args_HVY_DEPS
}
)
cc_binary
(
${
TARGET
}
SRCS
${
args_SRCS
}
DEPS
${
deps
}
${
args_DEPS
}
)
endfunction
()
...
...
@@ -104,15 +138,13 @@ endfunction()
# Add a unit-test name to file for latter offline manual test.
set
(
offline_test_registry_file
"
${
CMAKE_BINARY_DIR
}
/lite_tests.txt"
)
file
(
WRITE
${
offline_test_registry_file
}
""
)
# clean
function
(
register_test_offline TARGET
)
file
(
APPEND
${
offline_test_registry_file
}
"
${
TARGET
}
\n
"
)
endfunction
()
# Test lite modules.
function
(
lite_cc_test TARGET
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS
)
set
(
multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS
ARGS
)
cmake_parse_arguments
(
args
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
deps
""
)
...
...
@@ -122,9 +154,11 @@ function(lite_cc_test TARGET)
CUDA_DEPS
${
args_CUDA_DEPS
}
ARM_DEPS
${
args_ARM_DEPS
}
PROFILE_DEPS
${
args_PROFILE_DEPS
}
LIGHT_DEPS
${
args_LIGHT_DEPS
}
HVY_DEPS
${
args_HVY_DEPS
}
)
_lite_cc_test
(
${
TARGET
}
SRCS
${
args_SRCS
}
DEPS
${
deps
}
ARGS
${
args_ARGS
}
)
register_test_offline
(
"
${
TARGET
}
"
)
file
(
APPEND
${
offline_test_registry_file
}
"
${
TARGET
}
\n
"
)
endfunction
()
add_subdirectory
(
core
)
...
...
@@ -137,4 +171,4 @@ add_subdirectory(kernels)
add_subdirectory
(
model_parser
)
add_subdirectory
(
utils
)
add_subdirectory
(
api
)
add_subdirectory
(
gen_code
)
paddle/fluid/lite/api/cxx_api_test.cc
浏览文件 @
c353397d
...
...
@@ -76,6 +76,7 @@ TEST(CXXApi, save_model) {
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)},
valid_places
);
LOG
(
INFO
)
<<
"Save optimized model to "
<<
FLAGS_optimized_model
;
predictor
.
SaveModel
(
FLAGS_optimized_model
);
}
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
...
...
@@ -130,6 +131,9 @@ USE_LITE_OP(square)
USE_LITE_OP
(
softmax
)
USE_LITE_OP
(
dropout
)
USE_LITE_OP
(
concat
)
USE_LITE_OP
(
conv2d
)
USE_LITE_OP
(
depthwise_conv2d
)
USE_LITE_OP
(
pool2d
)
USE_LITE_KERNEL
(
feed
,
kHost
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kHost
,
kAny
,
kAny
,
def
);
...
...
@@ -144,6 +148,9 @@ USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL
(
softmax
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
concat
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
conv2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
depthwise_conv2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_CUDA
...
...
paddle/fluid/lite/api/light_api.h
浏览文件 @
c353397d
...
...
@@ -64,7 +64,7 @@ class LightPredictor {
private:
void
BuildRuntimeProgram
(
const
framework
::
proto
::
ProgramDesc
&
prog
)
{
std
::
vector
<
Instruct
>
insts
;
std
::
vector
<
Instruct
ion
>
insts
;
// 1. Create op first
Program
program
(
prog
,
scope_
,
{});
...
...
@@ -72,7 +72,7 @@ class LightPredictor {
// Create the kernels of the target places, and filter out the specific
// kernel with the target alias.
for
(
auto
&
op
:
program
.
ops
)
{
for
(
auto
&
op
:
program
.
ops
_
)
{
lite
::
pb
::
OpDesc
desc
(
op
->
op_info
()
->
desc
());
auto
kernel_type
=
desc
.
GetAttr
(
kKernelTypeAttr
).
get
<
std
::
string
>
();
std
::
string
op_type
,
alias
;
...
...
@@ -89,8 +89,8 @@ class LightPredictor {
insts
.
emplace_back
(
op
,
std
::
move
(
*
it
));
}
program_
.
reset
(
new
RuntimeProgram
(
std
::
move
(
insts
)));
CHECK
(
program
.
exec_scope
);
program_
->
set_exec_scope
(
program
.
exec_scope
);
CHECK
(
program
.
exec_scope
_
);
program_
->
set_exec_scope
(
program
.
exec_scope
_
);
}
private:
...
...
paddle/fluid/lite/arm/CMakeLists.txt
浏览文件 @
c353397d
add_subdirectory
(
math
)
paddle/fluid/lite/arm/math/CMakeLists.txt
浏览文件 @
c353397d
if
(
NOT
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM
))
return
()
endif
()
if
(
NOT
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM
))
return
()
...
...
paddle/fluid/lite/core/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -8,7 +8,7 @@ lite_cc_library(target_wrapper_lite SRCS target_wrapper.cc
lite_cc_library
(
memory_lite SRCS memory.cc DEPS target_wrapper_lite
)
lite_cc_library
(
lite_tensor SRCS lite_tensor.cc DEPS memory_lite target_wrapper_lite
)
if
(
NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
cc_library
(
hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor
)
lite_cc_library
(
hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor HVY_DEPS framework_proto
)
endif
()
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
...
...
@@ -19,19 +19,18 @@ endif()
proto_library
(
framework_proto_lite SRCS framework.proto
)
cc_library
(
kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite
)
cc_library
(
kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite any_lite op_params_lite framework_proto_lite
${
tensor_lite
}
)
cc_library
(
variable_lite SRCS variable.cc
)
cc_library
(
op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite
)
cc_library
(
scope_lite SRCS scope.cc DEPS
${
tensor_lite
}
)
cc_library
(
cpu_info_lite SRCS cpu_info.cc
)
cc_library
(
context_lite SRCS context.cc DEPS
${
tensor_lite
}
any_lite cpu_info_lite
)
cc_library
(
op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite
cpp_op_desc_lite
${
tensor_lite
}
)
cpp_op_desc_lite
${
tensor_lite
}
)
cc_library
(
types_lite SRCS types.cc
)
cc_library
(
type_system SRCS type_system.cc DEPS
${
tensor_lite
}
target_wrapper_lite
)
cc_library
(
program_lite SRCS program.cc DEPS op_lite kernel_lite
)
lite_cc_library
(
program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite HVY_DEPS framework_proto
)
cc_library
(
optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite
)
add_subdirectory
(
mir
)
...
...
@@ -57,4 +56,3 @@ lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_li
lite_cc_test
(
test_types_lite SRCS types_test.cc DEPS types_lite
)
lite_cc_test
(
test_memory_lite SRCS memory_test.cc DEPS memory_lite
)
lite_cc_test
(
test_context_lite SRCS context_test.cc DEPS context_lite X86_DEPS operator
)
paddle/fluid/lite/core/context.h
浏览文件 @
c353397d
...
...
@@ -173,6 +173,11 @@ class Context<TargetType::kX86> {
new
::
paddle
::
framework
::
ExecutionContext
(
*
x86_device_context_
));
}
Context
(
Context
&&
ctx
)
{
x86_device_context_
=
std
::
move
(
ctx
.
x86_device_context_
);
x86_execution_context_
=
std
::
move
(
ctx
.
x86_execution_context_
);
}
// NOTE: InitOnce should only be used by ContextScheduler
void
InitOnce
()
{}
...
...
paddle/fluid/lite/core/hvy_tensor.h
浏览文件 @
c353397d
...
...
@@ -21,6 +21,7 @@
#pragma once
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/lite/core/target_wrapper.h"
#include "paddle/fluid/lite/core/tensor.h"
namespace
paddle
{
...
...
@@ -65,6 +66,14 @@ class TensorHvy : public TensorBase<TensorHvy> {
using
DDimT
=
DDimHvy
;
using
LoDT
=
framework
::
LoD
;
template
<
typename
DType
,
typename
DimT
,
TargetType
Target
>
void
Assign
(
DType
*
data
,
const
DimT
&
dim
)
{
Resize
(
dim
);
auto
*
dst
=
mutable_data
<
DType
>
(
Target
);
CopySync
<
Target
>
(
dst
,
data
,
dim
.
production
()
*
sizeof
(
DType
),
IoDirection
::
HtoD
);
}
TargetType
target
()
const
{
if
(
platform
::
is_gpu_place
(
data_
.
place
()))
{
return
TARGET
(
kCUDA
);
...
...
@@ -95,13 +104,15 @@ class TensorHvy : public TensorBase<TensorHvy> {
const
void
*
raw_data
()
const
{
return
data_
.
raw_data
();
}
void
Resize
(
const
DDimHvy
&
dims
)
{
LOG
(
INFO
)
<<
"dims.size "
<<
dims
.
size
();
data_
.
Resize
(
framework
::
make_ddim
(
dims
.
Vectorize
()));
}
void
ShareDataWith
(
const
TensorHvy
&
other
)
{
data_
.
ShareDataWith
(
other
.
data_
);
}
void
ShareDataWith
(
const
framework
::
Tensor
&
other
)
{
data_
.
ShareDataWith
(
other
);
}
void
CopyDataFrom
(
const
TensorHvy
&
other
)
{
data_
.
mutable_data
(
other
.
data_
.
place
(),
other
.
data_
.
type
());
TensorCopySync
(
other
.
data_
,
data_
.
place
(),
&
data_
);
...
...
paddle/fluid/lite/core/kernel.h
浏览文件 @
c353397d
...
...
@@ -150,7 +150,7 @@ class KernelBase {
void
Torch
()
{}
protected:
std
::
unique_ptr
<
KernelContext
>
ctx_
;
std
::
unique_ptr
<
KernelContext
>
ctx_
{
nullptr
}
;
mutable
operators
::
param_t
param_
;
// The corresponding op type.
std
::
string
op_type_
{};
...
...
paddle/fluid/lite/core/lite_tensor.h
浏览文件 @
c353397d
...
...
@@ -61,6 +61,14 @@ class TensorLite : public TensorBase<TensorLite> {
TensorLite
()
:
buffer_
(
std
::
make_shared
<
Buffer
>
())
{}
template
<
typename
DType
,
typename
DimT
,
TargetType
Target
>
void
Assign
(
DType
*
data
,
const
DimT
&
dim
)
{
Resize
(
dim
);
auto
*
dst
=
mutable_data
<
DType
>
(
Target
);
CopySync
<
Target
>
(
dst
,
data
,
dim
.
product
()
*
sizeof
(
DType
),
IoDirection
::
HtoD
);
}
template
<
typename
T
>
const
T
*
data
()
const
{
return
static_cast
<
const
T
*>
(
buffer_
->
data
());
...
...
paddle/fluid/lite/core/mir/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -28,28 +28,34 @@ cc_test(test_ssa_graph SRCS ssa_graph_test.cc DEPS
mir_pass_manager
program_fake_utils
)
set
(
test_variable_place_infrence_pass_DEPS
mul_op_lite
feed_op_lite
fetch_op_lite
io_copy_op_lite
${
host_kernels
}
mir_passes
mir_pass_manager
optimizer_lite
program_fake_utils
target_wrapper_host
)
if
(
LITE_WITH_CUDA
)
set
(
test_variable_place_infrence_pass_DEPS
${
test_variable_place_infrence_pass_DEPS
}
target_wrapper_cuda
kernels_cuda
)
endif
()
cc_test
(
test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc DEPS
${
test_variable_place_infrence_pass_DEPS
}
)
# lite_cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc
# DEPS
# mul_op_lite
# feed_op_lite
# fetch_op_lite
# io_copy_op_lite
# ${host_kernels}
# mir_passes
# mir_pass_manager
# optimizer_lite
# program_fake_utils
# target_wrapper_host
# PROFILE_DEPS basic_profiler_lite
# CUDA_DEPS target_wrapper_cuda kernels_cuda
# ARM_DEPS mul_compute_arm
# X86_DEPS mul_compute_x86
# )
cc_library
(
pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite
)
cc_test
(
test_pattern_matcher_lite SRCS pattern_matcher_tester.cc DEPS pattern_matcher_lite
)
lite_cc_library
(
pattern_matcher_lite SRCS pattern_matcher.cc DEPS mir_node mir_ssa_graph op_lite
)
lite_cc_test
(
test_pattern_matcher_lite SRCS pattern_matcher_test.cc DEPS pattern_matcher_lite
)
lite_cc_library
(
pattern_matcher_high_api SRCS pattern_matcher_high_api.cc DEPS pattern_matcher_lite
)
# TODO(wz) replace framework/proto to lite proto.
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
# it depends on the fluid/framework/proto, that is too heavy for mobile execution.
lite_cc_test
(
test_pattern_matcher_high_api SRCS pattern_matcher_high_api_test.cc DEPS
pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite
mir_passes compatible_pb_lite program_lite
${
ops_lite
}
)
endif
()
paddle/fluid/lite/core/mir/generate_program_pass.h
浏览文件 @
c353397d
...
...
@@ -41,7 +41,7 @@ class GenerateProgramPass : public ProgramPass {
}
private:
std
::
vector
<
Instruct
>
insts_
;
std
::
vector
<
Instruct
ion
>
insts_
;
};
}
// namespace mir
...
...
paddle/fluid/lite/core/mir/pass_manager.cc
浏览文件 @
c353397d
...
...
@@ -16,10 +16,6 @@
namespace
paddle
{
namespace
lite
{
namespace
mir
{
PassManager
::
PassManager
()
{}
}
// namespace mir
namespace
mir
{}
// namespace mir
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/core/mir/pass_manager.h
浏览文件 @
c353397d
...
...
@@ -30,7 +30,7 @@ class PassManager {
return
x
;
}
PassManager
()
;
PassManager
()
{}
void
Run
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
{
for
(
auto
&
pass
:
passes_
)
{
...
...
paddle/fluid/lite/core/mir/pattern_matcher.cc
浏览文件 @
c353397d
...
...
@@ -27,6 +27,30 @@ namespace mir {
size_t
PMPattern
::
id_
=
0UL
;
PMNode
&
PMNode
::
operator
>>
(
PMNode
&
right
)
{
pattern_
->
AddEdge
(
this
,
&
right
);
// automatically add out op link relation.
if
(
right
.
IsOp
())
{
CHECK
(
!
right
.
op_type_
.
empty
());
this
->
assert_is_op_input
(
right
.
op_type_
);
}
return
right
;
}
PMNode
&
PMNode
::
operator
>>
(
std
::
vector
<
PMNode
*>
&
nodes
)
{
for
(
auto
*
node
:
nodes
)
{
*
this
>>
*
node
;
}
return
*
this
;
}
void
operator
>>
(
std
::
vector
<
PMNode
*>
&
others
,
PMNode
&
me
)
{
for
(
auto
*
o
:
others
)
{
*
o
>>
me
;
}
}
PMNode
*
PMPattern
::
NewNode
(
const
std
::
string
&
name
)
{
if
(
!
name
.
empty
())
{
CHECK_EQ
(
node_map_
.
count
(
name
),
0UL
)
...
...
@@ -122,9 +146,7 @@ void PatternMatcher::ValidateByNodeRole(
// Collect the inlinks and outlinks.
std
::
unordered_set
<
Node
*>
ios
;
for
(
auto
&
item
:
subgraph
)
{
if
(
!
item
.
first
->
IsIntermediate
())
{
ios
.
insert
(
item
.
second
);
}
ios
.
insert
(
item
.
second
);
}
for
(
auto
&
item
:
subgraph
)
{
if
(
item
.
first
->
IsIntermediate
())
{
...
...
@@ -400,6 +422,30 @@ PMNode *PMNode::assert_is_op_input(const std::string &op_type) {
return
this
;
}
void
GraphSafeRemoveNodes
(
SSAGraph
*
graph
,
const
std
::
unordered_set
<
const
Node
*>
&
nodes
)
{
for
(
auto
*
node
:
nodes
)
{
graph
->
RemoveNode
(
node
);
}
for
(
auto
&
node
:
graph
->
mutable_nodes
())
{
for
(
auto
it
=
node
.
inlinks
.
begin
();
it
!=
node
.
inlinks
.
end
();)
{
if
(
nodes
.
count
(
*
it
))
{
it
=
node
.
inlinks
.
erase
(
it
);
}
else
{
it
++
;
}
}
for
(
auto
it
=
node
.
outlinks
.
begin
();
it
!=
node
.
outlinks
.
end
();)
{
if
(
nodes
.
count
(
*
it
))
{
it
=
node
.
outlinks
.
erase
(
it
);
}
else
{
it
++
;
}
}
}
}
}
// namespace mir
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/core/mir/pattern_matcher.h
浏览文件 @
c353397d
...
...
@@ -58,6 +58,15 @@ struct PMNode {
PMNode
&
LinksTo
(
const
std
::
vector
<
PMNode
*>&
others
);
PMNode
&
LinksFrom
(
const
std
::
vector
<
PMNode
*>&
others
);
// Link this to another node.
PMNode
&
operator
>>
(
PMNode
&
right
);
// Link many nodes to this node.
friend
void
operator
>>
(
std
::
vector
<
PMNode
*>&
others
,
PMNode
&
me
);
// Link this to many other nodes.
PMNode
&
operator
>>
(
std
::
vector
<
PMNode
*>&
nodes
);
bool
Tell
(
const
Node
*
node
)
const
{
if
(
teller_
)
return
teller_
(
node
);
...
...
@@ -92,6 +101,20 @@ struct PMNode {
return
this
;
}
PMNode
*
AsVar
()
{
type_
=
Type
::
kVar
;
assert_is_var
();
return
this
;
}
PMNode
*
AsOp
(
const
std
::
string
&
op_type
)
{
type_
=
Type
::
kOp
;
assert_is_op
(
op_type
);
return
this
;
}
void
set_op_type
(
const
std
::
string
&
op_type
)
{
op_type_
=
op_type
;
}
bool
IsIntermediate
()
const
{
return
role_
==
Role
::
kIntermediate
;
}
bool
IsInput
()
const
{
return
role_
==
Role
::
kInput
;
}
bool
IsOutput
()
const
{
return
role_
==
Role
::
kOutput
;
}
...
...
@@ -141,6 +164,7 @@ struct PMNode {
std
::
vector
<
teller_t
>
asserts_
;
PMPattern
*
pattern_
;
std
::
string
name_
;
std
::
string
op_type_
;
Type
type_
;
Role
role_
{
Role
::
kUnknown
};
};
...
...
@@ -273,6 +297,10 @@ class PatternMatcher {
std
::
unordered_map
<
const
PMNode
*
,
std
::
unordered_set
<
Node
*>>
pmnodes2nodes_
;
};
// Graph safely remove some nodes, will automatically clean up the edges.
void
GraphSafeRemoveNodes
(
SSAGraph
*
graph
,
const
std
::
unordered_set
<
const
Node
*>&
nodes
);
// Some pre-defined patterns those can be reused in multiple passes.
// The related Fluid Layer or Op should be one pattern here for better re-usage
// across different fusion.
...
...
paddle/fluid/lite/core/mir/ssa_graph.cc
浏览文件 @
c353397d
...
...
@@ -94,7 +94,7 @@ std::vector<mir::Node *> SSAGraph::StmtTopologicalOrder() {
}
void
SSAGraph
::
GraphCreateTmpVarNodes
(
const
Program
&
program
)
{
for
(
const
auto
&
name
:
program
.
tmp_vars
)
{
for
(
const
auto
&
name
:
program
.
tmp_vars
()
)
{
CHECK
(
!
arguments_
.
count
(
name
))
<<
"duplicate creating temp variable: "
<<
name
;
VLOG
(
5
)
<<
"create arg node "
<<
name
;
...
...
@@ -107,7 +107,7 @@ void SSAGraph::GraphCreateTmpVarNodes(const Program &program) {
void
SSAGraph
::
GraphCreateWeightVarNodes
(
const
Program
&
program
)
{
// create weight nodes.
for
(
const
auto
&
name
:
program
.
weights
)
{
for
(
const
auto
&
name
:
program
.
weights
()
)
{
CHECK
(
!
arguments_
.
count
(
name
))
<<
"duplicate creating weight variable: "
<<
name
;
VLOG
(
5
)
<<
"create arg node "
<<
name
;
...
...
@@ -119,8 +119,7 @@ void SSAGraph::GraphCreateWeightVarNodes(const Program &program) {
}
Node
*
SSAGraph
::
GraphCreateInstructNode
(
const
Program
&
program
,
const
std
::
shared_ptr
<
OpLite
>
&
op
,
const
std
::
vector
<
Place
>
&
valid_places
)
{
const
std
::
shared_ptr
<
OpLite
>
&
op
,
const
std
::
vector
<
Place
>
&
valid_places
)
{
node_storage_
.
emplace_back
();
// TODO(Superjomn) remove one valid_places here.
op
->
SetValidPlaces
(
valid_places
);
...
...
@@ -140,8 +139,8 @@ void SSAGraph::Build(const Program &program,
GraphCreateWeightVarNodes
(
program
);
CHECK
(
CheckNodesRoleSet
());
for
(
auto
&
op
:
program
.
ops
)
{
auto
*
op_node
=
GraphCreateInstructNode
(
program
,
op
,
valid_places
);
for
(
auto
&
op
:
program
.
ops
()
)
{
auto
*
op_node
=
GraphCreateInstructNode
(
op
,
valid_places
);
for
(
const
std
::
string
&
name
:
op
->
op_info
()
->
input_names
())
{
auto
*
arg
=
Argument
(
name
);
CHECK
(
arg
->
IsRoleSet
());
...
...
@@ -162,6 +161,13 @@ void SSAGraph::Build(const Program &program,
CheckValid
();
}
void
SSAGraph
::
RemoveNode
(
const
mir
::
Node
*
node
)
{
auto
pos
=
std
::
find_if
(
node_storage_
.
begin
(),
node_storage_
.
end
(),
[
&
node
](
mir
::
Node
&
n
)
{
return
&
n
==
node
;
});
CHECK
(
pos
!=
node_storage_
.
end
());
node_storage_
.
erase
(
pos
);
}
mir
::
Node
*
SSAGraph
::
Argument
(
const
std
::
string
&
name
)
{
auto
it
=
arguments_
.
find
(
name
);
CHECK
(
it
!=
arguments_
.
end
())
<<
"no argument called "
<<
name
;
...
...
paddle/fluid/lite/core/mir/ssa_graph.h
浏览文件 @
c353397d
...
...
@@ -38,6 +38,7 @@ class SSAGraph : GraphBase {
// @param program: the op program
// @param valid_places: the valid places user set for the system.
void
Build
(
const
Program
&
program
,
const
std
::
vector
<
Place
>
&
valid_places
);
void
RemoveNode
(
const
mir
::
Node
*
node
);
mir
::
Node
*
Argument
(
const
std
::
string
&
name
);
...
...
@@ -63,12 +64,12 @@ class SSAGraph : GraphBase {
CHECK
(
CheckLinksRoleSet
());
}
Node
*
GraphCreateInstructNode
(
const
std
::
shared_ptr
<
OpLite
>
&
op
,
const
std
::
vector
<
Place
>
&
valid_places
);
private:
void
GraphCreateTmpVarNodes
(
const
Program
&
program
);
void
GraphCreateWeightVarNodes
(
const
Program
&
program
);
Node
*
GraphCreateInstructNode
(
const
Program
&
program
,
const
std
::
shared_ptr
<
OpLite
>
&
op
,
const
std
::
vector
<
Place
>
&
valid_places
);
// Check the bidirectional connection.
bool
CheckBidirectionalConnection
();
...
...
@@ -77,7 +78,7 @@ class SSAGraph : GraphBase {
bool
CheckLinksRoleSet
();
void
MarkArgumentWeights
(
const
Program
&
program
)
{
for
(
const
auto
&
name
:
program
.
weights
)
{
for
(
const
auto
&
name
:
program
.
weights
()
)
{
arguments_
[
name
]
->
AsArg
().
is_weight
=
true
;
}
}
...
...
paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc
浏览文件 @
c353397d
...
...
@@ -37,6 +37,8 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
if
(
!
node
.
IsStmt
())
continue
;
auto
&
instruct
=
node
.
AsStmt
();
std
::
vector
<
std
::
pair
<
size_t
,
std
::
unique_ptr
<
KernelBase
>>>
scored
;
CHECK
(
!
instruct
.
valid_kernels
.
empty
())
<<
"No kernels found for "
<<
instruct
.
op_type
;
for
(
auto
&&
kernel
:
instruct
.
valid_kernels
)
{
size_t
score
=
KernelGrade
(
*
kernel
);
scored
.
emplace_back
(
score
,
std
::
move
(
kernel
));
...
...
paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc
浏览文件 @
c353397d
...
...
@@ -42,6 +42,12 @@ TEST(variable_place_inference_pass, test) {
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
),
},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
),
},
Place
{
TARGET
(
kX86
),
PRECISION
(
kAny
),
DATALAYOUT
(
kAny
),
},
});
Program
program
(
*
desc
->
Proto
(),
scope
,
places
);
...
...
@@ -58,7 +64,15 @@ TEST(variable_place_inference_pass, test) {
});
Place
prefered_place
{
#ifdef PADDLE_WITH_CUDA
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
),
#else
#ifdef PADDLE_WITH_ARM
TARGET
(
kARM
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
),
#else // X86
TARGET
(
kX86
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
),
#endif // ARM
#endif
};
optimizer
.
KernelPickPreferPlace
(
prefered_place
);
optimizer
.
Run
(
std
::
move
(
program
),
places
,
factor
,
passes
);
...
...
@@ -72,3 +86,16 @@ USE_LITE_OP(mul);
USE_LITE_OP
(
feed
);
USE_LITE_OP
(
fetch
);
USE_LITE_OP
(
io_copy
);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
host_to_device
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
device_to_host
);
#endif
paddle/fluid/lite/core/op_lite.cc
浏览文件 @
c353397d
...
...
@@ -28,15 +28,23 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
CHECK
(
!
op_type_
.
empty
())
<<
"op_type_ should be set first"
;
auto
pick_kernel
=
[
&
](
const
Place
&
place
)
{
auto
ks
=
KernelRegistry
::
Global
().
Create
(
(
kernel_type
.
empty
()
?
op_type_
:
kernel_type
),
place
.
target
,
place
.
precision
,
place
.
layout
);
auto
ks
=
KernelRegistry
::
Global
().
Create
(
op_type_
,
place
.
target
,
place
.
precision
,
place
.
layout
);
for
(
auto
&&
it
:
ks
)
{
AttachKernel
(
it
.
get
());
kernels
.
emplace_back
(
std
::
move
(
it
));
}
};
if
(
!
kernel_type
.
empty
())
{
Place
place
;
std
::
string
op_type
,
alias
;
KernelBase
::
ParseKernelType
(
kernel_type
,
&
op_type
,
&
alias
,
&
place
);
pick_kernel
(
place
);
CHECK
(
!
kernels
.
empty
())
<<
"no kernel for kernel type "
<<
kernel_type
;
return
kernels
;
}
std
::
set
<
Place
>
place_set
;
for
(
auto
place
:
places
)
{
place_set
.
insert
(
place
);
...
...
@@ -53,7 +61,7 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
targets
.
insert
(
place
.
target
);
}
CHECK
(
!
kernels
.
empty
())
<<
"No kernel found for Op "
<<
op_type_
;
//
CHECK(!kernels.empty()) << "No kernel found for Op " << op_type_;
VLOG
(
2
)
<<
"op "
<<
op_type_
<<
" get "
<<
kernels
.
size
()
<<
" kernels"
;
return
kernels
;
}
...
...
paddle/fluid/lite/core/op_lite.h
浏览文件 @
c353397d
...
...
@@ -147,7 +147,7 @@ class OpLite : public Registry {
class
OpInfo
:
public
cpp
::
OpDesc
{
public:
OpInfo
(
const
OpInfo
&
)
=
default
;
OpInfo
(
const
cpp
::
OpDesc
&
other
)
:
cpp
::
OpDesc
(
other
)
{}
explicit
OpInfo
(
const
cpp
::
OpDesc
&
other
)
:
cpp
::
OpDesc
(
other
)
{}
// Collect all the input variable's name.
std
::
vector
<
std
::
string
>
input_names
()
const
{
...
...
paddle/fluid/lite/core/optimizer.h
浏览文件 @
c353397d
...
...
@@ -64,7 +64,7 @@ class Optimizer {
RunPasses
(
passes
);
}
#endif
exec_scope_
=
program
.
exec_scope
;
exec_scope_
=
program
.
exec_scope
()
;
}
void
KernelPickPreferPlace
(
const
Place
&
place
)
{
...
...
paddle/fluid/lite/core/profile/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -4,4 +4,3 @@ endif()
lite_cc_library
(
basic_profiler_lite SRCS basic_profiler.cc
)
lite_cc_test
(
test_basic_profiler SRCS basic_profiler_test.cc DEPS basic_profiler_lite
)
paddle/fluid/lite/core/program.cc
浏览文件 @
c353397d
...
...
@@ -62,5 +62,45 @@ void RuntimeProgram::SaveParams(const std::string &dir,
}
}
void
Program
::
Build
(
const
framework
::
proto
::
ProgramDesc
&
program
)
{
CHECK
(
ops_
.
empty
())
<<
"Executor duplicate Build found"
;
// Create operators.
for
(
const
auto
&
proto_op_desc
:
program
.
blocks
(
0
).
ops
())
{
lite
::
OpDesc
op_desc_dummy
(
proto_op_desc
);
cpp
::
OpDesc
op_desc
;
TransformOpDescPbToCpp
(
op_desc_dummy
,
&
op_desc
);
auto
op_type
=
op_desc
.
Type
();
// if (op_type == "feed" || op_type == "fetch") continue;
VLOG
(
4
)
<<
"create Op ["
<<
op_type
<<
"]"
;
LOG
(
INFO
)
<<
"create Op ["
<<
op_type
<<
"]"
;
auto
op
=
LiteOpRegistry
::
Global
().
Create
(
op_type
);
CHECK
(
op
)
<<
"no Op found for "
<<
op_type
;
ops_
.
emplace_back
(
std
::
move
(
op
));
ops_
.
back
()
->
Attach
(
op_desc
,
exec_scope_
);
}
}
void
Program
::
PrepareWorkspace
(
const
framework
::
proto
::
ProgramDesc
&
program
)
{
CHECK
(
!
exec_scope_
)
<<
"Duplicate PrepareWorkspace found"
;
exec_scope_
=
&
scope_
->
NewScope
();
// Create Feed and Fetch var.
scope_
->
Var
(
"feed"
)
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
scope_
->
Var
(
"fetch"
)
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
tmp_vars_
.
push_back
(
"feed"
);
tmp_vars_
.
push_back
(
"fetch"
);
CHECK
(
!
program
.
blocks
().
empty
());
for
(
auto
proto_var_desc
:
program
.
blocks
(
0
).
vars
())
{
lite
::
VarDesc
var_desc
(
proto_var_desc
);
if
(
!
var_desc
.
Persistable
())
{
tmp_vars_
.
push_back
(
var_desc
.
Name
());
exec_scope_
->
Var
(
var_desc
.
Name
());
}
else
{
if
(
var_desc
.
Name
()
==
"feed"
||
var_desc
.
Name
()
==
"fetch"
)
continue
;
weights_
.
push_back
(
var_desc
.
Name
());
}
}
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/core/program.h
浏览文件 @
c353397d
...
...
@@ -37,79 +37,54 @@ static const char kKernelTypeAttr[] = "__@kernel_type_attr@__";
// - main block, which is a list of OpLite
// - scope: which contains all the weights
struct
Program
{
std
::
list
<
std
::
string
>
tmp_vars
;
std
::
list
<
std
::
string
>
weights
;
std
::
list
<
std
::
shared_ptr
<
OpLite
>>
ops
;
// the scope to run the kernels, NOTE this is the execution scope.
std
::
shared_ptr
<
lite
::
Scope
>
scope
;
std
::
vector
<
Place
>
valid_places
;
// Runtime scope.
lite
::
Scope
*
exec_scope
{};
const
framework
::
proto
::
ProgramDesc
desc
;
explicit
Program
(
const
std
::
shared_ptr
<
Scope
>&
root
)
{
scope
=
root
;
}
public:
explicit
Program
(
const
std
::
shared_ptr
<
Scope
>&
root
)
{
scope_
=
root
;
}
Program
(
const
framework
::
proto
::
ProgramDesc
&
desc
,
const
std
::
shared_ptr
<
Scope
>&
root
,
const
std
::
vector
<
Place
>&
valid_places
)
:
scope
(
root
),
valid_places
(
valid_places
),
desc
(
desc
)
{
CHECK
(
scope
)
<<
"scope should be init first"
;
:
scope
_
(
root
),
valid_places_
(
valid_places
),
desc_
(
desc
)
{
CHECK
(
scope
_
)
<<
"scope should be init first"
;
PrepareWorkspace
(
desc
);
Build
(
desc
);
}
std
::
unique_ptr
<
Program
>
Clone
()
const
{
std
::
unique_ptr
<
Program
>
res
(
new
Program
(
desc
,
scope
,
valid_places
));
std
::
unique_ptr
<
Program
>
res
(
new
Program
(
desc
_
,
scope_
,
valid_places_
));
return
res
;
}
const
std
::
list
<
std
::
string
>&
weights
()
const
{
return
weights_
;
}
const
std
::
list
<
std
::
string
>&
tmp_vars
()
const
{
return
tmp_vars_
;
}
std
::
list
<
std
::
string
>*
mutable_weights
()
{
return
&
weights_
;
}
std
::
list
<
std
::
string
>*
mutable_tmp_vars
()
{
return
&
tmp_vars_
;
}
const
std
::
list
<
std
::
shared_ptr
<
OpLite
>>&
ops
()
const
{
return
ops_
;
}
std
::
list
<
std
::
shared_ptr
<
OpLite
>>*
mutable_ops
()
{
return
&
ops_
;
}
lite
::
Scope
*
exec_scope
()
{
return
exec_scope_
;
}
lite
::
Scope
*
scope
()
{
return
scope_
.
get
();
}
private:
// Build from a program and scope.
void
Build
(
const
framework
::
proto
::
ProgramDesc
&
program
)
{
CHECK
(
ops
.
empty
())
<<
"Executor duplicate Build found"
;
// Create operators.
for
(
const
auto
&
proto_op_desc
:
program
.
blocks
(
0
).
ops
())
{
pb
::
OpDesc
op_desc
(
proto_op_desc
);
auto
op_type
=
op_desc
.
Type
();
// if (op_type == "feed" || op_type == "fetch") continue;
VLOG
(
4
)
<<
"create Op ["
<<
op_type
<<
"]"
;
LOG
(
INFO
)
<<
"create Op ["
<<
op_type
<<
"]"
;
auto
op
=
LiteOpRegistry
::
Global
().
Create
(
op_type
);
CHECK
(
op
)
<<
"no Op found for "
<<
op_type
;
ops
.
emplace_back
(
std
::
move
(
op
));
cpp
::
OpDesc
cpp_op_desc
;
TransformOpDescPbToCpp
(
op_desc
,
&
cpp_op_desc
);
ops
.
back
()
->
Attach
(
cpp_op_desc
,
exec_scope
);
}
}
void
Build
(
const
framework
::
proto
::
ProgramDesc
&
program
);
// Create temporary variables.
void
PrepareWorkspace
(
const
framework
::
proto
::
ProgramDesc
&
program
)
{
CHECK
(
!
exec_scope
)
<<
"Duplicate PrepareWorkspace found"
;
exec_scope
=
&
scope
->
NewScope
();
// Create Feed and Fetch var.
scope
->
Var
(
"feed"
)
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
scope
->
Var
(
"fetch"
)
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
tmp_vars
.
push_back
(
"feed"
);
tmp_vars
.
push_back
(
"fetch"
);
CHECK
(
!
program
.
blocks
().
empty
());
for
(
auto
proto_var_desc
:
program
.
blocks
(
0
).
vars
())
{
lite
::
VarDesc
var_desc
(
proto_var_desc
);
if
(
!
var_desc
.
Persistable
())
{
tmp_vars
.
push_back
(
var_desc
.
Name
());
exec_scope
->
Var
(
var_desc
.
Name
());
}
else
{
if
(
var_desc
.
Name
()
==
"feed"
||
var_desc
.
Name
()
==
"fetch"
)
continue
;
weights
.
push_back
(
var_desc
.
Name
());
}
}
}
void
PrepareWorkspace
(
const
framework
::
proto
::
ProgramDesc
&
program
);
private:
std
::
list
<
std
::
string
>
tmp_vars_
;
std
::
list
<
std
::
string
>
weights_
;
std
::
list
<
std
::
shared_ptr
<
OpLite
>>
ops_
;
// the scope to run the kernels, NOTE this is the execution scope.
std
::
shared_ptr
<
lite
::
Scope
>
scope_
;
std
::
vector
<
Place
>
valid_places_
;
// Runtime scope.
lite
::
Scope
*
exec_scope_
{};
const
framework
::
proto
::
ProgramDesc
desc_
;
};
struct
Instruct
{
Instruct
(
const
std
::
shared_ptr
<
OpLite
>&
op
,
std
::
unique_ptr
<
KernelBase
>&&
kernel
)
struct
Instruct
ion
{
Instruct
ion
(
const
std
::
shared_ptr
<
OpLite
>&
op
,
std
::
unique_ptr
<
KernelBase
>&&
kernel
)
:
op_
(
op
),
kernel_
(
std
::
move
(
kernel
))
{
#ifdef LITE_WITH_PROFILE
profile_id_
=
profile
::
BasicProfiler
<
profile
::
BasicTimer
>::
Global
()
...
...
@@ -132,7 +107,7 @@ struct Instruct {
kernel_
->
Launch
();
}
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
Instruct
&
other
)
{
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
Instruct
ion
&
other
)
{
os
<<
other
.
kernel_
->
summary
()
<<
"
\t
("
<<
other
.
kernel_
->
doc
()
<<
")"
;
return
os
;
}
...
...
@@ -156,7 +131,7 @@ struct Instruct {
*/
class
RuntimeProgram
{
public:
explicit
RuntimeProgram
(
std
::
vector
<
Instruct
>&&
insts
)
explicit
RuntimeProgram
(
std
::
vector
<
Instruct
ion
>&&
insts
)
:
instructions_
(
std
::
move
(
insts
))
{
if
(
instructions_
.
empty
())
{
LOG
(
FATAL
)
<<
"no instructions"
;
...
...
@@ -186,7 +161,7 @@ class RuntimeProgram {
private:
RuntimeProgram
(
const
RuntimeProgram
&
)
=
delete
;
std
::
vector
<
Instruct
>
instructions_
;
std
::
vector
<
Instruct
ion
>
instructions_
;
lite
::
Scope
*
exec_scope_
{};
};
...
...
paddle/fluid/lite/core/program_fake_utils.h
浏览文件 @
c353397d
...
...
@@ -33,11 +33,11 @@ Program FakeProgram() {
std
::
string
w1
=
"w"
+
std
::
to_string
(
id
);
std
::
string
b1
=
"b"
+
std
::
to_string
(
id
);
std
::
string
out1
=
"out"
+
std
::
to_string
(
id
);
auto
w1v
=
program
.
scope
->
Var
(
w1
)
->
GetMutable
<
lite
::
Tensor
>
();
auto
b1v
=
program
.
scope
->
Var
(
b1
)
->
GetMutable
<
lite
::
Tensor
>
();
auto
out1v
=
program
.
scope
->
Var
(
out1
)
->
GetMutable
<
lite
::
Tensor
>
();
auto
w1v
=
program
.
scope
()
->
Var
(
w1
)
->
GetMutable
<
lite
::
Tensor
>
();
auto
b1v
=
program
.
scope
()
->
Var
(
b1
)
->
GetMutable
<
lite
::
Tensor
>
();
auto
out1v
=
program
.
scope
()
->
Var
(
out1
)
->
GetMutable
<
lite
::
Tensor
>
();
lite
::
OpDesc
desc
;
cpp
::
OpDesc
desc
;
desc
.
SetInput
(
"Input"
,
{
x
});
desc
.
SetInput
(
"W"
,
{
w1
});
desc
.
SetInput
(
"Bias"
,
{
b1
});
...
...
@@ -46,12 +46,12 @@ Program FakeProgram() {
desc
.
SetAttr
(
"in_num_col_dims"
,
1
);
// add to input
program
.
tmp_vars
.
push_back
(
w1
);
program
.
tmp_vars
.
push_back
(
b1
);
program
.
mutable_tmp_vars
()
->
push_back
(
w1
);
program
.
mutable_tmp_vars
()
->
push_back
(
b1
);
auto
fc_op
=
LiteOpRegistry
::
Global
().
Create
(
"fc"
);
fc_op
->
Attach
(
desc
,
program
.
scope
.
get
());
program
.
ops
.
emplace_back
(
std
::
move
(
fc_op
));
fc_op
->
Attach
(
desc
,
program
.
scope
());
program
.
mutable_ops
()
->
emplace_back
(
std
::
move
(
fc_op
));
w1v
->
Resize
(
DDimHvy
(
std
::
vector
<
int64_t
>
({
100
,
100
})));
b1v
->
Resize
(
DDimHvy
(
std
::
vector
<
int64_t
>
({
100
,
1
})));
...
...
@@ -64,8 +64,8 @@ Program FakeProgram() {
// out1, w2, b2 -fc-> out2
std
::
string
x
=
"x"
;
program
.
tmp_vars
.
push_back
(
x
);
auto
*
xv
=
program
.
scope
->
Var
(
x
)
->
GetMutable
<
lite
::
Tensor
>
();
program
.
mutable_tmp_vars
()
->
push_back
(
x
);
auto
*
xv
=
program
.
scope
()
->
Var
(
x
)
->
GetMutable
<
lite
::
Tensor
>
();
xv
->
Resize
(
DDimHvy
(
std
::
vector
<
int64_t
>
({
100
,
100
})));
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
...
...
paddle/fluid/lite/core/scope.cc
浏览文件 @
c353397d
...
...
@@ -17,7 +17,13 @@
namespace
paddle
{
namespace
lite
{
Scope
::~
Scope
()
{}
Scope
::~
Scope
()
{
for
(
auto
*
x
:
kids_
)
{
if
(
x
)
{
delete
x
;
}
}
}
Scope
&
Scope
::
NewScope
()
const
{
kids_
.
push_back
(
new
Scope
);
...
...
paddle/fluid/lite/core/target_wrapper.h
浏览文件 @
c353397d
...
...
@@ -63,7 +63,8 @@ static const std::string& TargetToStr(TargetType target) {
}
static
const
std
::
string
&
PrecisionToStr
(
PrecisionType
precision
)
{
static
const
std
::
string
precision2string
[]
=
{
"unk"
,
"float"
,
"int8"
,
"any"
};
static
const
std
::
string
precision2string
[]
=
{
"unk"
,
"float"
,
"int8_t"
,
"any"
};
auto
x
=
static_cast
<
int
>
(
precision
);
CHECK_LT
(
x
,
static_cast
<
int
>
(
PRECISION
(
NUM
)));
return
precision2string
[
x
];
...
...
@@ -76,6 +77,29 @@ static const std::string& DataLayoutToStr(DataLayoutType layout) {
return
datalayout2string
[
x
];
}
static
const
std
::
string
&
TargetRepr
(
TargetType
target
)
{
static
const
std
::
string
target2string
[]
=
{
"kUnk"
,
"kHost"
,
"kX86"
,
"kCUDA"
,
"kAny"
};
auto
x
=
static_cast
<
int
>
(
target
);
CHECK_LT
(
x
,
static_cast
<
int
>
(
TARGET
(
NUM
)));
return
target2string
[
x
];
}
static
const
std
::
string
&
PrecisionRepr
(
PrecisionType
precision
)
{
static
const
std
::
string
precision2string
[]
=
{
"kUnk"
,
"kFloat"
,
"kInt8"
,
"kAny"
};
auto
x
=
static_cast
<
int
>
(
precision
);
CHECK_LT
(
x
,
static_cast
<
int
>
(
PRECISION
(
NUM
)));
return
precision2string
[
x
];
}
static
const
std
::
string
&
DataLayoutRepr
(
DataLayoutType
layout
)
{
static
const
std
::
string
datalayout2string
[]
=
{
"kUnk"
,
"kNCHW"
,
"kAny"
};
auto
x
=
static_cast
<
int
>
(
layout
);
CHECK_LT
(
x
,
static_cast
<
int
>
(
DATALAYOUT
(
NUM
)));
return
datalayout2string
[
x
];
}
/*
* Place specifies the execution context of a Kernel or input/output for a
* kernel. It is used to make the analysis of the MIR more clear and accurate.
...
...
@@ -228,5 +252,20 @@ class TargetWrapper<TARGET(kCUDA), cudaStream_t, cudaEvent_t> {
};
#endif // LITE_WITH_CUDA
template
<
TargetType
Target
>
void
CopySync
(
void
*
dst
,
void
*
src
,
size_t
size
,
IoDirection
dir
)
{
switch
(
Target
)
{
case
TARGET
(
kX86
):
case
TARGET
(
kHost
):
case
TARGET
(
kARM
):
TargetWrapperX86
::
MemcpySync
(
dst
,
src
,
size
,
IoDirection
::
HtoH
);
break
;
#ifdef LITE_WITH_CUDA
case
TARGET
(
kCUDA
):
TargetWrapperCuda
::
MemcpySync
(
dst
,
src
,
size
,
dir
);
#endif
}
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/core/tensor.h
浏览文件 @
c353397d
...
...
@@ -21,6 +21,7 @@
* looks the same.
*/
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/target_wrapper.h"
...
...
@@ -47,7 +48,8 @@ class DDimBase {
DDimBase
()
=
default
;
explicit
DDimBase
(
const
std
::
vector
<
int64_t
>
&
x
)
{
self
()
->
ConstructFrom
(
x
);
}
value_type
operator
[](
int
offset
)
const
{
return
(
*
self
())[
offset
];
}
value_type
operator
[](
int
offset
)
const
{
return
(
*
const_self
())[
offset
];
}
value_type
&
operator
[](
int
offset
)
{
return
(
*
self
())[
offset
];
}
std
::
vector
<
int64_t
>
Vectorize
()
const
{
return
self
()
->
Vectorize
();
}
size_t
size
()
const
{
return
const_self
()
->
size
();
}
bool
empty
()
const
{
return
const_self
()
->
empty
();
}
...
...
@@ -73,18 +75,19 @@ class DDimBase {
{
Slice
(
0
,
col
).
production
(),
Slice
(
col
,
size
()).
production
()}));
}
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
DDimT
&
dims
)
{
if
(
dims
.
empty
())
{
os
<<
"[]"
;
return
os
;
std
::
string
repr
()
const
{
std
::
stringstream
ss
;
ss
<<
"{"
;
for
(
size_t
i
=
0
;
i
<
this
->
size
()
-
1
;
i
++
)
{
ss
<<
(
*
this
)[
i
]
<<
","
;
}
if
(
!
this
->
empty
())
ss
<<
(
*
this
)[
size
()
-
1
];
ss
<<
"}"
;
return
ss
.
str
();
}
os
<<
"["
;
for
(
size_t
i
=
0
;
i
<
dims
.
size
()
-
1
;
i
++
)
{
os
<<
dims
[
i
]
<<
" "
;
}
if
(
!
dims
.
empty
())
os
<<
dims
[
dims
.
size
()
-
1
];
os
<<
"]"
;
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
DDimT
&
dims
)
{
os
<<
dims
.
repr
();
return
os
;
}
...
...
@@ -102,6 +105,12 @@ template <typename TensorT>
class
TensorBase
{
public:
TensorBase
()
=
default
;
template
<
typename
T
,
typename
DimT
>
void
Assign
(
T
*
data
,
const
DimT
&
dim
)
{
self
()
->
Assign
(
data
,
dim
);
}
TargetType
target
()
const
{
return
self
()
->
target
();
}
template
<
typename
T
>
...
...
paddle/fluid/lite/core/variable.h
浏览文件 @
c353397d
...
...
@@ -24,7 +24,7 @@ namespace lite {
class
Variable
{
public:
template
<
typename
T
>
const
T
&
Get
()
{
const
T
&
Get
()
const
{
return
blob_
.
get
<
T
>
();
}
...
...
paddle/fluid/lite/cuda/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -4,4 +4,3 @@ endif()
nv_library
(
target_wrapper_cuda SRCS target_wrapper.cc
)
nv_library
(
cuda_blas_lite SRCS blas.cc
)
paddle/fluid/lite/host/CMakeLists.txt
浏览文件 @
c353397d
cc_library
(
target_wrapper_host SRCS target_wrapper.cc
)
paddle/fluid/lite/kernels/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -5,4 +5,3 @@ add_subdirectory(arm)
add_subdirectory
(
cuda
)
add_subdirectory
(
x86
)
paddle/fluid/lite/kernels/arm/conv_compute.cc
浏览文件 @
c353397d
...
...
@@ -46,7 +46,6 @@ void ConvCompute::PrepareForRun() {
const
auto
*
b_data
=
param
.
bias
?
param
.
bias
->
data
<
float
>
()
:
nullptr
;
auto
*
o_data
=
param
.
output
->
mutable_data
<
float
>
();
// TODO(xxx): create should be somewhere better!
bool
kps_equal
=
(
param
.
paddings
[
0
]
==
param
.
paddings
[
1
])
&&
(
param
.
strides
[
0
]
==
param
.
strides
[
1
])
&&
(
kw
==
kh
);
bool
no_dilation
=
(
param
.
dilations
[
0
]
==
1
)
&&
(
param
.
dilations
[
1
]
==
1
);
...
...
@@ -60,26 +59,26 @@ void ConvCompute::PrepareForRun() {
if
(
param
.
groups
==
ic
&&
ic
==
oc
&&
kps_equal
&&
no_dilation
&&
flag_dw
)
{
// dw conv impl
impl_
=
new
lite
::
arm
::
math
::
DepthwiseConv
<
PRECISION
(
kFloat
)
>
;
// LOG(INFO
) << "invoking dw conv";
VLOG
(
3
)
<<
"invoking dw conv"
;
}
else
if
(
param
.
groups
==
1
&&
kw
==
3
&&
stride
==
1
&&
kps_equal
&&
no_dilation
)
{
if
(
ic
>=
32
&&
oc
>=
32
&&
oh
>
16
&&
ow
>
16
)
{
// winograd conv impl
impl_
=
new
lite
::
arm
::
math
::
WinogradConv
<
PRECISION
(
kFloat
)
>
;
// LOG(INFO
) << "invoking winograd conv";
VLOG
(
3
)
<<
"invoking winograd conv"
;
}
else
{
// direct conv impl
impl_
=
new
lite
::
arm
::
math
::
DirectConv
<
PRECISION
(
kFloat
)
>
;
// LOG(INFO
) << "invoking direct conv";
VLOG
(
3
)
<<
"invoking direct conv"
;
}
}
else
if
(
param
.
groups
==
1
&&
kw
==
3
&&
stride
==
2
&&
kps_equal
&&
no_dilation
)
{
// direct conv impl
impl_
=
new
lite
::
arm
::
math
::
DirectConv
<
PRECISION
(
kFloat
)
>
;
// LOG(INFO
) << "invoking direct conv";
VLOG
(
3
)
<<
"invoking direct conv"
;
}
else
{
impl_
=
new
lite
::
arm
::
math
::
GemmLikeConv
<
PRECISION
(
kFloat
)
>
;
// LOG(INFO
) << "invoking gemm like conv";
VLOG
(
3
)
<<
"invoking gemm like conv"
;
}
CHECK
(
this
->
impl_
->
create
(
param
,
&
ctx
));
}
...
...
paddle/fluid/lite/kernels/arm/fc_compute.cc
浏览文件 @
c353397d
...
...
@@ -56,7 +56,7 @@ void FcCompute::Run() {
}
else
{
// use sgemmv
// sgemv((const float*)weights, (const float*)din, (float*)dout,
// false, n, x_w,
param_
->_flag_bias, (float*)bias, false);
// false, n, x_w,
_param
->_flag_bias, (float*)bias, false);
}
}
...
...
paddle/fluid/lite/kernels/cuda/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -9,4 +9,3 @@ cc_library(io_copy_compute_cuda SRCS io_copy_compute.cc DEPS ${tensor_lite})
nv_library
(
kernels_cuda DEPS mul_compute_cuda io_copy_compute_cuda cuda_blas_lite
)
paddle/fluid/lite/kernels/host/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -12,5 +12,4 @@ set(host_kernels
reshape_compute_host
)
set
(
host_kernels
"
${
host_kernels
}
"
CACHE INTERNAL
"host kernels"
)
set
(
host_kernels
"
${
host_kernels
}
"
CACHE GLOBAL
"host kernels"
)
paddle/fluid/lite/kernels/x86/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -15,6 +15,8 @@ cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS ${lite_kerne
cc_library
(
softmax_compute_x86 SRCS softmax_compute.cc DEPS
${
lite_kernel_deps
}
softmax
)
cc_library
(
dropout_compute_x86 SRCS dropout_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
concat_compute_x86 SRCS concat_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
conv_compute_x86 SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
blas im2col vol2col
)
cc_library
(
pool_compute_x86 SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
pooling
)
set
(
x86_kernels
activation_compute_x86
...
...
@@ -25,10 +27,11 @@ set(x86_kernels
relu_compute_x86
fc_compute_x86
scale_compute_x86
softmax_compute_x86
softmax_compute_x86
dropout_compute_x86
concat_compute_x86
conv_compute_x86
pool_compute_x86
)
set
(
x86_kernels
"
${
x86_kernels
}
"
CACHE INTERNAL
"x86 kernels"
)
paddle/fluid/lite/model_parser/compatible_pb.cc
浏览文件 @
c353397d
...
...
@@ -13,7 +13,8 @@
// limitations under the License.
#include "paddle/fluid/lite/model_parser/compatible_pb.h"
#include "compatible_pb.h"
#include <string>
#include <vector>
namespace
paddle
{
namespace
lite
{
...
...
paddle/fluid/lite/model_parser/cpp/op_desc.cc
浏览文件 @
c353397d
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/lite/model_parser/cpp/op_desc.h"
#include <set>
#include <utility>
namespace
paddle
{
namespace
lite
{
...
...
@@ -44,12 +45,13 @@ FindAttr(const cpp::OpDesc& desc, const std::string& name) {
return
std
::
make_pair
(
it
,
attr_it
);
}
#define GET_IMPL_ONE(T, repr__) \
template <> \
T OpDesc::GetAttr<T>(const std::string& name) const { \
auto pair = FindAttr(*this, name); \
CHECK(pair.second->second == AttrType::repr__); \
return pair.first->second.get<T>(); \
#define GET_IMPL_ONE(T, repr__) \
template <> \
T OpDesc::GetAttr<T>(const std::string& name) const { \
auto pair = FindAttr(*this, name); \
CHECK(pair.second->second == AttrType::repr__) \
<< "required type is " << #repr__ << " not match the true type"; \
return pair.first->second.get<T>(); \
}
GET_IMPL_ONE
(
int32_t
,
INT
);
...
...
paddle/fluid/lite/model_parser/pb/op_desc.cc
浏览文件 @
c353397d
...
...
@@ -44,7 +44,7 @@ FindAttr(framework::proto::OpDesc *desc, const std::string &name) {
}
SET_IMPL_ONE
(
int
,
INT
,
i
);
SET_IMPL_ONE
(
float
,
FLOAT
,
f
);
SET_IMPL_ONE
(
bool
,
FLOAT
,
f
);
SET_IMPL_ONE
(
bool
,
BOOLEAN
,
b
);
template
<
>
void
OpDesc
::
SetAttr
<
std
::
vector
<
int
>>
(
const
std
::
string
&
name
,
...
...
paddle/fluid/lite/operators/CMakeLists.txt
浏览文件 @
c353397d
set
(
op_DEPS
${
tensor_lite
}
op_lite op_params_lite
)
cc_library
(
conv_op_lite SRCS conv_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
pool_op_lite SRCS pool_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
fc_op_lite SRCS fc_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
relu_op_lite SRCS relu_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
mul_op_lite SRCS mul_op.cc DEPS
${
op_DEPS
}
)
...
...
@@ -18,10 +19,10 @@ cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS})
cc_library
(
op_params_lite SRCS op_params.cc DEPS
${
tensor_lite
}
any_lite framework_proto_lite
)
cc_library
(
dropout_op_lite SRCS dropout_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
concat_op_lite SRCS concat_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
pool_op_lite SRCS pool_op.cc DEPS
${
op_DEPS
}
)
set
(
ops_lite
conv_op_lite
pool_op_lite
fc_op_lite
relu_op_lite
mul_op_lite
...
...
@@ -42,11 +43,11 @@ set(ops_lite
lite_cc_test
(
test_fc_op_lite SRCS fc_op_test.cc
DEPS fc_op_lite memory_lite
X86_DEPS fc_compute_x86
ARM_DEPS fc_compute_arm
)
ARM_DEPS fc_compute_arm
)
lite_cc_test
(
test_pool_op_lite SRCS pool_op_test.cc
DEPS pool_op_lite memory_lite
ARM_DEPS pool_compute_arm
)
lite_cc_test
(
test_scale_op_lite SRCS scale_op_test.cc DEPS scale_op_lite memory_lite
)
lite_cc_test
(
test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite memory_lite
)
lite_cc_test
(
test_reshape_op_lite SRCS reshape_op_test.cc DEPS reshape_op_lite memory_lite
)
lite_cc_test
(
test_concat_op_lite SRCS concat_op_test.cc DEPS concat_op_lite memory_lite
)
lite_cc_test
(
test_pool_op_lite SRCS pool_op_test.cc
DEPS pool_op_lite memory_lite
ARM_DEPS pool_compute_arm
)
paddle/fluid/lite/operators/conv_op.cc
浏览文件 @
c353397d
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/lite/operators/conv_op.h"
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
...
...
@@ -74,4 +75,4 @@ bool ConvOpLite::InferShape() const {
}
// namespace paddle
REGISTER_LITE_OP
(
conv2d
,
paddle
::
lite
::
operators
::
ConvOpLite
);
REGISTER_LITE_OP
(
depthwise_conv2d
,
paddle
::
lite
::
operators
::
ConvOpLite
);
\ No newline at end of file
REGISTER_LITE_OP
(
depthwise_conv2d
,
paddle
::
lite
::
operators
::
ConvOpLite
);
paddle/fluid/lite/operators/conv_op.h
浏览文件 @
c353397d
...
...
@@ -13,7 +13,6 @@
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
...
...
@@ -60,12 +59,13 @@ class ConvOpLite : public OpLite {
const_cast
<
lite
::
Tensor
*>
(
&
(
bias_var
->
Get
<
lite
::
Tensor
>
()));
}
}
if
(
std
::
find
(
input_arg_names
.
begin
(),
input_arg_names
.
end
(),
"ResidualData"
)
!=
input_arg_names
.
end
())
{
auto
residual_data_var
=
scope
->
FindVar
(
op_desc
.
Input
(
"ResidualData"
).
front
());
if
(
std
::
find
(
input_arg_names
.
begin
(),
input_arg_names
.
end
(),
"ResidualData"
)
!=
input_arg_names
.
end
())
{
auto
residual_data_var
=
scope
->
FindVar
(
op_desc
.
Input
(
"ResidualData"
).
front
());
if
(
residual_data_var
!=
nullptr
)
{
param_
.
residualData
=
const_cast
<
lite
::
Tensor
*>
(
&
(
residual_data_var
->
Get
<
lite
::
Tensor
>
()));
param_
.
residualData
=
const_cast
<
lite
::
Tensor
*>
(
&
(
residual_data_var
->
Get
<
lite
::
Tensor
>
()));
}
}
return
true
;
...
...
paddle/fluid/lite/operators/feed_op.cc
浏览文件 @
c353397d
...
...
@@ -38,8 +38,8 @@ class FeedOp : public OpLite {
auto
feed_var_name
=
opdesc
.
Input
(
"X"
).
front
();
auto
*
feed_var
=
scope
->
FindVar
(
feed_var_name
);
CHECK
(
feed_var
);
auto
&
feed_tensor_list
=
feed_var
->
Get
<
std
::
vector
<
lite
::
Tensor
>>
();
param_
.
feed_list
=
&
feed_tensor_list
;
auto
*
feed_tensor_list
=
feed_var
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
param_
.
feed_list
=
feed_tensor_list
;
auto
out_name
=
opdesc
.
Output
(
"Out"
).
front
();
auto
*
out_var
=
scope
->
FindVar
(
out_name
);
...
...
paddle/fluid/lite/operators/mul_op.h
浏览文件 @
c353397d
...
...
@@ -45,10 +45,11 @@ class MulOpLite : public OpLite {
CHECK
(
var
);
param_
.
x
=
var
->
GetMutable
<
Tensor
>
();
var
=
scope
->
FindVar
(
W
);
CHECK
(
var
);
CHECK
(
var
)
<<
"no var called "
<<
W
;
param_
.
y
=
var
->
GetMutable
<
Tensor
>
();
CHECK
(
scope
->
FindVar
(
out
));
param_
.
output
=
scope
->
FindVar
(
out
)
->
GetMutable
<
Tensor
>
();
var
=
scope
->
FindVar
(
out
);
CHECK
(
var
)
<<
"no var called "
<<
out
;
param_
.
output
=
var
->
GetMutable
<
Tensor
>
();
param_
.
x_num_col_dims
=
op_desc
.
GetAttr
<
int
>
(
"x_num_col_dims"
);
param_
.
y_num_col_dims
=
op_desc
.
GetAttr
<
int
>
(
"y_num_col_dims"
);
...
...
paddle/fluid/lite/operators/pool_op.cc
浏览文件 @
c353397d
...
...
@@ -85,4 +85,4 @@ bool PoolOpLite::InferShape() const {
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_OP
(
pool
,
paddle
::
lite
::
operators
::
PoolOpLite
);
REGISTER_LITE_OP
(
pool
2d
,
paddle
::
lite
::
operators
::
PoolOpLite
);
paddle/fluid/lite/operators/pool_op.h
浏览文件 @
c353397d
...
...
@@ -37,14 +37,6 @@ class PoolOpLite : public OpLite {
bool
InferShape
()
const
override
;
/*
bool Run() override {
CHECK(kernel_);
kernel_->Run();
return true;
}
*/
// TODO(Superjomn) replace framework::OpDesc with a lite one.
bool
AttachImpl
(
const
cpp
::
OpDesc
&
op_desc
,
lite
::
Scope
*
scope
)
override
{
auto
x
=
op_desc
.
Input
(
"X"
).
front
();
...
...
paddle/fluid/lite/tools/Dockerfile.mobile
浏览文件 @
c353397d
...
...
@@ -88,4 +88,4 @@ RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple wheel
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pre-commit
RUN apt-get autoremove -y && apt-get clean
RUN rm -rf /sdk-tools-linux-4333796.zip /tmp/android-ndk-r17c-linux-x86_64.zip /cmake-3.10.3-Linux-x86_64.tar.gz
\ No newline at end of file
paddle/fluid/lite/tools/build.sh
浏览文件 @
c353397d
...
...
@@ -2,17 +2,29 @@
set
-ex
TESTS_FILE
=
"./lite_tests.txt"
LIBS_FILE
=
"./lite_libs.txt"
readonly
common_flags
=
"-DWITH_LITE=ON -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DWITH_PYTHON=OFF -DWITH_TESTING=ON -DLITE_WITH_ARM=OFF"
# for code gen, a source file is generated after a test, but is dependended by some targets in cmake.
# here we fake an empty file to make cmake works.
function
prepare_for_codegen
{
# in build directory
mkdir
-p
./paddle/fluid/lite/gen_code
touch
./paddle/fluid/lite/gen_code/__generated_code__.cc
}
function
cmake_x86
{
prepare_for_codegen
cmake ..
-DWITH_GPU
=
OFF
-DWITH_MKLDNN
=
OFF
-DLITE_WITH_X86
=
ON
${
common_flags
}
}
function
cmake_x86_for_CI
{
prepare_for_codegen
cmake ..
-DWITH_GPU
=
OFF
-DWITH_MKLDNN
=
OFF
-DLITE_WITH_X86
=
ON
${
common_flags
}
-DLITE_WITH_PROFILE
=
ON
}
function
cmake_gpu
{
prepare_for_codegen
cmake ..
" -DWITH_GPU=ON {common_flags} -DLITE_WITH_GPU=ON"
}
...
...
@@ -34,7 +46,7 @@ function cmake_arm {
function
build
{
file
=
$1
for
_test
in
$(
cat
$file
)
;
do
make
$_test
-j
$(
expr
$(
nproc
))
make
$_test
-j
$(
expr
$(
nproc
)
- 2
)
done
}
...
...
@@ -42,7 +54,11 @@ function build {
function
test_lite
{
local
file
=
$1
echo
"file:
${
file
}
"
for
_test
in
$(
cat
$file
)
;
do
# We move the build phase here to make the 'gen_code' test compiles after the
# corresponding test is executed and the C++ code generates.
make
$_test
-j
$(
expr
$(
nproc
)
- 2
)
ctest
-R
$_test
-V
done
}
...
...
@@ -86,8 +102,10 @@ function build_test_server {
cd
./build
export
LD_LIBRARY_PATH
=
"
$LD_LIBRARY_PATH
:/paddle/build/third_party/install/mklml/lib"
cmake_x86_for_CI
build
$TESTS_FILE
# compile the tests and execute them.
test_lite
$TESTS_FILE
# build the remaining libraries to check compiling error.
build
$LIBS_FILE
}
# Build the code and run lite server tests. This is executed in the CI system.
...
...
@@ -117,7 +135,6 @@ function build_test_arm {
build_dir
=
build.lite.
${
os
}
.
${
abi
}
mkdir
-p
$build_dir
cd
$build_dir
cmake_arm
${
os
}
${
abi
}
build
$TESTS_FILE
...
...
@@ -167,6 +184,7 @@ function main {
;;
build
)
build
$TESTS_FILE
build
$LIBS_FILE
shift
;;
cmake_x86
)
...
...
paddle/fluid/lite/utils/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -8,5 +8,4 @@ set(utils_DEPS glog)
lite_cc_test
(
test_varient SRCS varient_test.cc DEPS utils_lite
)
cc_library
(
any_lite SRCS any.cc
)
cc_library
(
utils_lite SRCS cp_logging.cc DEPS
${
utils_DEPS
}
any_lite
)
cc_library
(
utils_lite SRCS cp_logging.cc string.cc DEPS
${
utils_DEPS
}
any_lite
)
paddle/fluid/lite/x86/CMakeLists.txt
浏览文件 @
c353397d
...
...
@@ -4,4 +4,3 @@ endif()
cc_library
(
target_wrapper_x86 SRCS target_wrapper.cc
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录