Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
e8ebb084
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e8ebb084
编写于
6月 19, 2019
作者:
N
nhzlx
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'incubate/lite' of
http://10.87.145.36/inference/paddlelite
into xzl/incubate/lite
上级
73249df2
d1f8d02f
变更
119
隐藏空白更改
内联
并排
Showing
119 changed file
with
4950 addition
and
1183 deletion
+4950
-1183
CMakeLists.txt
CMakeLists.txt
+30
-6
cmake/cross_compiling/android.cmake
cmake/cross_compiling/android.cmake
+35
-13
cmake/cross_compiling/armlinux.cmake
cmake/cross_compiling/armlinux.cmake
+12
-5
cmake/cross_compiling/findar.cmake
cmake/cross_compiling/findar.cmake
+33
-0
cmake/external/gflags.cmake
cmake/external/gflags.cmake
+2
-1
cmake/external/glog.cmake
cmake/external/glog.cmake
+2
-1
cmake/external/gtest.cmake
cmake/external/gtest.cmake
+3
-1
cmake/external/protobuf.cmake
cmake/external/protobuf.cmake
+1
-0
paddle/fluid/inference/analysis/passes/CMakeLists.txt
paddle/fluid/inference/analysis/passes/CMakeLists.txt
+1
-1
paddle/fluid/inference/analysis/passes/use_passes.cc
paddle/fluid/inference/analysis/passes/use_passes.cc
+1
-1
paddle/fluid/lite/CMakeLists.txt
paddle/fluid/lite/CMakeLists.txt
+11
-0
paddle/fluid/lite/api/CMakeLists.txt
paddle/fluid/lite/api/CMakeLists.txt
+41
-16
paddle/fluid/lite/api/apis_test.cc
paddle/fluid/lite/api/apis_test.cc
+95
-0
paddle/fluid/lite/api/cxx_api.h
paddle/fluid/lite/api/cxx_api.h
+5
-0
paddle/fluid/lite/api/cxx_api_bin.cc
paddle/fluid/lite/api/cxx_api_bin.cc
+4
-3
paddle/fluid/lite/api/cxx_api_test.cc
paddle/fluid/lite/api/cxx_api_test.cc
+44
-80
paddle/fluid/lite/api/light_api.h
paddle/fluid/lite/api/light_api.h
+10
-5
paddle/fluid/lite/api/light_api_test.cc
paddle/fluid/lite/api/light_api_test.cc
+10
-22
paddle/fluid/lite/api/lite_api_test_helper.cc
paddle/fluid/lite/api/lite_api_test_helper.cc
+59
-0
paddle/fluid/lite/api/lite_api_test_helper.h
paddle/fluid/lite/api/lite_api_test_helper.h
+31
-0
paddle/fluid/lite/arm/math/CMakeLists.txt
paddle/fluid/lite/arm/math/CMakeLists.txt
+1
-0
paddle/fluid/lite/arm/math/concat.cc
paddle/fluid/lite/arm/math/concat.cc
+59
-0
paddle/fluid/lite/arm/math/concat.h
paddle/fluid/lite/arm/math/concat.h
+34
-0
paddle/fluid/lite/arm/math/elementwise.cc
paddle/fluid/lite/arm/math/elementwise.cc
+131
-3
paddle/fluid/lite/arm/math/elementwise.h
paddle/fluid/lite/arm/math/elementwise.h
+9
-2
paddle/fluid/lite/core/CMakeLists.txt
paddle/fluid/lite/core/CMakeLists.txt
+1
-1
paddle/fluid/lite/core/context.cc
paddle/fluid/lite/core/context.cc
+22
-17
paddle/fluid/lite/core/hvy_tensor.h
paddle/fluid/lite/core/hvy_tensor.h
+4
-0
paddle/fluid/lite/core/lite_tensor.h
paddle/fluid/lite/core/lite_tensor.h
+2
-0
paddle/fluid/lite/core/mir/CMakeLists.txt
paddle/fluid/lite/core/mir/CMakeLists.txt
+8
-3
paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.cc
...ite/core/mir/conv_elementwise_add_activation_fuse_pass.cc
+8
-7
paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.h
...lite/core/mir/conv_elementwise_add_activation_fuse_pass.h
+32
-0
paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass_test.cc
...ore/mir/conv_elementwise_add_activation_fuse_pass_test.cc
+5
-5
paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.cc
...uid/lite/core/mir/elementwise_add_activation_fuse_pass.cc
+36
-0
paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.h
...luid/lite/core/mir/elementwise_add_activation_fuse_pass.h
+1
-1
paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass_test.cc
...ite/core/mir/elementwise_add_activation_fuse_pass_test.cc
+117
-0
paddle/fluid/lite/core/mir/fc_fuse_pass_test.cc
paddle/fluid/lite/core/mir/fc_fuse_pass_test.cc
+1
-1
paddle/fluid/lite/core/mir/fusion/CMakeLists.txt
paddle/fluid/lite/core/mir/fusion/CMakeLists.txt
+7
-3
paddle/fluid/lite/core/mir/fusion/conv_bn_fuser.cc
paddle/fluid/lite/core/mir/fusion/conv_bn_fuser.cc
+1
-1
paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.cc
.../core/mir/fusion/conv_elementwise_add_activation_fuser.cc
+13
-9
paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.h
...e/core/mir/fusion/conv_elementwise_add_activation_fuser.h
+47
-0
paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.cc
.../lite/core/mir/fusion/elementwise_add_activation_fuser.cc
+87
-0
paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.h
...d/lite/core/mir/fusion/elementwise_add_activation_fuser.h
+4
-4
paddle/fluid/lite/core/mir/generate_program_pass.cc
paddle/fluid/lite/core/mir/generate_program_pass.cc
+1
-1
paddle/fluid/lite/core/mir/ssa_graph.cc
paddle/fluid/lite/core/mir/ssa_graph.cc
+4
-1
paddle/fluid/lite/core/mir/ssa_graph_test.cc
paddle/fluid/lite/core/mir/ssa_graph_test.cc
+1
-1
paddle/fluid/lite/core/mir/use_passes.h
paddle/fluid/lite/core/mir/use_passes.h
+2
-9
paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc
...fluid/lite/core/mir/variable_place_inference_pass_test.cc
+1
-1
paddle/fluid/lite/core/op_lite.h
paddle/fluid/lite/core/op_lite.h
+1
-3
paddle/fluid/lite/core/optimizer.h
paddle/fluid/lite/core/optimizer.h
+4
-3
paddle/fluid/lite/core/optimizer_test.cc
paddle/fluid/lite/core/optimizer_test.cc
+1
-1
paddle/fluid/lite/core/profile/basic_profiler.cc
paddle/fluid/lite/core/profile/basic_profiler.cc
+1
-1
paddle/fluid/lite/core/tensor.h
paddle/fluid/lite/core/tensor.h
+21
-0
paddle/fluid/lite/kernels/arm/CMakeLists.txt
paddle/fluid/lite/kernels/arm/CMakeLists.txt
+9
-3
paddle/fluid/lite/kernels/arm/concat_compute.cc
paddle/fluid/lite/kernels/arm/concat_compute.cc
+87
-0
paddle/fluid/lite/kernels/arm/concat_compute.h
paddle/fluid/lite/kernels/arm/concat_compute.h
+37
-0
paddle/fluid/lite/kernels/arm/concat_compute_test.cc
paddle/fluid/lite/kernels/arm/concat_compute_test.cc
+235
-0
paddle/fluid/lite/kernels/arm/conv_compute.cc
paddle/fluid/lite/kernels/arm/conv_compute.cc
+2
-0
paddle/fluid/lite/kernels/arm/elementwise_compute.cc
paddle/fluid/lite/kernels/arm/elementwise_compute.cc
+111
-0
paddle/fluid/lite/kernels/arm/elementwise_compute.h
paddle/fluid/lite/kernels/arm/elementwise_compute.h
+8
-0
paddle/fluid/lite/kernels/arm/elementwise_compute_test.cc
paddle/fluid/lite/kernels/arm/elementwise_compute_test.cc
+263
-0
paddle/fluid/lite/kernels/arm/fc_compute.cc
paddle/fluid/lite/kernels/arm/fc_compute.cc
+3
-0
paddle/fluid/lite/kernels/arm/mul_compute.cc
paddle/fluid/lite/kernels/arm/mul_compute.cc
+2
-1
paddle/fluid/lite/kernels/arm/pool_compute.cc
paddle/fluid/lite/kernels/arm/pool_compute.cc
+5
-0
paddle/fluid/lite/kernels/arm/pool_compute.h
paddle/fluid/lite/kernels/arm/pool_compute.h
+1
-0
paddle/fluid/lite/kernels/arm/transpose_compute.cc
paddle/fluid/lite/kernels/arm/transpose_compute.cc
+173
-0
paddle/fluid/lite/kernels/arm/transpose_compute.h
paddle/fluid/lite/kernels/arm/transpose_compute.h
+48
-0
paddle/fluid/lite/kernels/arm/transpose_compute_test.cc
paddle/fluid/lite/kernels/arm/transpose_compute_test.cc
+205
-0
paddle/fluid/lite/kernels/arm/use_kernels.h
paddle/fluid/lite/kernels/arm/use_kernels.h
+1
-0
paddle/fluid/lite/kernels/use_kernels.h
paddle/fluid/lite/kernels/use_kernels.h
+56
-0
paddle/fluid/lite/kernels/x86/CMakeLists.txt
paddle/fluid/lite/kernels/x86/CMakeLists.txt
+12
-0
paddle/fluid/lite/kernels/x86/concat_compute.cc
paddle/fluid/lite/kernels/x86/concat_compute.cc
+1
-82
paddle/fluid/lite/kernels/x86/concat_compute.h
paddle/fluid/lite/kernels/x86/concat_compute.h
+98
-0
paddle/fluid/lite/kernels/x86/concat_compute_test.cc
paddle/fluid/lite/kernels/x86/concat_compute_test.cc
+83
-0
paddle/fluid/lite/kernels/x86/conv_compute.cc
paddle/fluid/lite/kernels/x86/conv_compute.cc
+1
-138
paddle/fluid/lite/kernels/x86/conv_compute.h
paddle/fluid/lite/kernels/x86/conv_compute.h
+153
-0
paddle/fluid/lite/kernels/x86/conv_compute_test.cc
paddle/fluid/lite/kernels/x86/conv_compute_test.cc
+92
-0
paddle/fluid/lite/kernels/x86/dropout_compute.cc
paddle/fluid/lite/kernels/x86/dropout_compute.cc
+1
-66
paddle/fluid/lite/kernels/x86/dropout_compute.h
paddle/fluid/lite/kernels/x86/dropout_compute.h
+81
-0
paddle/fluid/lite/kernels/x86/dropout_compute_test.cc
paddle/fluid/lite/kernels/x86/dropout_compute_test.cc
+78
-0
paddle/fluid/lite/kernels/x86/elementwise_compute.cc
paddle/fluid/lite/kernels/x86/elementwise_compute.cc
+1
-106
paddle/fluid/lite/kernels/x86/elementwise_compute.h
paddle/fluid/lite/kernels/x86/elementwise_compute.h
+120
-0
paddle/fluid/lite/kernels/x86/elementwise_compute_test.cc
paddle/fluid/lite/kernels/x86/elementwise_compute_test.cc
+86
-0
paddle/fluid/lite/kernels/x86/fc_compute.cc
paddle/fluid/lite/kernels/x86/fc_compute.cc
+1
-83
paddle/fluid/lite/kernels/x86/fc_compute.h
paddle/fluid/lite/kernels/x86/fc_compute.h
+98
-0
paddle/fluid/lite/kernels/x86/fc_compute_test.cc
paddle/fluid/lite/kernels/x86/fc_compute_test.cc
+100
-0
paddle/fluid/lite/kernels/x86/mul_compute.cc
paddle/fluid/lite/kernels/x86/mul_compute.cc
+1
-116
paddle/fluid/lite/kernels/x86/mul_compute.h
paddle/fluid/lite/kernels/x86/mul_compute.h
+131
-0
paddle/fluid/lite/kernels/x86/mul_compute_test.cc
paddle/fluid/lite/kernels/x86/mul_compute_test.cc
+84
-0
paddle/fluid/lite/kernels/x86/pool_compute.cc
paddle/fluid/lite/kernels/x86/pool_compute.cc
+2
-61
paddle/fluid/lite/kernels/x86/pool_compute.h
paddle/fluid/lite/kernels/x86/pool_compute.h
+75
-0
paddle/fluid/lite/kernels/x86/pool_compute_test.cc
paddle/fluid/lite/kernels/x86/pool_compute_test.cc
+79
-0
paddle/fluid/lite/kernels/x86/relu_compute.cc
paddle/fluid/lite/kernels/x86/relu_compute.cc
+1
-36
paddle/fluid/lite/kernels/x86/relu_compute.h
paddle/fluid/lite/kernels/x86/relu_compute.h
+52
-0
paddle/fluid/lite/kernels/x86/relu_compute_test.cc
paddle/fluid/lite/kernels/x86/relu_compute_test.cc
+75
-0
paddle/fluid/lite/kernels/x86/scale_compute.cc
paddle/fluid/lite/kernels/x86/scale_compute.cc
+1
-42
paddle/fluid/lite/kernels/x86/scale_compute.h
paddle/fluid/lite/kernels/x86/scale_compute.h
+57
-0
paddle/fluid/lite/kernels/x86/scale_compute_test.cc
paddle/fluid/lite/kernels/x86/scale_compute_test.cc
+76
-0
paddle/fluid/lite/kernels/x86/softmax_compute.cc
paddle/fluid/lite/kernels/x86/softmax_compute.cc
+1
-70
paddle/fluid/lite/kernels/x86/softmax_compute.h
paddle/fluid/lite/kernels/x86/softmax_compute.h
+86
-0
paddle/fluid/lite/kernels/x86/softmax_compute_test.cc
paddle/fluid/lite/kernels/x86/softmax_compute_test.cc
+74
-0
paddle/fluid/lite/model_parser/CMakeLists.txt
paddle/fluid/lite/model_parser/CMakeLists.txt
+8
-11
paddle/fluid/lite/model_parser/model_parser.cc
paddle/fluid/lite/model_parser/model_parser.cc
+1
-1
paddle/fluid/lite/operators/CMakeLists.txt
paddle/fluid/lite/operators/CMakeLists.txt
+10
-2
paddle/fluid/lite/operators/dropout_op.cc
paddle/fluid/lite/operators/dropout_op.cc
+1
-1
paddle/fluid/lite/operators/elementwise_ops.cc
paddle/fluid/lite/operators/elementwise_ops.cc
+53
-78
paddle/fluid/lite/operators/elementwise_ops.h
paddle/fluid/lite/operators/elementwise_ops.h
+65
-0
paddle/fluid/lite/operators/fusion_elementwise_activation_ops.cc
...fluid/lite/operators/fusion_elementwise_activation_ops.cc
+57
-0
paddle/fluid/lite/operators/fusion_elementwise_activation_ops.h
.../fluid/lite/operators/fusion_elementwise_activation_ops.h
+60
-0
paddle/fluid/lite/operators/fusion_elementwise_activation_ops_test.cc
.../lite/operators/fusion_elementwise_activation_ops_test.cc
+63
-0
paddle/fluid/lite/operators/op_params.h
paddle/fluid/lite/operators/op_params.h
+17
-0
paddle/fluid/lite/operators/split_op.cc
paddle/fluid/lite/operators/split_op.cc
+3
-3
paddle/fluid/lite/operators/transpose_op.cc
paddle/fluid/lite/operators/transpose_op.cc
+165
-0
paddle/fluid/lite/operators/transpose_op.h
paddle/fluid/lite/operators/transpose_op.h
+66
-0
paddle/fluid/lite/operators/transpose_op_test.cc
paddle/fluid/lite/operators/transpose_op_test.cc
+93
-0
paddle/fluid/lite/operators/use_ops.h
paddle/fluid/lite/operators/use_ops.h
+36
-0
paddle/fluid/lite/tools/build.sh
paddle/fluid/lite/tools/build.sh
+98
-46
paddle/fluid/lite/tools/mobile_readme.md
paddle/fluid/lite/tools/mobile_readme.md
+10
-2
paddle/fluid/lite/x86/CMakeLists.txt
paddle/fluid/lite/x86/CMakeLists.txt
+0
-2
未找到文件。
CMakeLists.txt
浏览文件 @
e8ebb084
...
@@ -22,6 +22,7 @@ include(system)
...
@@ -22,6 +22,7 @@ include(system)
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
cmake_minimum_required
(
VERSION 3.10
)
cmake_minimum_required
(
VERSION 3.10
)
# TODO(TJ): make as function check_default
# TODO(TJ): make as function check_default
# check os
if
(
NOT DEFINED ARM_TARGET_OS
)
if
(
NOT DEFINED ARM_TARGET_OS
)
set
(
ARM_TARGET_OS
"android"
CACHE STRING
"Choose ARM Target OS"
)
set
(
ARM_TARGET_OS
"android"
CACHE STRING
"Choose ARM Target OS"
)
endif
()
endif
()
...
@@ -31,19 +32,27 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
...
@@ -31,19 +32,27 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
message
(
FATAL_ERROR
"ARM_TARGET_OS must be in one of
${
ARM_TARGET_OS_LIST
}
"
)
message
(
FATAL_ERROR
"ARM_TARGET_OS must be in one of
${
ARM_TARGET_OS_LIST
}
"
)
endif
()
endif
()
# check arch abi
if
(
NOT DEFINED ARM_TARGET_ARCH_ABI
)
if
(
NOT DEFINED ARM_TARGET_ARCH_ABI
)
set
(
ARM_TARGET_ARCH_ABI
"arm
64-v8a
"
CACHE STRING
"Choose ARM Target ARCH ABI"
)
set
(
ARM_TARGET_ARCH_ABI
"arm
v8
"
CACHE STRING
"Choose ARM Target ARCH ABI"
)
endif
()
endif
()
set
(
ARM_TARGET_ARCH_ABI_LIST
"arm
64-v8a"
"armeabi-v7a"
"armeabi-v7a-softfp"
"armeabi-v7a-hf
"
)
set
(
ARM_TARGET_ARCH_ABI_LIST
"arm
v8"
"armv7"
"armv7hf"
"arm64-v8a"
"armeabi-v7a
"
)
set_property
(
CACHE ARM_TARGET_ARCH_ABI PROPERTY STRINGS
${
ARM_TARGET_ARCH_ABI_LIST
}
)
set_property
(
CACHE ARM_TARGET_ARCH_ABI PROPERTY STRINGS
${
ARM_TARGET_ARCH_ABI_LIST
}
)
if
(
NOT ARM_TARGET_ARCH_ABI IN_LIST ARM_TARGET_ARCH_ABI_LIST
)
if
(
NOT ARM_TARGET_ARCH_ABI IN_LIST ARM_TARGET_ARCH_ABI_LIST
)
message
(
FATAL_ERROR
"ARM_TARGET_ARCH_ABI must be in one of
${
ARM_TARGET_ARCH_ABI_LIST
}
"
)
message
(
FATAL_ERROR
"ARM_TARGET_ARCH_ABI must be in one of
${
ARM_TARGET_ARCH_ABI_LIST
}
"
)
endif
()
endif
()
if
(
NOT DEFINED TARGET_ARCH_ABI
)
# check arch abi
set
(
ARCH_ABI
"arm64-v8a"
CACHE STRING
"Choose android platform"
)
if
(
NOT DEFINED ARM_TARGET_LANG
)
set
(
ARM_TARGET_LANG
"clang"
CACHE STRING
"Choose ARM Target Language"
)
endif
()
endif
()
set
(
ARM_TARGET_LANG_LIST
"gcc"
"clang"
)
set_property
(
CACHE ARM_TARGET_LANG PROPERTY STRINGS
${
ARM_TARGET_LANG_LIST
}
)
if
(
NOT ARM_TARGET_LANG IN_LIST ARM_TARGET_LANG_LIST
)
message
(
FATAL_ERROR
"ARM_TARGET_LANG must be in one of
${
ARM_TARGET_LANG_LIST
}
"
)
endif
()
message
(
STATUS
"Lite ARM Compile
${
ARM_TARGET_OS
}
with
${
ARM_TARGET_ARCH_ABI
}
${
ARM_TARGET_LANG
}
"
)
include
(
cross_compiling/host
)
include
(
cross_compiling/host
)
include
(
cross_compiling/armlinux
)
include
(
cross_compiling/armlinux
)
include
(
cross_compiling/android
)
include
(
cross_compiling/android
)
...
@@ -159,6 +168,9 @@ include_directories("${PADDLE_SOURCE_DIR}")
...
@@ -159,6 +168,9 @@ include_directories("${PADDLE_SOURCE_DIR}")
# for mobile
# for mobile
if
(
WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
if
(
WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
message
(
STATUS
"Building the mobile framework"
)
message
(
STATUS
"Building the mobile framework"
)
if
(
ANDROID
)
include
(
cross_compiling/findar
)
endif
()
# include the necessary thirdparty dependencies
# include the necessary thirdparty dependencies
include
(
external/gflags
)
# download, build, install gflags
include
(
external/gflags
)
# download, build, install gflags
include
(
external/glog
)
# download, build, install glog
include
(
external/glog
)
# download, build, install glog
...
@@ -171,8 +183,20 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
...
@@ -171,8 +183,20 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
include
(
generic
)
# simplify cmake module
include
(
generic
)
# simplify cmake module
include
(
configure
)
# add paddle env configuration
include
(
configure
)
# add paddle env configuration
add_definitions
(
-std=c++11
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-std=c++11"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-std=c++11"
)
find_package
(
OpenMP REQUIRED
)
if
(
OPENMP_FOUND OR OpenMP_CXX_FOUND
)
add_definitions
(
-DARM_WITH_OMP
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
${
OpenMP_C_FLAGS
}
"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
${
OpenMP_CXX_FLAGS
}
"
)
message
(
STATUS
"Found OpenMP
${
OpenMP_VERSION
}
${
OpenMP_CXX_VERSION
}
"
)
message
(
STATUS
" |-- OpenMP C flags:
${
OpenMP_C_FLAGS
}
"
)
message
(
STATUS
" |-- OpenMP CXX flags:
${
OpenMP_CXX_FLAGS
}
"
)
message
(
STATUS
" |-- OpenMP OpenMP_CXX_LIB_NAMES:
${
OpenMP_CXX_LIB_NAMES
}
"
)
message
(
STATUS
" `-- OpenMP OpenMP_CXX_LIBRARIES:
${
OpenMP_CXX_LIBRARIES
}
"
)
else
()
message
(
FATAL_ERROR
"Could not found openmp !"
)
endif
()
add_subdirectory
(
paddle
)
add_subdirectory
(
paddle
)
return
()
return
()
...
...
cmake/cross_compiling/android.cmake
浏览文件 @
e8ebb084
...
@@ -26,28 +26,34 @@ if(NOT DEFINED ANDROID_NDK)
...
@@ -26,28 +26,34 @@ if(NOT DEFINED ANDROID_NDK)
endif
()
endif
()
endif
()
endif
()
if
(
NOT DEFINED ANDROID_API_LEVEL
)
if
(
NOT DEFINED ANDROID_API_LEVEL
)
set
(
ANDROID_API_LEVEL
"22"
)
set
(
ANDROID_API_LEVEL
"22"
)
endif
()
endif
()
if
(
NOT DEFINED ANDROID_STL_TYPE
)
if
(
NOT DEFINED ANDROID_STL_TYPE
)
set
(
ANDROID_STL_TYPE
"c++_static"
CACHE STRING
"stl type"
)
set
(
ANDROID_STL_TYPE
"c++_static"
CACHE STRING
"stl type"
)
# can also use shared
endif
()
endif
()
# TODO(TJ): enable me
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"armv7hf"
)
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"armeabi-v7a-hf"
)
message
(
FATAL_ERROR
"ANDROID does not support hardfp on v7 use armv7 instead."
)
message
(
FATAL_ERROR
"Not supported building android armeabi-v7a-hf yet"
)
endif
()
endif
()
set
(
ANDROID_ARCH_ABI
${
ARM_TARGET_ARCH_ABI
}
CACHE STRING
"Choose Android Arch ABI"
)
set
(
ANDROID_ARCH_ABI
${
ARM_TARGET_ARCH_ABI
}
CACHE STRING
"Choose Android Arch ABI"
)
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"armv8"
)
set
(
ANDROID_ARCH_ABI
"arm64-v8a"
)
endif
()
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"armv7"
)
set
(
ANDROID_ARCH_ABI
"armeabi-v7a"
)
endif
()
if
(
ANDROID_ARCH_ABI STREQUAL
"armeabi-v7a-softfp"
)
if
(
ANDROID_ARCH_ABI STREQUAL
"armeabi-v7a-softfp"
)
set
(
ANDROID_ARCH_ABI
"armeabi-v7a"
)
set
(
ANDROID_ARCH_ABI
"armeabi-v7a"
)
endif
()
endif
()
set
(
ANDROID_ARCH_ABI_LIST
"arm64-v8a"
"armeabi-v7a"
"armeabi-v6"
"armeabi"
set
(
ANDROID_ARCH_ABI_LIST
"arm64-v8a"
"armeabi-v7a"
"armeabi-v6"
"armeabi"
"mips"
"mips64"
"x86"
"x86_64"
"armeabi-v7a-hf"
)
"mips"
"mips64"
"x86"
"x86_64"
)
set_property
(
CACHE ANDROID_ARCH_ABI PROPERTY STRINGS
${
ANDROID_ARCH_ABI_LIST
}
)
set_property
(
CACHE ANDROID_ARCH_ABI PROPERTY STRINGS
${
ANDROID_ARCH_ABI_LIST
}
)
if
(
NOT ANDROID_ARCH_ABI IN_LIST ANDROID_ARCH_ABI_LIST
)
if
(
NOT ANDROID_ARCH_ABI IN_LIST ANDROID_ARCH_ABI_LIST
)
message
(
FATAL_ERROR
"ANDROID_ARCH_ABI must be in one of
${
ANDROID_ARCH_ABI_LIST
}
"
)
message
(
FATAL_ERROR
"ANDROID_ARCH_ABI must be in one of
${
ANDROID_ARCH_ABI_LIST
}
"
)
...
@@ -59,21 +65,37 @@ if(ANDROID_ARCH_ABI STREQUAL "armeabi-v7a")
...
@@ -59,21 +65,37 @@ if(ANDROID_ARCH_ABI STREQUAL "armeabi-v7a")
message
(
STATUS
"NEON is enabled on arm-v7a with softfp"
)
message
(
STATUS
"NEON is enabled on arm-v7a with softfp"
)
endif
()
endif
()
if
(
ANDROID_ARCH_ABI STREQUAL
"armeabi-v7a-hf"
)
set
(
ANDROID_ARCH_ABI
"armeabi-v7a"
)
set
(
CMAKE_CXX_FLAGS
"-std=c++11 -march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4
${
CMAKE_CXX_FLAGS
}
"
)
set
(
CMAKE_C_FLAGS
"-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4
${
CMAKE_C_FLAGS
}
"
)
message
(
STATUS
"NEON is enabled on arm-v7a with hard float"
)
endif
()
set
(
ANDROID_STL_TYPE_LITS
"gnustl_static"
"c++_static"
)
set
(
ANDROID_STL_TYPE_LITS
"gnustl_static"
"c++_static"
)
set_property
(
CACHE ANDROID_STL_TYPE PROPERTY STRINGS
${
ANDROID_STL_TYPE_LITS
}
)
set_property
(
CACHE ANDROID_STL_TYPE PROPERTY STRINGS
${
ANDROID_STL_TYPE_LITS
}
)
if
(
NOT ANDROID_STL_TYPE IN_LIST ANDROID_STL_TYPE_LITS
)
if
(
NOT ANDROID_STL_TYPE IN_LIST ANDROID_STL_TYPE_LITS
)
message
(
FATAL_ERROR
"ANDROID_STL_TYPE must be in one of
${
ANDROID_STL_TYPE_LITS
}
"
)
message
(
FATAL_ERROR
"ANDROID_STL_TYPE must be in one of
${
ANDROID_STL_TYPE_LITS
}
"
)
endif
()
endif
()
if
(
ARM_TARGET_LANG STREQUAL
"gcc"
)
# gcc do not need set lang
set
(
ARM_TARGET_LANG
""
)
endif
()
set
(
CMAKE_SYSTEM_NAME Android
)
set
(
CMAKE_SYSTEM_NAME Android
)
set
(
CMAKE_SYSTEM_VERSION
${
ANDROID_API_LEVEL
}
)
set
(
CMAKE_SYSTEM_VERSION
${
ANDROID_API_LEVEL
}
)
set
(
CMAKE_ANDROID_ARCH_ABI
${
ANDROID_ARCH_ABI
}
)
set
(
CMAKE_ANDROID_ARCH_ABI
${
ANDROID_ARCH_ABI
}
)
set
(
CMAKE_ANDROID_NDK
${
ANDROID_NDK
}
)
set
(
CMAKE_ANDROID_NDK
${
ANDROID_NDK
}
)
set
(
CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION
${
ARM_TARGET_LANG
}
)
set
(
CMAKE_ANDROID_STL_TYPE
${
ANDROID_STL_TYPE
}
)
set
(
CMAKE_ANDROID_STL_TYPE
${
ANDROID_STL_TYPE
}
)
if
(
ARM_TARGET_LANG STREQUAL
"clang"
)
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"armv8"
)
set
(
triple aarch64-v8a-linux-android
)
elseif
(
ARM_TARGET_ARCH_ABI STREQUAL
"armv7"
)
set
(
triple arm-v7a-linux-android
)
else
()
message
(
FATAL_ERROR
"Clang do not support this
${
ARM_TARGET_ARCH_ABI
}
, use armv8 or armv7"
)
endif
()
set
(
CMAKE_C_COMPILER clang
)
set
(
CMAKE_C_COMPILER_TARGET
${
triple
}
)
set
(
CMAKE_CXX_COMPILER clang++
)
set
(
CMAKE_CXX_COMPILER_TARGET
${
triple
}
)
message
(
STATUS
"CMAKE_CXX_COMPILER_TARGET:
${
CMAKE_CXX_COMPILER_TARGET
}
"
)
endif
()
cmake/cross_compiling/armlinux.cmake
浏览文件 @
e8ebb084
...
@@ -20,7 +20,15 @@ set(ARMLINUX TRUE)
...
@@ -20,7 +20,15 @@ set(ARMLINUX TRUE)
add_definitions
(
-DLITE_WITH_LINUX
)
add_definitions
(
-DLITE_WITH_LINUX
)
set
(
CMAKE_SYSTEM_NAME Linux
)
set
(
CMAKE_SYSTEM_NAME Linux
)
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"arm64-v8a"
)
set
(
ARMLINUX_ARCH_ABI
${
ARM_TARGET_ARCH_ABI
}
CACHE STRING
"Choose Android Arch ABI"
)
set
(
ARMLINUX_ARCH_ABI_LIST
"armv8"
"armv7"
"armv7hf"
)
set_property
(
CACHE ARMLINUX_ARCH_ABI PROPERTY STRINGS
${
ARMLINUX_ARCH_ABI_LIST
}
)
if
(
NOT ARMLINUX_ARCH_ABI IN_LIST ARMLINUX_ARCH_ABI_LIST
)
message
(
FATAL_ERROR
"ARMLINUX_ARCH_ABI(
${
ARMLINUX_ARCH_ABI
}
) must be in one of
${
ARMLINUX_ARCH_ABI_LIST
}
"
)
endif
()
if
(
ARMLINUX_ARCH_ABI STREQUAL
"armv8"
)
set
(
CMAKE_SYSTEM_PROCESSOR aarch64
)
set
(
CMAKE_SYSTEM_PROCESSOR aarch64
)
set
(
CMAKE_C_COMPILER
"aarch64-linux-gnu-gcc"
)
set
(
CMAKE_C_COMPILER
"aarch64-linux-gnu-gcc"
)
set
(
CMAKE_CXX_COMPILER
"aarch64-linux-gnu-g++"
)
set
(
CMAKE_CXX_COMPILER
"aarch64-linux-gnu-g++"
)
...
@@ -30,13 +38,12 @@ if(ARM_TARGET_ARCH_ABI STREQUAL "arm64-v8a")
...
@@ -30,13 +38,12 @@ if(ARM_TARGET_ARCH_ABI STREQUAL "arm64-v8a")
message
(
STATUS
"NEON is enabled on arm64-v8a"
)
message
(
STATUS
"NEON is enabled on arm64-v8a"
)
endif
()
endif
()
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"armeabi-v7a"
if
(
ARMLINUX_ARCH_ABI STREQUAL
"armv7"
OR ARMLINUX_ARCH_ABI STREQUAL
"armv7hf"
)
OR ARM_TARGET_ARCH_ABI STREQUAL
"armeabi-v7a-hf"
)
message
(
FATAL_ERROR
"Not supported building arm linux arm-v7 yet"
)
message
(
FATAL_ERROR
"Not supported building arm linux arm-v7 yet"
)
endif
()
endif
()
# TODO(TJ): make sure v7 works
# TODO(TJ): make sure v7 works
if
(
ARM
_TARGET_ARCH_ABI STREQUAL
"armeabi-v7a
"
)
if
(
ARM
LINUX_ARCH_ABI STREQUAL
"armv7
"
)
set
(
CMAKE_SYSTEM_PROCESSOR arm
)
set
(
CMAKE_SYSTEM_PROCESSOR arm
)
set
(
CMAKE_C_COMPILER
"arm-linux-gnueabi-gcc"
)
set
(
CMAKE_C_COMPILER
"arm-linux-gnueabi-gcc"
)
set
(
CMAKE_CXX_COMPILER
"arm-linux-gnueabi-g++"
)
set
(
CMAKE_CXX_COMPILER
"arm-linux-gnueabi-g++"
)
...
@@ -46,7 +53,7 @@ if(ARM_TARGET_ARCH_ABI STREQUAL "armeabi-v7a")
...
@@ -46,7 +53,7 @@ if(ARM_TARGET_ARCH_ABI STREQUAL "armeabi-v7a")
message
(
STATUS
"NEON is enabled on arm-v7a with softfp"
)
message
(
STATUS
"NEON is enabled on arm-v7a with softfp"
)
endif
()
endif
()
if
(
ARM
_TARGET_ARCH_ABI STREQUAL
"armeabi-v7a-
hf"
)
if
(
ARM
LINUX_ARCH_ABI STREQUAL
"armv7
hf"
)
set
(
CMAKE_SYSTEM_PROCESSOR arm
)
set
(
CMAKE_SYSTEM_PROCESSOR arm
)
set
(
CMAKE_C_COMPILER
"arm-linux-gnueabihf-gcc"
)
set
(
CMAKE_C_COMPILER
"arm-linux-gnueabihf-gcc"
)
set
(
CMAKE_CXX_COMPILER
"arm-linux-gnueabihf-g++"
)
set
(
CMAKE_CXX_COMPILER
"arm-linux-gnueabihf-g++"
)
...
...
cmake/cross_compiling/findar.cmake
0 → 100644
浏览文件 @
e8ebb084
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if
(
NOT ARM_TARGET_LANG STREQUAL
"clang"
)
# only clang need find ar tool
return
()
endif
()
if
(
NOT EXISTS
"
${
CMAKE_CXX_COMPILER
}
"
)
message
(
ERROR
"Can not find CMAKE_CXX_COMPILER
${
CMAKE_CXX_COMPILER
}
"
)
endif
()
get_filename_component
(
AR_PATH
${
CMAKE_CXX_COMPILER
}
PATH
)
find_file
(
AR_TOOL NAMES llvm-ar PATHS
${
AR_PATH
}
)
if
(
NOT AR_TOOL
)
message
(
ERROR
"Failed to find AR_TOOL in
${
AR_PATH
}
"
)
else
()
set
(
CMAKE_AR
${
AR_TOOL
}
)
message
(
STATUS
"Found CMAKE_AR : "
${
CMAKE_AR
}
)
endif
()
cmake/external/gflags.cmake
浏览文件 @
e8ebb084
...
@@ -40,7 +40,8 @@ if(ANDROID)
...
@@ -40,7 +40,8 @@ if(ANDROID)
"-DCMAKE_SYSTEM_VERSION=
${
CMAKE_SYSTEM_VERSION
}
"
"-DCMAKE_SYSTEM_VERSION=
${
CMAKE_SYSTEM_VERSION
}
"
"-DCMAKE_ANDROID_ARCH_ABI=
${
CMAKE_ANDROID_ARCH_ABI
}
"
"-DCMAKE_ANDROID_ARCH_ABI=
${
CMAKE_ANDROID_ARCH_ABI
}
"
"-DCMAKE_ANDROID_NDK=
${
CMAKE_ANDROID_NDK
}
"
"-DCMAKE_ANDROID_NDK=
${
CMAKE_ANDROID_NDK
}
"
"-DCMAKE_ANDROID_STL_TYPE=
${
CMAKE_ANDROID_STL_TYPE
}
"
)
"-DCMAKE_ANDROID_STL_TYPE=
${
CMAKE_ANDROID_STL_TYPE
}
"
"-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=
${
CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION
}
"
)
endif
()
endif
()
ExternalProject_Add
(
ExternalProject_Add
(
...
...
cmake/external/glog.cmake
浏览文件 @
e8ebb084
...
@@ -46,7 +46,8 @@ if(ANDROID)
...
@@ -46,7 +46,8 @@ if(ANDROID)
"-DCMAKE_SYSTEM_VERSION=
${
CMAKE_SYSTEM_VERSION
}
"
"-DCMAKE_SYSTEM_VERSION=
${
CMAKE_SYSTEM_VERSION
}
"
"-DCMAKE_ANDROID_ARCH_ABI=
${
CMAKE_ANDROID_ARCH_ABI
}
"
"-DCMAKE_ANDROID_ARCH_ABI=
${
CMAKE_ANDROID_ARCH_ABI
}
"
"-DCMAKE_ANDROID_NDK=
${
CMAKE_ANDROID_NDK
}
"
"-DCMAKE_ANDROID_NDK=
${
CMAKE_ANDROID_NDK
}
"
"-DCMAKE_ANDROID_STL_TYPE=
${
CMAKE_ANDROID_STL_TYPE
}
"
)
"-DCMAKE_ANDROID_STL_TYPE=
${
CMAKE_ANDROID_STL_TYPE
}
"
"-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=
${
CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION
}
"
)
endif
()
endif
()
ExternalProject_Add
(
ExternalProject_Add
(
...
...
cmake/external/gtest.cmake
浏览文件 @
e8ebb084
...
@@ -58,7 +58,9 @@ IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
...
@@ -58,7 +58,9 @@ IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
"-DCMAKE_SYSTEM_VERSION=
${
CMAKE_SYSTEM_VERSION
}
"
"-DCMAKE_SYSTEM_VERSION=
${
CMAKE_SYSTEM_VERSION
}
"
"-DCMAKE_ANDROID_ARCH_ABI=
${
CMAKE_ANDROID_ARCH_ABI
}
"
"-DCMAKE_ANDROID_ARCH_ABI=
${
CMAKE_ANDROID_ARCH_ABI
}
"
"-DCMAKE_ANDROID_NDK=
${
CMAKE_ANDROID_NDK
}
"
"-DCMAKE_ANDROID_NDK=
${
CMAKE_ANDROID_NDK
}
"
"-DCMAKE_ANDROID_STL_TYPE=
${
CMAKE_ANDROID_STL_TYPE
}
"
)
"-DCMAKE_ANDROID_STL_TYPE=
${
CMAKE_ANDROID_STL_TYPE
}
"
"-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=
${
CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION
}
"
)
endif
()
endif
()
ExternalProject_Add
(
ExternalProject_Add
(
...
...
cmake/external/protobuf.cmake
浏览文件 @
e8ebb084
...
@@ -199,6 +199,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
...
@@ -199,6 +199,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
"-DCMAKE_ANDROID_ARCH_ABI=
${
CMAKE_ANDROID_ARCH_ABI
}
"
"-DCMAKE_ANDROID_ARCH_ABI=
${
CMAKE_ANDROID_ARCH_ABI
}
"
"-DCMAKE_ANDROID_NDK=
${
CMAKE_ANDROID_NDK
}
"
"-DCMAKE_ANDROID_NDK=
${
CMAKE_ANDROID_NDK
}
"
"-DCMAKE_ANDROID_STL_TYPE=
${
CMAKE_ANDROID_STL_TYPE
}
"
"-DCMAKE_ANDROID_STL_TYPE=
${
CMAKE_ANDROID_STL_TYPE
}
"
"-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=
${
CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION
}
"
"-DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
"
"-DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
"
"-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
"
"-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
"
"-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
"
"-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
"
...
...
paddle/fluid/inference/analysis/passes/CMakeLists.txt
浏览文件 @
e8ebb084
...
@@ -5,7 +5,7 @@ cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_p
...
@@ -5,7 +5,7 @@ cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_p
cc_library
(
ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass
)
cc_library
(
ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass
)
cc_library
(
adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass
)
cc_library
(
adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass
)
cc_library
(
analysis_passes SRCS passes.cc DEPS
cc_library
(
analysis_passes SRCS
use_
passes.cc DEPS
ir_graph_build_pass
ir_graph_build_pass
ir_analysis_pass
ir_analysis_pass
ir_params_sync_among_devices_pass
ir_params_sync_among_devices_pass
...
...
paddle/fluid/inference/analysis/passes/passes.cc
→
paddle/fluid/inference/analysis/passes/
use_
passes.cc
浏览文件 @
e8ebb084
...
@@ -12,13 +12,13 @@
...
@@ -12,13 +12,13 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/inference/analysis/passes/passes.h"
#include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
#include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
#include "paddle/fluid/inference/analysis/passes/passes.h"
namespace
paddle
{
namespace
paddle
{
namespace
inference
{
namespace
inference
{
...
...
paddle/fluid/lite/CMakeLists.txt
浏览文件 @
e8ebb084
...
@@ -10,6 +10,9 @@ message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")
...
@@ -10,6 +10,9 @@ message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")
message
(
STATUS
"LITE_WITH_PROFILE:
\t
${
LITE_WITH_PROFILE
}
"
)
message
(
STATUS
"LITE_WITH_PROFILE:
\t
${
LITE_WITH_PROFILE
}
"
)
set
(
LITE_MODEL_DIR
"
${
THIRD_PARTY_PATH
}
/install"
)
set
(
LITE_MODEL_DIR
"
${
THIRD_PARTY_PATH
}
/install"
)
set
(
LITE_ON_MOBILE
${
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
}
)
set
(
LITE_URL
"http://paddle-inference-dist.bj.bcebos.com"
CACHE STRING
"inference download url"
)
set
(
LITE_URL
"http://paddle-inference-dist.bj.bcebos.com"
CACHE STRING
"inference download url"
)
function
(
lite_download_and_uncompress INSTALL_DIR URL FILENAME
)
function
(
lite_download_and_uncompress INSTALL_DIR URL FILENAME
)
...
@@ -182,3 +185,11 @@ add_subdirectory(model_parser)
...
@@ -182,3 +185,11 @@ add_subdirectory(model_parser)
add_subdirectory
(
utils
)
add_subdirectory
(
utils
)
add_subdirectory
(
api
)
add_subdirectory
(
api
)
add_subdirectory
(
gen_code
)
add_subdirectory
(
gen_code
)
if
(
WITH_TESTING
)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL
}
"lite_naive_model.tar.gz"
)
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL
}
"mobilenet_v2_relu.tar.gz"
)
endif
()
endif
()
paddle/fluid/lite/api/CMakeLists.txt
浏览文件 @
e8ebb084
set
(
cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host model_parser_lite
)
set
(
cxx_api_lite_deps
scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite
)
if
(
LITE_WITH_CUDA
)
if
(
LITE_WITH_CUDA
)
set
(
cxx_api_lite_deps
${
cxx_api_lite_deps
}
kernels_cuda
)
set
(
cxx_api_lite_deps
${
cxx_api_lite_deps
}
kernels_cuda
)
cc_library
(
cxx_api_lite_cuda SRCS cxx_api.cc DEPS
${
cxx_api_lite_deps
}
target_wrapper_cuda
)
cc_library
(
cxx_api_lite_cuda SRCS cxx_api.cc DEPS
${
cxx_api_lite_deps
}
target_wrapper_cuda
)
nv_test
(
test_cxx_api_lite_cuda SRCS cxx_api_test.cc DEPS cxx_api_lite_cuda
)
nv_test
(
test_cxx_api_lite_cuda SRCS cxx_api_test.cc DEPS cxx_api_lite_cuda
)
endif
()
endif
()
cc_library
(
cxx_api_lite SRCS cxx_api.cc DEPS
${
cxx_api_lite_deps
}
${
ops_lite
}
program_lite
)
lite_cc_library
(
lite_api_test_helper SRCS lite_api_test_helper.cc
DEPS scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite
${
ops_lite
}
${
host_kernels
}
CUDA_DEPS kernels_cuda
X86_DEPS
${
x86_kernels
}
)
lite_cc_library
(
cxx_api_lite SRCS cxx_api.cc DEPS lite_api_test_helper
)
set
(
light_api_deps
set
(
light_api_deps
scope_lite target_wrapper_host model_parser_lite
)
scope_lite target_wrapper_host model_parser_lite
program_lite
)
if
(
LITE_WITH_CUDA
)
if
(
LITE_WITH_CUDA
)
set
(
light_api_deps
${
light_api_deps
}
target_wrapper_cuda
)
set
(
light_api_deps
${
light_api_deps
}
target_wrapper_cuda
)
endif
()
endif
()
#cc_library(light_api_lite SRCS light_api.cc DEPS ${light_api_deps} ${ops_lite} ${host_kernels})
lite_cc_library
(
light_api_lite SRCS light_api.cc
DEPS
${
light_api_deps
}
${
ops_lite
}
${
host_kernels
}
)
message
(
STATUS
"get ops
${
ops_lite
}
"
)
message
(
STATUS
"get ops
${
ops_lite
}
"
)
message
(
STATUS
"get Host kernels
${
host_kernels
}
"
)
message
(
STATUS
"get Host kernels
${
host_kernels
}
"
)
...
@@ -24,24 +33,41 @@ include(ExternalProject)
...
@@ -24,24 +33,41 @@ include(ExternalProject)
set
(
LITE_DEMO_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo"
CACHE STRING
set
(
LITE_DEMO_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo"
CACHE STRING
"A path setting inference demo download directories."
)
"A path setting inference demo download directories."
)
if
((
NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
AND WITH_TESTING
)
if
(
WITH_TESTING
)
set
(
eval_model_dir
""
)
set
(
test_cxx_api_deps cxx_api_lite mir_passes
${
ops_lite
}
${
host_kernels
}
${
x86_kernels
}
)
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
set
(
eval_model_dir
${
LITE_MODEL_DIR
}
/mobilenet_v2_relu
)
set
(
test_cxx_api_deps
${
test_cxx_api_deps
}
${
arm_kernels
}
)
endif
()
lite_cc_test
(
test_cxx_api_lite SRCS cxx_api_test.cc
lite_cc_test
(
test_cxx_api_lite SRCS cxx_api_test.cc
DEPS cxx_api_lite mir_passes
DEPS
${
test_cxx_api_deps
}
${
ops_lite
}
${
host_kernels
}
${
x86_kernels
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
--optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt SERIAL
)
--optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt
--eval_model_dir=eval_model_dir SERIAL
)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL
}
"lite_naive_model.tar.gz"
)
add_dependencies
(
test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz
)
add_dependencies
(
test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz
)
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
add_dependencies
(
test_cxx_api_lite extern_lite_download_mobilenet_v2_relu_tar_gz
)
endif
()
endif
()
endif
()
if
(
NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING
)
# These tests needs CLI arguments, and is not supported in ARM CI.
add_dependencies
(
test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz
)
# TODO(Superjomn) support latter.
endif
()
if
(
NOT LITE_ON_MOBILE
)
lite_cc_test
(
test_light_api SRCS light_api_test.cc
DEPS light_api_lite mir_passes
X86_DEPS
${
x86_kernels
}
ARGS --optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt
SERIAL
)
# if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
lite_cc_test
(
test_apis_lite SRCS apis_test.cc
# lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
DEPS cxx_api_lite light_api_lite
${
ops_lite
}
mir_passes
# endif()
X86_DEPS
${
x86_kernels
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
--optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt SERIAL
)
endif
()
lite_cc_binary
(
cxx_api_lite_bin SRCS cxx_api_bin.cc
lite_cc_binary
(
cxx_api_lite_bin SRCS cxx_api_bin.cc
DEPS
DEPS
...
@@ -51,4 +77,3 @@ lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
...
@@ -51,4 +77,3 @@ lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
mir_passes
mir_passes
${
ops_lite
}
${
host_kernels
}
${
ops_lite
}
${
host_kernels
}
ARM_DEPS
${
arm_kernels
}
)
ARM_DEPS
${
arm_kernels
}
)
paddle/fluid/lite/api/apis_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* We test multiple apis here.
*/
#include <gtest/gtest.h>
#include <sstream>
#include <vector>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/light_api.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"
DEFINE_string
(
model_dir
,
""
,
""
);
DEFINE_string
(
optimized_model
,
""
,
""
);
namespace
paddle
{
namespace
lite
{
void
SetConstInput
(
lite
::
Tensor
*
x
)
{
x
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
100
,
100
})));
auto
*
data
=
x
->
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
100
*
100
;
i
++
)
{
data
[
i
]
=
i
;
}
}
bool
CompareTensors
(
const
std
::
string
&
name
,
const
ExecutorLite
&
cxx_api
,
const
LightPredictor
&
light_api
)
{
const
auto
*
a
=
cxx_api
.
GetTensor
(
name
);
const
auto
*
b
=
light_api
.
GetTensor
(
name
);
return
TensorCompareWith
(
*
a
,
*
b
);
}
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST
(
CXXApi_LightApi
,
save_and_load_model
)
{
lite
::
ExecutorLite
cxx_api
;
lite
::
LightPredictor
light_api
;
// CXXAPi
{
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
cxx_api
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)},
valid_places
);
auto
*
x
=
cxx_api
.
GetInput
(
0
);
SetConstInput
(
x
);
cxx_api
.
Run
();
LOG
(
INFO
)
<<
"Save optimized model to "
<<
FLAGS_optimized_model
;
cxx_api
.
SaveModel
(
FLAGS_optimized_model
);
}
// LightApi
{
light_api
.
Build
(
FLAGS_optimized_model
);
auto
*
x
=
light_api
.
GetInput
(
0
);
SetConstInput
(
x
);
light_api
.
Run
();
}
const
auto
*
cxx_out
=
cxx_api
.
GetOutput
(
0
);
const
auto
*
light_out
=
light_api
.
GetOutput
(
0
);
ASSERT_TRUE
(
TensorCompareWith
(
*
cxx_out
,
*
light_out
));
std
::
vector
<
std
::
string
>
tensors_with_order
({
"a"
,
"fc_0.w_0"
,
"fc_0.tmp_0"
,
"scale_0.tmp_0"
,
});
for
(
const
auto
&
tensor_name
:
tensors_with_order
)
{
ASSERT_TRUE
(
CompareTensors
(
tensor_name
,
cxx_api
,
light_api
));
}
}
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/api/cxx_api.h
浏览文件 @
e8ebb084
...
@@ -78,6 +78,11 @@ class ExecutorLite {
...
@@ -78,6 +78,11 @@ class ExecutorLite {
return
&
fetch_list
.
at
(
offset
);
return
&
fetch_list
.
at
(
offset
);
}
}
const
lite
::
Tensor
*
GetTensor
(
const
std
::
string
&
name
)
const
{
auto
*
var
=
program_
->
exec_scope
()
->
FindVar
(
name
);
return
&
var
->
Get
<
lite
::
Tensor
>
();
}
void
Run
()
{
program_
->
Run
();
}
void
Run
()
{
program_
->
Run
();
}
const
framework
::
proto
::
ProgramDesc
&
program_desc
()
const
{
const
framework
::
proto
::
ProgramDesc
&
program_desc
()
const
{
...
...
paddle/fluid/lite/api/cxx_api_bin.cc
浏览文件 @
e8ebb084
...
@@ -14,8 +14,9 @@
...
@@ -14,8 +14,9 @@
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/cxx_api.h"
#include <chrono>
#include <chrono>
#include "paddle/fluid/lite/core/mir/passes.h"
#include "paddle/fluid/lite/core/mir/
use_
passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -66,8 +67,8 @@ void Run(const char* model_dir, int repeat) {
...
@@ -66,8 +67,8 @@ void Run(const char* model_dir, int repeat) {
}
// namespace paddle
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
CHECK_EQ
(
argc
,
2
)
<<
"usage: ./cmd <model_dir
>"
;
CHECK_EQ
(
argc
,
3
)
<<
"usage: ./cmd <model_dir> <repeat
>"
;
paddle
::
lite
::
Run
(
argv
[
1
],
1
);
paddle
::
lite
::
Run
(
argv
[
1
],
std
::
stoi
(
argv
[
2
])
);
return
0
;
return
0
;
}
}
...
...
paddle/fluid/lite/api/cxx_api_test.cc
浏览文件 @
e8ebb084
...
@@ -16,59 +16,34 @@
...
@@ -16,59 +16,34 @@
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/core/mir/passes.h"
#include "paddle/fluid/lite/api/lite_api_test_helper.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
DEFINE_string
(
model_dir
,
""
,
""
);
#include "paddle/fluid/lite/operators/use_ops.h"
DEFINE_string
(
optimized_model
,
""
,
""
);
// For training.
// For training.
DEFINE_string
(
startup_program_path
,
""
,
""
);
DEFINE_string
(
startup_program_path
,
""
,
""
);
DEFINE_string
(
main_program_path
,
""
,
""
);
DEFINE_string
(
main_program_path
,
""
,
""
);
// for eval
DEFINE_string
(
eval_model_dir
,
""
,
""
);
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST
(
CXXApi
,
test
)
{
TEST
(
CXXApi
,
test
)
{
lite
::
ExecutorLite
predictor
;
const
lite
::
Tensor
*
out
=
RunHvyModel
();
#ifndef LITE_WITH_CUDA
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
#else
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)},
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)},
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kAny
),
DATALAYOUT
(
kNCHW
)},
Place
{
TARGET
(
kHost
),
PRECISION
(
kAny
),
DATALAYOUT
(
kNCHW
)},
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kAny
),
DATALAYOUT
(
kAny
)},
Place
{
TARGET
(
kHost
),
PRECISION
(
kAny
),
DATALAYOUT
(
kAny
)},
});
#endif
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)},
// origin cuda
valid_places
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
input_tensor
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
100
,
100
})));
auto
*
data
=
input_tensor
->
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
100
*
100
;
i
++
)
{
data
[
i
]
=
i
;
}
// LOG(INFO) << "input " << *input_tensor;
predictor
.
Run
();
auto
*
out
=
predictor
.
GetOutput
(
0
);
LOG
(
INFO
)
<<
out
<<
" memory size "
<<
out
->
data_size
();
LOG
(
INFO
)
<<
out
<<
" memory size "
<<
out
->
data_size
();
LOG
(
INFO
)
<<
"out "
<<
out
->
data
<
float
>
()[
0
];
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
LOG
(
INFO
)
<<
"out "
<<
out
->
data
<
float
>
()[
1
];
LOG
(
INFO
)
<<
"out "
<<
out
->
data
<
float
>
()[
i
];
}
LOG
(
INFO
)
<<
"dims "
<<
out
->
dims
();
LOG
(
INFO
)
<<
"dims "
<<
out
->
dims
();
// LOG(INFO) << "out " << *out;
// LOG(INFO) << "out " << *out;
}
}
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST
(
CXXApi
,
save_model
)
{
TEST
(
CXXApi
,
save_model
)
{
lite
::
ExecutorLite
predictor
;
lite
::
ExecutorLite
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
...
@@ -79,9 +54,7 @@ TEST(CXXApi, save_model) {
...
@@ -79,9 +54,7 @@ TEST(CXXApi, save_model) {
LOG
(
INFO
)
<<
"Save optimized model to "
<<
FLAGS_optimized_model
;
LOG
(
INFO
)
<<
"Save optimized model to "
<<
FLAGS_optimized_model
;
predictor
.
SaveModel
(
FLAGS_optimized_model
);
predictor
.
SaveModel
(
FLAGS_optimized_model
);
}
}
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
/*TEST(CXXTrainer, train) {
/*TEST(CXXTrainer, train) {
Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
std::vector<Place> valid_places({prefer_place});
std::vector<Place> valid_places({prefer_place});
...
@@ -115,46 +88,37 @@ TEST(CXXApi, save_model) {
...
@@ -115,46 +88,37 @@ TEST(CXXApi, save_model) {
}*/
}*/
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
}
// namespace lite
#ifdef LITE_WITH_ARM
}
// namespace paddle
TEST
(
CXXApi
,
eval
)
{
DeviceInfo
::
Init
();
lite
::
ExecutorLite
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
USE_LITE_OP
(
mul
);
predictor
.
Build
(
FLAGS_eval_model_dir
,
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)},
USE_LITE_OP
(
fc
);
valid_places
);
USE_LITE_OP
(
relu
);
USE_LITE_OP
(
scale
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
USE_LITE_OP
(
feed
);
input_tensor
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
1
,
3
,
224
,
224
})));
USE_LITE_OP
(
fetch
);
auto
*
data
=
input_tensor
->
mutable_data
<
float
>
();
USE_LITE_OP
(
io_copy
);
for
(
int
i
=
0
;
i
<
input_tensor
->
dims
().
production
();
i
++
)
{
USE_LITE_OP
(
elementwise_add
)
data
[
i
]
=
1
;
USE_LITE_OP
(
elementwise_sub
)
}
USE_LITE_OP
(
square
)
USE_LITE_OP
(
softmax
)
USE_LITE_OP
(
dropout
)
USE_LITE_OP
(
concat
)
USE_LITE_OP
(
conv2d
)
USE_LITE_OP
(
depthwise_conv2d
)
USE_LITE_OP
(
pool2d
)
USE_LITE_KERNEL
(
feed
,
kHost
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kHost
,
kAny
,
kAny
,
def
);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
square
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_sub
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
concat
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
conv2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
depthwise_conv2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_CUDA
predictor
.
Run
();
USE_LITE_KERNEL
(
mul
,
kCUDA
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
host_to_device
);
auto
*
out
=
predictor
.
GetOutput
(
0
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
device_to_host
);
std
::
vector
<
float
>
results
({
0.00097802
,
0.00099822
,
0.00103093
,
0.00100121
,
0.00098268
,
0.00104065
,
0.00099962
,
0.00095181
,
0.00099694
,
0.00099406
});
for
(
int
i
=
0
;
i
<
results
.
size
();
++
i
)
{
EXPECT_NEAR
(
out
->
data
<
float
>
()[
i
],
results
[
i
],
1e-5
);
}
ASSERT_EQ
(
out
->
dims
().
size
(),
2
);
ASSERT_EQ
(
out
->
dims
()[
0
],
1
);
ASSERT_EQ
(
out
->
dims
()[
1
],
1000
);
}
#endif
#endif
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/api/light_api.h
浏览文件 @
e8ebb084
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
#include <string>
#include <string>
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/context.h"
#include "paddle/fluid/lite/core/context.h"
#include "paddle/fluid/lite/core/program.h"
#include "paddle/fluid/lite/core/program.h"
#include "paddle/fluid/lite/core/types.h"
#include "paddle/fluid/lite/core/types.h"
...
@@ -62,6 +63,11 @@ class LightPredictor {
...
@@ -62,6 +63,11 @@ class LightPredictor {
return
&
fetch_list
.
at
(
offset
);
return
&
fetch_list
.
at
(
offset
);
}
}
const
lite
::
Tensor
*
GetTensor
(
const
std
::
string
&
name
)
const
{
auto
*
var
=
program_
->
exec_scope
()
->
FindVar
(
name
);
return
&
var
->
Get
<
lite
::
Tensor
>
();
}
private:
private:
void
BuildRuntimeProgram
(
const
framework
::
proto
::
ProgramDesc
&
prog
)
{
void
BuildRuntimeProgram
(
const
framework
::
proto
::
ProgramDesc
&
prog
)
{
std
::
vector
<
Instruction
>
insts
;
std
::
vector
<
Instruction
>
insts
;
...
@@ -72,9 +78,8 @@ class LightPredictor {
...
@@ -72,9 +78,8 @@ class LightPredictor {
// Create the kernels of the target places, and filter out the specific
// Create the kernels of the target places, and filter out the specific
// kernel with the target alias.
// kernel with the target alias.
for
(
auto
&
op
:
program
.
ops_
)
{
for
(
auto
&
op
:
program
.
ops
())
{
lite
::
pb
::
OpDesc
desc
(
op
->
op_info
()
->
desc
());
auto
kernel_type
=
op
->
op_info
()
->
GetAttr
<
std
::
string
>
(
kKernelTypeAttr
);
auto
kernel_type
=
desc
.
GetAttr
(
kKernelTypeAttr
).
get
<
std
::
string
>
();
std
::
string
op_type
,
alias
;
std
::
string
op_type
,
alias
;
Place
place
;
Place
place
;
KernelBase
::
ParseKernelType
(
kernel_type
,
&
op_type
,
&
alias
,
&
place
);
KernelBase
::
ParseKernelType
(
kernel_type
,
&
op_type
,
&
alias
,
&
place
);
...
@@ -89,8 +94,8 @@ class LightPredictor {
...
@@ -89,8 +94,8 @@ class LightPredictor {
insts
.
emplace_back
(
op
,
std
::
move
(
*
it
));
insts
.
emplace_back
(
op
,
std
::
move
(
*
it
));
}
}
program_
.
reset
(
new
RuntimeProgram
(
std
::
move
(
insts
)));
program_
.
reset
(
new
RuntimeProgram
(
std
::
move
(
insts
)));
CHECK
(
program
.
exec_scope
_
);
CHECK
(
program
.
exec_scope
()
);
program_
->
set_exec_scope
(
program
.
exec_scope
_
);
program_
->
set_exec_scope
(
program
.
exec_scope
()
);
}
}
private:
private:
...
...
paddle/fluid/lite/api/light_api_test.cc
浏览文件 @
e8ebb084
...
@@ -15,6 +15,9 @@
...
@@ -15,6 +15,9 @@
#include "paddle/fluid/lite/api/light_api.h"
#include "paddle/fluid/lite/api/light_api.h"
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"
DEFINE_string
(
optimized_model
,
""
,
""
);
DEFINE_string
(
optimized_model
,
""
,
""
);
...
@@ -33,29 +36,14 @@ TEST(LightAPI, load) {
...
@@ -33,29 +36,14 @@ TEST(LightAPI, load) {
}
}
predictor
.
Run
();
predictor
.
Run
();
const
auto
*
output
=
predictor
.
GetOutput
(
0
);
const
float
*
raw_output
=
output
->
data
<
float
>
();
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
LOG
(
INFO
)
<<
"out "
<<
raw_output
[
i
];
}
}
}
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
USE_LITE_OP
(
mul
);
USE_LITE_OP
(
fc
);
USE_LITE_OP
(
scale
);
USE_LITE_OP
(
feed
);
USE_LITE_OP
(
fetch
);
USE_LITE_OP
(
io_copy
);
USE_LITE_KERNEL
(
feed
,
kHost
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kHost
,
kAny
,
kAny
,
def
);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
square
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_sub
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
def
);
#endif
paddle/fluid/lite/api/lite_api_test_helper.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/api/lite_api_test_helper.h"
DEFINE_string
(
model_dir
,
""
,
""
);
DEFINE_string
(
optimized_model
,
""
,
""
);
namespace
paddle
{
namespace
lite
{
const
lite
::
Tensor
*
RunHvyModel
()
{
lite
::
ExecutorLite
predictor
;
#ifndef LITE_WITH_CUDA
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
#else
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)},
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)},
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kAny
),
DATALAYOUT
(
kNCHW
)},
Place
{
TARGET
(
kHost
),
PRECISION
(
kAny
),
DATALAYOUT
(
kNCHW
)},
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kAny
),
DATALAYOUT
(
kAny
)},
Place
{
TARGET
(
kHost
),
PRECISION
(
kAny
),
DATALAYOUT
(
kAny
)},
});
#endif
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)},
// origin cuda
valid_places
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
input_tensor
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
100
,
100
})));
auto
*
data
=
input_tensor
->
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
100
*
100
;
i
++
)
{
data
[
i
]
=
i
;
}
// LOG(INFO) << "input " << *input_tensor;
predictor
.
Run
();
const
auto
*
out
=
predictor
.
GetOutput
(
0
);
return
out
;
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/api/lite_api_test_helper.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <gflags/gflags.h>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/op_registry.h"
DECLARE_string
(
model_dir
);
DECLARE_string
(
optimized_model
);
namespace
paddle
{
namespace
lite
{
const
lite
::
Tensor
*
RunHvyModel
();
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/arm/math/CMakeLists.txt
浏览文件 @
e8ebb084
...
@@ -14,6 +14,7 @@ cc_library(math_arm SRCS
...
@@ -14,6 +14,7 @@ cc_library(math_arm SRCS
scale.cc
scale.cc
pooling.cc
pooling.cc
elementwise.cc
elementwise.cc
concat.cc
sgemv.cc
sgemv.cc
type_trans.cpp
type_trans.cpp
conv_impl.cc
conv_impl.cc
...
...
paddle/fluid/lite/arm/math/concat.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/arm/math/concat.h"
#include <algorithm>
#include <limits>
#include <memory>
#include "paddle/fluid/lite/arm/math/funcs.h"
namespace
paddle
{
namespace
lite
{
namespace
arm
{
namespace
math
{
void
concat_func
(
const
std
::
vector
<
lite
::
Tensor
*>
&
input
,
const
int
axis
,
lite
::
Tensor
*
output
)
{
size_t
num
=
input
.
size
();
int
rows
=
1
;
auto
dim_0
=
input
[
0
]
->
dims
();
for
(
int
i
=
0
;
i
<
axis
;
++
i
)
{
rows
*=
dim_0
[
i
];
}
int
out_rows
=
rows
,
out_cols
=
0
;
std
::
vector
<
int64_t
>
input_cols
(
input
.
size
());
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
int
t_cols
=
input
[
i
]
->
numel
()
/
rows
;
out_cols
+=
t_cols
;
input_cols
[
i
]
=
t_cols
;
}
// computation
for
(
int
k
=
0
;
k
<
out_rows
;
++
k
)
{
float
*
dst_ptr
=
output
->
mutable_data
<
float
>
()
+
k
*
out_cols
;
int
col_idx
=
0
;
for
(
int
j
=
0
;
j
<
num
;
++
j
)
{
int
col_len
=
input_cols
[
j
];
const
float
*
src_prt
=
input
[
j
]
->
data
<
float
>
()
+
k
*
col_len
;
std
::
memcpy
(
dst_ptr
+
col_idx
,
src_prt
,
sizeof
(
float
)
*
col_len
);
col_idx
+=
col_len
;
}
}
}
}
// namespace math
}
// namespace arm
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/arm/math/concat.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/lite/operators/op_params.h"
#include "paddle/fluid/lite/utils/cp_logging.h"
namespace
paddle
{
namespace
lite
{
namespace
arm
{
namespace
math
{
void
concat_func
(
const
std
::
vector
<
lite
::
Tensor
*>
&
input
,
const
int
axis
,
lite
::
Tensor
*
output
);
}
// namespace math
}
// namespace arm
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/arm/math/elementwise.cc
浏览文件 @
e8ebb084
...
@@ -65,9 +65,61 @@ void elementwise_add<float>(const float* dinx, const float* diny, float* dout,
...
@@ -65,9 +65,61 @@ void elementwise_add<float>(const float* dinx, const float* diny, float* dout,
}
}
template
<
>
template
<
>
void
elementwise_add_axis
<
float
>
(
const
float
*
dinx
,
const
float
*
diny
,
void
elementwise_add_relu
<
float
>
(
const
float
*
dinx
,
const
float
*
diny
,
float
*
dout
,
int
batch
,
int
channels
,
float
*
dout
,
int
num
)
{
int
num
)
{
int
cnt
=
num
>>
4
;
int
remain
=
num
%
16
;
float32x4_t
vzero
=
vdupq_n_f32
(
0.
f
);
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
cnt
;
i
++
)
{
const
float
*
dinx_ptr
=
dinx
+
(
i
<<
4
);
const
float
*
diny_ptr
=
diny
+
(
i
<<
4
);
float
*
dout_ptr
=
dout
+
(
i
<<
4
);
float32x4_t
dinx0
=
vld1q_f32
(
dinx_ptr
);
float32x4_t
dinx1
=
vld1q_f32
(
dinx_ptr
+
4
);
float32x4_t
dinx2
=
vld1q_f32
(
dinx_ptr
+
8
);
float32x4_t
dinx3
=
vld1q_f32
(
dinx_ptr
+
12
);
float32x4_t
diny0
=
vld1q_f32
(
diny_ptr
);
float32x4_t
diny1
=
vld1q_f32
(
diny_ptr
+
4
);
float32x4_t
diny2
=
vld1q_f32
(
diny_ptr
+
8
);
float32x4_t
diny3
=
vld1q_f32
(
diny_ptr
+
12
);
dinx0
=
vaddq_f32
(
dinx0
,
diny0
);
dinx1
=
vaddq_f32
(
dinx1
,
diny1
);
dinx2
=
vaddq_f32
(
dinx2
,
diny2
);
dinx3
=
vaddq_f32
(
dinx3
,
diny3
);
// relu
dinx0
=
vmaxq_f32
(
dinx0
,
vzero
);
dinx1
=
vmaxq_f32
(
dinx1
,
vzero
);
dinx2
=
vmaxq_f32
(
dinx2
,
vzero
);
dinx3
=
vmaxq_f32
(
dinx3
,
vzero
);
vst1q_f32
(
dout_ptr
,
dinx0
);
vst1q_f32
(
dout_ptr
+
4
,
dinx1
);
vst1q_f32
(
dout_ptr
+
8
,
dinx2
);
vst1q_f32
(
dout_ptr
+
12
,
dinx3
);
}
if
(
remain
>
0
)
{
const
float
*
dinx_ptr
=
dinx
+
(
cnt
<<
4
);
const
float
*
diny_ptr
=
diny
+
(
cnt
<<
4
);
float
*
dout_ptr
=
dout
+
(
cnt
<<
4
);
for
(
int
i
=
0
;
i
<
remain
;
i
++
)
{
float
tmp
=
*
dinx_ptr
+
*
diny_ptr
;
*
dout_ptr
=
tmp
>
0.
f
?
tmp
:
0.
f
;
dout_ptr
++
;
dinx_ptr
++
;
diny_ptr
++
;
}
}
}
template
<
>
void
elementwise_add_broadcast
<
float
>
(
const
float
*
dinx
,
const
float
*
diny
,
float
*
dout
,
int
batch
,
int
channels
,
int
num
)
{
#pragma omp parallel for collapse(2)
#pragma omp parallel for collapse(2)
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int
j
=
0
;
j
<
channels
;
++
j
)
{
for
(
int
j
=
0
;
j
<
channels
;
++
j
)
{
...
@@ -127,6 +179,82 @@ void elementwise_add_axis<float>(const float* dinx, const float* diny,
...
@@ -127,6 +179,82 @@ void elementwise_add_axis<float>(const float* dinx, const float* diny,
}
}
}
}
template
<
>
void
elementwise_add_relu_broadcast
<
float
>
(
const
float
*
dinx
,
const
float
*
diny
,
float
*
dout
,
int
batch
,
int
channels
,
int
num
)
{
float32x4_t
vzero
=
vdupq_n_f32
(
0.
f
);
#pragma omp parallel for collapse(2)
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int
j
=
0
;
j
<
channels
;
++
j
)
{
int
offset
=
(
i
*
channels
+
j
)
*
num
;
const
float
*
din_ptr
=
dinx
+
offset
;
const
float
diny_data
=
diny
[
j
];
float
*
dout_ptr
=
dout
+
offset
;
int
cnt
=
num
>>
4
;
int
remain
=
num
%
16
;
float32x4_t
rb
=
vdupq_n_f32
(
diny_data
);
for
(
int
k
=
0
;
k
<
cnt
;
++
k
)
{
float32x4_t
din0
=
vld1q_f32
(
din_ptr
);
float32x4_t
din1
=
vld1q_f32
(
din_ptr
+
4
);
float32x4_t
din2
=
vld1q_f32
(
din_ptr
+
8
);
float32x4_t
din3
=
vld1q_f32
(
din_ptr
+
12
);
din0
=
vaddq_f32
(
din0
,
rb
);
din1
=
vaddq_f32
(
din1
,
rb
);
din2
=
vaddq_f32
(
din2
,
rb
);
din3
=
vaddq_f32
(
din3
,
rb
);
// relu
din0
=
vmaxq_f32
(
din0
,
vzero
);
din1
=
vmaxq_f32
(
din1
,
vzero
);
din2
=
vmaxq_f32
(
din2
,
vzero
);
din3
=
vmaxq_f32
(
din3
,
vzero
);
vst1q_f32
(
dout_ptr
,
din0
);
vst1q_f32
(
dout_ptr
+
4
,
din1
);
vst1q_f32
(
dout_ptr
+
8
,
din2
);
vst1q_f32
(
dout_ptr
+
12
,
din3
);
din_ptr
+=
16
;
dout_ptr
+=
16
;
}
if
(
remain
>=
8
)
{
float32x4_t
din0
=
vld1q_f32
(
din_ptr
);
float32x4_t
din1
=
vld1q_f32
(
din_ptr
+
4
);
din0
=
vaddq_f32
(
din0
,
rb
);
din1
=
vaddq_f32
(
din1
,
rb
);
// relu
din0
=
vmaxq_f32
(
din0
,
vzero
);
din1
=
vmaxq_f32
(
din1
,
vzero
);
vst1q_f32
(
dout_ptr
,
din0
);
vst1q_f32
(
dout_ptr
+
4
,
din1
);
din_ptr
+=
8
;
dout_ptr
+=
8
;
remain
-=
8
;
}
if
(
remain
>=
4
)
{
float32x4_t
din0
=
vld1q_f32
(
din_ptr
);
din0
=
vaddq_f32
(
din0
,
rb
);
// relu
din0
=
vmaxq_f32
(
din0
,
vzero
);
vst1q_f32
(
dout_ptr
,
din0
);
din_ptr
+=
4
;
dout_ptr
+=
4
;
remain
-=
4
;
}
if
(
remain
>
0
)
{
for
(
int
p
=
0
;
p
<
remain
;
p
++
)
{
float
tmp
=
*
din_ptr
+
diny_data
;
*
dout_ptr
=
tmp
>
0.
f
?
tmp
:
0.
f
;
dout_ptr
++
;
din_ptr
++
;
}
}
}
}
}
}
// namespace math
}
// namespace math
}
// namespace arm
}
// namespace arm
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/arm/math/elementwise.h
浏览文件 @
e8ebb084
...
@@ -23,8 +23,15 @@ template <typename T>
...
@@ -23,8 +23,15 @@ template <typename T>
void
elementwise_add
(
const
T
*
dinx
,
const
T
*
diny
,
T
*
dout
,
int
num
);
void
elementwise_add
(
const
T
*
dinx
,
const
T
*
diny
,
T
*
dout
,
int
num
);
template
<
typename
T
>
template
<
typename
T
>
void
elementwise_add_axis
(
const
T
*
dinx
,
const
T
*
diny
,
T
*
dout
,
int
batch
,
void
elementwise_add_relu
(
const
T
*
dinx
,
const
T
*
diny
,
T
*
dout
,
int
num
);
int
channels
,
int
num
);
template
<
typename
T
>
void
elementwise_add_broadcast
(
const
T
*
dinx
,
const
T
*
diny
,
T
*
dout
,
int
batch
,
int
channels
,
int
num
);
template
<
typename
T
>
void
elementwise_add_relu_broadcast
(
const
T
*
dinx
,
const
T
*
diny
,
T
*
dout
,
int
batch
,
int
channels
,
int
num
);
}
// namespace math
}
// namespace math
}
// namespace arm
}
// namespace arm
...
...
paddle/fluid/lite/core/CMakeLists.txt
浏览文件 @
e8ebb084
if
(
WITH_TESTING
)
if
(
WITH_TESTING
)
cc_library
(
lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest
)
cc_library
(
lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest
gflags
)
endif
()
endif
()
lite_cc_library
(
target_wrapper_lite SRCS target_wrapper.cc
lite_cc_library
(
target_wrapper_lite SRCS target_wrapper.cc
DEPS target_wrapper_host
DEPS target_wrapper_host
...
...
paddle/fluid/lite/core/context.cc
浏览文件 @
e8ebb084
...
@@ -28,6 +28,10 @@
...
@@ -28,6 +28,10 @@
#endif // TARGET_OS_IPHONE
#endif // TARGET_OS_IPHONE
#endif // __APPLE__
#endif // __APPLE__
#ifdef ARM_WITH_OMP
#include <omp.h>
#endif
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -84,7 +88,7 @@ ARMContext& Context<TargetType::kARM>::operator=(const ARMContext& ctx) {
...
@@ -84,7 +88,7 @@ ARMContext& Context<TargetType::kARM>::operator=(const ARMContext& ctx) {
}
}
void
Context
<
TargetType
::
kARM
>::
BindDev
()
{
void
Context
<
TargetType
::
kARM
>::
BindDev
()
{
#ifdef
USE_OPEN
MP
#ifdef
ARM_WITH_O
MP
int
num_threads
=
active_ids_
.
size
();
int
num_threads
=
active_ids_
.
size
();
omp_set_num_threads
(
num_threads
);
omp_set_num_threads
(
num_threads
);
#ifdef LITE_WITH_LINUX
#ifdef LITE_WITH_LINUX
...
@@ -98,12 +102,12 @@ void Context<TargetType::kARM>::BindDev() {
...
@@ -98,12 +102,12 @@ void Context<TargetType::kARM>::BindDev() {
}
}
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num_threads
;
i
++
)
{
if
(
ssarets
[
i
]
!=
0
)
{
if
(
ssarets
[
i
]
!=
0
)
{
LOG
E
(
"set cpu affinity failed, cpuID: %d
\n
"
,
active_ids_
[
i
])
;
LOG
(
ERROR
)
<<
"set cpu affinity failed, cpuID: "
<<
active_ids_
[
i
]
;
return
;
return
;
}
}
}
}
#endif // LITE_WITH_LINUX
#endif // LITE_WITH_LINUX
#else //
USE_OPEN
MP
#else //
ARM_WITH_O
MP
#ifdef LITE_WITH_LINUX
#ifdef LITE_WITH_LINUX
std
::
vector
<
int
>
cpuid1
;
std
::
vector
<
int
>
cpuid1
;
cpuid1
.
push_back
(
active_ids_
[
0
]);
cpuid1
.
push_back
(
active_ids_
[
0
]);
...
@@ -113,7 +117,7 @@ void Context<TargetType::kARM>::BindDev() {
...
@@ -113,7 +117,7 @@ void Context<TargetType::kARM>::BindDev() {
return
;
return
;
}
}
#endif // LITE_WITH_LINUX
#endif // LITE_WITH_LINUX
#endif //
USE_OPEN
MP
#endif //
ARM_WITH_O
MP
}
}
void
Context
<
TargetType
::
kARM
>::
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
void
Context
<
TargetType
::
kARM
>::
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
...
@@ -123,7 +127,7 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
...
@@ -123,7 +127,7 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
if
(
threads
>
big_core_size
+
small_core_size
)
{
if
(
threads
>
big_core_size
+
small_core_size
)
{
threads
=
big_core_size
+
small_core_size
;
threads
=
big_core_size
+
small_core_size
;
}
}
#ifdef
USE_OPEN
MP
#ifdef
ARM_WITH_O
MP
count_
++
;
count_
++
;
int
shift_num
=
(
count_
/
10
)
%
big_core_size
;
int
shift_num
=
(
count_
/
10
)
%
big_core_size
;
switch
(
mode
)
{
switch
(
mode
)
{
...
@@ -146,8 +150,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
...
@@ -146,8 +150,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
if
(
big_core_size
>
0
)
{
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_HIGH
;
mode_
=
LITE_POWER_HIGH
;
if
(
threads
>
big_core_size
)
{
if
(
threads
>
big_core_size
)
{
LOG
E
(
"threads: %d, exceed the big cores size: %d
\n
"
,
threads
,
LOG
(
ERROR
)
<<
"threads: "
<<
threads
big_core_size
)
;
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
dev
.
big_core_ids_
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
...
@@ -156,7 +160,7 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
...
@@ -156,7 +160,7 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
}
}
}
else
{
}
else
{
mode_
=
LITE_POWER_LOW
;
mode_
=
LITE_POWER_LOW
;
LOG
E
(
"HIGH POWER MODE is not support, switch to little cores
\n
"
)
;
LOG
(
ERROR
)
<<
"HIGH POWER MODE is not support, switch to little cores"
;
if
(
threads
>
small_core_size
)
{
if
(
threads
>
small_core_size
)
{
active_ids_
=
dev
.
little_core_ids_
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
}
else
{
...
@@ -174,8 +178,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
...
@@ -174,8 +178,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
if
(
small_core_size
>
0
)
{
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_LOW
;
mode_
=
LITE_POWER_LOW
;
if
(
threads
>
small_core_size
)
{
if
(
threads
>
small_core_size
)
{
LOG
W
(
"threads: %d, exceed the little cores size: %d
\n
"
,
threads
,
LOG
(
WARNING
)
<<
"threads: "
<<
threads
small_core_size
)
;
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
dev
.
little_core_ids_
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
...
@@ -184,7 +188,7 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
...
@@ -184,7 +188,7 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
}
}
}
else
{
}
else
{
mode_
=
LITE_POWER_HIGH
;
mode_
=
LITE_POWER_HIGH
;
LOG
W
(
"LOW POWER MODE is not support, switch to big cores
\n
"
)
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
if
(
threads
>
big_core_size
)
{
active_ids_
=
dev
.
big_core_ids_
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
}
else
{
...
@@ -211,8 +215,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
...
@@ -211,8 +215,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
if
(
big_core_size
>
0
)
{
if
(
big_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_HIGH
;
mode_
=
LITE_POWER_RAND_HIGH
;
if
(
threads
>
big_core_size
)
{
if
(
threads
>
big_core_size
)
{
LOG
W
(
"threads: %d, exceed the big cores size: %d
\n
"
,
threads
,
LOG
(
WARNING
)
<<
"threads: "
<<
threads
big_core_size
)
;
<<
", exceed the big cores size: "
<<
big_core_size
;
active_ids_
=
dev
.
big_core_ids_
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
...
@@ -222,7 +226,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
...
@@ -222,7 +226,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
}
}
}
else
{
}
else
{
mode_
=
LITE_POWER_LOW
;
mode_
=
LITE_POWER_LOW
;
LOGW
(
"HIGH POWER MODE is not support, switch to little cores
\n
"
);
LOG
(
WARNING
)
<<
"HIGH POWER MODE is not support, switch to little cores"
;
if
(
threads
>
small_core_size
)
{
if
(
threads
>
small_core_size
)
{
active_ids_
=
dev
.
little_core_ids_
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
}
else
{
...
@@ -240,8 +245,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
...
@@ -240,8 +245,8 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
if
(
small_core_size
>
0
)
{
if
(
small_core_size
>
0
)
{
mode_
=
LITE_POWER_RAND_LOW
;
mode_
=
LITE_POWER_RAND_LOW
;
if
(
threads
>
small_core_size
)
{
if
(
threads
>
small_core_size
)
{
LOG
W
(
"threads: %d, exceed the little cores size: %d
\n
"
,
threads
,
LOG
(
WARNING
)
<<
"threads: "
<<
threads
small_core_size
)
;
<<
", exceed the little cores size: "
<<
small_core_size
;
active_ids_
=
dev
.
little_core_ids_
;
active_ids_
=
dev
.
little_core_ids_
;
}
else
{
}
else
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
for
(
int
i
=
0
;
i
<
threads
;
++
i
)
{
...
@@ -251,7 +256,7 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
...
@@ -251,7 +256,7 @@ void Context<TargetType::kARM>::SetRunMode(PowerMode mode, int threads) {
}
}
}
else
{
}
else
{
mode_
=
LITE_POWER_HIGH
;
mode_
=
LITE_POWER_HIGH
;
LOG
W
(
"LOW POWER MODE is not support, switch to big cores
\n
"
)
;
LOG
(
WARNING
)
<<
"LOW POWER MODE is not support, switch to big cores"
;
if
(
threads
>
big_core_size
)
{
if
(
threads
>
big_core_size
)
{
active_ids_
=
dev
.
big_core_ids_
;
active_ids_
=
dev
.
big_core_ids_
;
}
else
{
}
else
{
...
...
paddle/fluid/lite/core/hvy_tensor.h
浏览文件 @
e8ebb084
...
@@ -86,6 +86,7 @@ class TensorHvy : public TensorBase<TensorHvy> {
...
@@ -86,6 +86,7 @@ class TensorHvy : public TensorBase<TensorHvy> {
template
<
typename
T
>
template
<
typename
T
>
T
*
mutable_data
()
{
T
*
mutable_data
()
{
memory_size_
=
framework
::
product
(
data_
.
dims
())
*
sizeof
(
T
);
return
data_
.
mutable_data
<
T
>
(
data_
.
dims
(),
platform
::
CPUPlace
());
return
data_
.
mutable_data
<
T
>
(
data_
.
dims
(),
platform
::
CPUPlace
());
}
}
template
<
typename
T
>
template
<
typename
T
>
...
@@ -128,8 +129,11 @@ class TensorHvy : public TensorBase<TensorHvy> {
...
@@ -128,8 +129,11 @@ class TensorHvy : public TensorBase<TensorHvy> {
const
framework
::
LoDTensor
&
raw_tensor
()
const
{
return
data_
;
}
const
framework
::
LoDTensor
&
raw_tensor
()
const
{
return
data_
;
}
framework
::
LoDTensor
&
raw_tensor
()
{
return
data_
;
}
framework
::
LoDTensor
&
raw_tensor
()
{
return
data_
;
}
size_t
memory_size
()
const
{
return
memory_size_
;
}
private:
private:
framework
::
LoDTensor
data_
;
framework
::
LoDTensor
data_
;
size_t
memory_size_
{};
};
};
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/core/lite_tensor.h
浏览文件 @
e8ebb084
...
@@ -90,6 +90,8 @@ class TensorLite : public TensorBase<TensorLite> {
...
@@ -90,6 +90,8 @@ class TensorLite : public TensorBase<TensorLite> {
void
*
mutable_data
(
size_t
memory_size
);
void
*
mutable_data
(
size_t
memory_size
);
void
*
mutable_data
(
TargetType
target
,
size_t
memory_size
);
void
*
mutable_data
(
TargetType
target
,
size_t
memory_size
);
const
void
*
raw_data
()
const
{
return
buffer_
->
data
();
}
size_t
memory_size
()
const
{
return
memory_size_
;
}
size_t
memory_size
()
const
{
return
memory_size_
;
}
bool
IsInitialized
()
const
{
return
buffer_
->
data
();
}
bool
IsInitialized
()
const
{
return
buffer_
->
data
();
}
...
...
paddle/fluid/lite/core/mir/CMakeLists.txt
浏览文件 @
e8ebb084
...
@@ -7,7 +7,8 @@ cc_library(mir_pass_registry SRCS pass_registry.cc DEPS mir_pass_manager)
...
@@ -7,7 +7,8 @@ cc_library(mir_pass_registry SRCS pass_registry.cc DEPS mir_pass_manager)
add_subdirectory
(
fusion
)
add_subdirectory
(
fusion
)
cc_library
(
mir_passes
cc_library
(
mir_passes
SRCS fc_fuse_pass.cc
SRCS fc_fuse_pass.cc
conv_elementwise_add_relu_fuse_pass.cc
conv_elementwise_add_activation_fuse_pass.cc
elementwise_add_activation_fuse_pass.cc
conv_bn_fuse_pass.cc
conv_bn_fuse_pass.cc
quant_dequant_fuse_pass.cc
quant_dequant_fuse_pass.cc
static_kernel_pick_pass.cc
static_kernel_pick_pass.cc
...
@@ -83,7 +84,11 @@ lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_fc_model.tar.gz
...
@@ -83,7 +84,11 @@ lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_fc_model.tar.gz
add_dependencies
(
test_lite_fc_fuse extern_lite_download_lite_fc_model_tar_gz
)
add_dependencies
(
test_lite_fc_fuse extern_lite_download_lite_fc_model_tar_gz
)
lite_cc_test
(
test_lite_conv_elementwise_add_relu_fuse
lite_cc_test
(
test_lite_conv_elementwise_add_activation_fuse
SRCS conv_elementwise_add_relu_fuse_pass_test.cc
SRCS conv_elementwise_add_activation_fuse_pass_test.cc
DEPS cxx_api_lite mir_passes
${
ops_lite
}
${
host_kernels
}
${
x86_kernels
}
)
lite_cc_test
(
test_lite_elementwise_add_activation_fuse
SRCS elementwise_add_activation_fuse_pass_test.cc
DEPS cxx_api_lite mir_passes
DEPS cxx_api_lite mir_passes
${
ops_lite
}
${
host_kernels
}
${
x86_kernels
}
)
${
ops_lite
}
${
host_kernels
}
${
x86_kernels
}
)
paddle/fluid/lite/core/mir/conv_elementwise_add_
relu
_fuse_pass.cc
→
paddle/fluid/lite/core/mir/conv_elementwise_add_
activation
_fuse_pass.cc
浏览文件 @
e8ebb084
...
@@ -12,22 +12,23 @@
...
@@ -12,22 +12,23 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/lite/core/mir/conv_elementwise_add_
relu
_fuse_pass.h"
#include "paddle/fluid/lite/core/mir/conv_elementwise_add_
activation
_fuse_pass.h"
#include <memory>
#include <memory>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_
relu
_fuser.h"
#include "paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_
activation
_fuser.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
namespace
mir
{
namespace
mir
{
void
ConvElementwiseAdd
ReLU
FusePass
::
Apply
(
void
ConvElementwiseAdd
Activation
FusePass
::
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
{
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
{
fusion
::
ConvElementwiseAdd
ReLUFuser
fuser
(
"conv2d
"
);
fusion
::
ConvElementwiseAdd
ActivationFuser
fuser
(
"conv2d"
,
"relu
"
);
fuser
(
graph
.
get
());
fuser
(
graph
.
get
());
fusion
::
ConvElementwiseAddReLUFuser
depthwise_fuser
(
"depthwise_conv2d"
);
fusion
::
ConvElementwiseAddActivationFuser
depthwise_fuser
(
"depthwise_conv2d"
,
"relu"
);
depthwise_fuser
(
graph
.
get
());
depthwise_fuser
(
graph
.
get
());
}
}
...
@@ -35,5 +36,5 @@ void ConvElementwiseAddReLUFusePass::Apply(
...
@@ -35,5 +36,5 @@ void ConvElementwiseAddReLUFusePass::Apply(
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
REGISTER_MIR_PASS
(
lite_conv_elementwise_add_act_fuse_pass
,
REGISTER_MIR_PASS
(
lite_conv_elementwise_add_act
ivation
_fuse_pass
,
paddle
::
lite
::
mir
::
ConvElementwiseAdd
ReLU
FusePass
);
paddle
::
lite
::
mir
::
ConvElementwiseAdd
Activation
FusePass
);
paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include "paddle/fluid/lite/core/mir/pass.h"
namespace
paddle
{
namespace
lite
{
namespace
mir
{
class
ConvElementwiseAddActivationFusePass
:
public
ProgramPass
{
public:
void
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
override
;
};
}
// namespace mir
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/core/mir/conv_elementwise_add_
relu
_fuse_pass_test.cc
→
paddle/fluid/lite/core/mir/conv_elementwise_add_
activation
_fuse_pass_test.cc
浏览文件 @
e8ebb084
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/lite/core/mir/conv_elementwise_add_
relu
_fuse_pass.h"
#include "paddle/fluid/lite/core/mir/conv_elementwise_add_
activation
_fuse_pass.h"
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <vector>
#include <vector>
...
@@ -20,7 +20,7 @@
...
@@ -20,7 +20,7 @@
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
#include "paddle/fluid/lite/core/mir/passes.h"
#include "paddle/fluid/lite/core/mir/
use_
passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/program.h"
#include "paddle/fluid/lite/core/program.h"
...
@@ -135,11 +135,11 @@ TEST(conv_elementwise_add_relu_fuse_pass, fuse_test_op) {
...
@@ -135,11 +135,11 @@ TEST(conv_elementwise_add_relu_fuse_pass, fuse_test_op) {
auto
graph
=
BuildGraph
(
&
program_desc
,
scope
,
places
);
auto
graph
=
BuildGraph
(
&
program_desc
,
scope
,
places
);
Visualize
(
graph
.
get
());
Visualize
(
graph
.
get
());
const
int
num_nodes
=
graph
->
nodes
().
size
();
const
int
num_nodes
=
graph
->
nodes
().
size
();
auto
*
fuser
=
new
ConvElementwiseAdd
ReLU
FusePass
;
auto
*
fuser
=
new
ConvElementwiseAdd
Activation
FusePass
;
fuser
->
Apply
(
graph
);
fuser
->
Apply
(
graph
);
Visualize
(
graph
.
get
());
Visualize
(
graph
.
get
());
ASSERT_EQ
(
graph
->
nodes
().
size
(),
num_nodes
-
5UL
*
2
/*nodes removed */
+
ASSERT_EQ
(
graph
->
nodes
().
size
(),
1UL
*
2
/* fused fc node
*/
);
num_nodes
-
5UL
*
2
/*nodes removed */
+
1UL
*
2
/* fused nodes
*/
);
}
}
}
// namespace fusion
}
// namespace fusion
...
...
paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.h"
#include <memory>
#include <vector>
#include "paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
mir
{
void
ElementwiseAddActivationFusePass
::
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
{
fusion
::
ElementwiseAddActivationFuser
fuser
(
"relu"
);
fuser
(
graph
.
get
());
}
}
// namespace mir
}
// namespace lite
}
// namespace paddle
REGISTER_MIR_PASS
(
lite_elementwise_add_activation_fuse_pass
,
paddle
::
lite
::
mir
::
ElementwiseAddActivationFusePass
);
paddle/fluid/lite/core/mir/
conv_elementwise_add_relu
_fuse_pass.h
→
paddle/fluid/lite/core/mir/
elementwise_add_activation
_fuse_pass.h
浏览文件 @
e8ebb084
...
@@ -22,7 +22,7 @@ namespace paddle {
...
@@ -22,7 +22,7 @@ namespace paddle {
namespace
lite
{
namespace
lite
{
namespace
mir
{
namespace
mir
{
class
ConvElementwiseAddReLU
FusePass
:
public
ProgramPass
{
class
ElementwiseAddActivation
FusePass
:
public
ProgramPass
{
public:
public:
void
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
override
;
void
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
override
;
};
};
...
...
paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/program.h"
namespace
paddle
{
namespace
lite
{
namespace
mir
{
namespace
fusion
{
std
::
unique_ptr
<
SSAGraph
>
BuildGraph
(
framework
::
ProgramDesc
*
program_desc
,
const
std
::
shared_ptr
<
Scope
>&
scope
,
const
std
::
vector
<
Place
>&
valid_places
)
{
auto
*
main_block
=
program_desc
->
MutableBlock
(
0
);
auto
*
add_1
=
main_block
->
AppendOp
();
auto
*
add_2
=
main_block
->
AppendOp
();
auto
*
relu_1
=
main_block
->
AppendOp
();
auto
*
relu_2
=
main_block
->
AppendOp
();
main_block
->
Var
(
"x_1"
);
main_block
->
Var
(
"y_1"
);
main_block
->
Var
(
"add_out_1"
);
main_block
->
Var
(
"relu_out_1"
);
main_block
->
Var
(
"y_2"
);
main_block
->
Var
(
"add_out_2"
);
main_block
->
Var
(
"out"
);
scope
->
Var
(
"x_1"
)
->
GetMutable
<
lite
::
Tensor
>
();
scope
->
Var
(
"y_1"
)
->
GetMutable
<
lite
::
Tensor
>
();
scope
->
Var
(
"add_out_1"
)
->
GetMutable
<
lite
::
Tensor
>
();
scope
->
Var
(
"relu_out_1"
)
->
GetMutable
<
lite
::
Tensor
>
();
scope
->
Var
(
"y_2"
)
->
GetMutable
<
lite
::
Tensor
>
();
scope
->
Var
(
"add_out_2"
)
->
GetMutable
<
lite
::
Tensor
>
();
scope
->
Var
(
"out"
)
->
GetMutable
<
lite
::
Tensor
>
();
add_1
->
SetType
(
"elementwise_add"
);
add_1
->
SetInput
(
"X"
,
{
"x_1"
});
add_1
->
SetInput
(
"Y"
,
{
"y_1"
});
add_1
->
SetOutput
(
"Out"
,
{
"add_out_1"
});
add_1
->
SetAttr
(
"axis"
,
1
);
relu_1
->
SetType
(
"relu"
);
relu_1
->
SetInput
(
"X"
,
{
"add_out_1"
});
relu_1
->
SetOutput
(
"Out"
,
{
"relu_out_1"
});
add_2
->
SetType
(
"elementwise_add"
);
add_2
->
SetInput
(
"X"
,
{
"relu_out_1"
});
add_2
->
SetInput
(
"Y"
,
{
"y_2"
});
add_2
->
SetOutput
(
"Out"
,
{
"add_out_2"
});
add_2
->
SetAttr
(
"axis"
,
1
);
relu_2
->
SetType
(
"relu"
);
relu_2
->
SetInput
(
"X"
,
{
"add_out_2"
});
relu_2
->
SetOutput
(
"Out"
,
{
"out"
});
program_desc
->
Flush
();
lite
::
Program
program
(
*
program_desc
->
Proto
(),
scope
,
valid_places
);
auto
graph
=
std
::
unique_ptr
<
SSAGraph
>
(
new
SSAGraph
());
graph
->
Build
(
program
,
valid_places
);
return
graph
;
}
TEST
(
elementwise_add_activation_fuse_pass
,
graph_test
)
{
framework
::
ProgramDesc
program_desc
;
std
::
vector
<
Place
>
places
{{
TARGET
(
kHost
),
PRECISION
(
kFloat
)}};
auto
scope
=
std
::
make_shared
<
Scope
>
();
auto
graph
=
BuildGraph
(
&
program_desc
,
scope
,
places
);
ASSERT_EQ
(
graph
->
nodes
().
size
(),
7UL
/*vars*/
+
4UL
/*ops*/
+
1UL
/* SSAGraph tmp node*/
);
}
TEST
(
elementwise_add_activation_fuse_pass
,
fuse_test_op
)
{
framework
::
ProgramDesc
program_desc
;
std
::
vector
<
Place
>
places
{{
TARGET
(
kHost
),
PRECISION
(
kFloat
)}};
auto
scope
=
std
::
make_shared
<
Scope
>
();
auto
graph
=
BuildGraph
(
&
program_desc
,
scope
,
places
);
Visualize
(
graph
.
get
());
const
int
num_nodes
=
graph
->
nodes
().
size
();
auto
*
fuser
=
new
ElementwiseAddActivationFusePass
;
fuser
->
Apply
(
graph
);
Visualize
(
graph
.
get
());
ASSERT_EQ
(
graph
->
nodes
().
size
(),
num_nodes
-
3UL
*
2
/*nodes removed */
+
1UL
*
2
/* fused nodes*/
);
}
}
// namespace fusion
}
// namespace mir
}
// namespace lite
}
// namespace paddle
USE_LITE_OP
(
elementwise_add
);
USE_LITE_OP
(
fusion_elementwise_add_activation
);
USE_LITE_OP
(
relu
);
paddle/fluid/lite/core/mir/fc_fuse_pass_test.cc
浏览文件 @
e8ebb084
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/core/mir/passes.h"
#include "paddle/fluid/lite/core/mir/
use_
passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
DEFINE_string
(
model_dir
,
""
,
""
);
DEFINE_string
(
model_dir
,
""
,
""
);
...
...
paddle/fluid/lite/core/mir/fusion/CMakeLists.txt
浏览文件 @
e8ebb084
cc_library
(
fuse_fc
cc_library
(
fuse_fc
SRCS fc_fuser.cc
SRCS fc_fuser.cc
DEPS pattern_matcher_high_api
)
DEPS pattern_matcher_high_api
)
cc_library
(
fuse_conv_elementwise_add_
relu
cc_library
(
fuse_conv_elementwise_add_
activation
SRCS conv_elementwise_add_
relu
_fuser.cc
SRCS conv_elementwise_add_
activation
_fuser.cc
DEPS pattern_matcher_high_api
)
DEPS pattern_matcher_high_api
)
cc_library
(
fuse_conv_bn
cc_library
(
fuse_conv_bn
SRCS conv_bn_fuser.cc
SRCS conv_bn_fuser.cc
DEPS pattern_matcher_high_api
)
DEPS pattern_matcher_high_api
)
cc_library
(
fuse_elementwise_add_activation
SRCS elementwise_add_activation_fuser.cc
DEPS pattern_matcher_high_api
)
cc_library
(
fuse_quant_dequant
cc_library
(
fuse_quant_dequant
SRCS quant_dequant_op_fuser.cc
SRCS quant_dequant_op_fuser.cc
...
@@ -14,9 +17,10 @@ cc_library(fuse_quant_dequant
...
@@ -14,9 +17,10 @@ cc_library(fuse_quant_dequant
set
(
mir_fusers
set
(
mir_fusers
fuse_fc
fuse_fc
fuse_conv_elementwise_add_
relu
fuse_conv_elementwise_add_
activation
fuse_conv_bn
fuse_conv_bn
fuse_quant_dequant
fuse_quant_dequant
fuse_elementwise_add_activation
CACHE INTERNAL
"fusers"
)
CACHE INTERNAL
"fusers"
)
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
...
...
paddle/fluid/lite/core/mir/fusion/conv_bn_fuser.cc
浏览文件 @
e8ebb084
...
@@ -84,7 +84,7 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
...
@@ -84,7 +84,7 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
->
GetMutable
<
lite
::
Tensor
>
();
->
GetMutable
<
lite
::
Tensor
>
();
size_t
bias_size
=
bn_scale_t
->
data_size
();
size_t
bias_size
=
bn_scale_t
->
data_size
();
auto
bn_scale_d
=
bn_scale_t
->
mutable_data
<
float
>
();
auto
bn_scale_d
=
bn_scale_t
->
mutable_data
<
float
>
();
CHECK
(
bias_size
==
conv_weight_dims
[
0
]
)
CHECK
_EQ
(
bias_size
,
static_cast
<
size_t
>
(
conv_weight_dims
[
0
])
)
<<
"The BN bias's size should be equal to the size of the first "
<<
"The BN bias's size should be equal to the size of the first "
<<
"dim size of the conv weights"
;
<<
"dim size of the conv weights"
;
...
...
paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_
relu
_fuser.cc
→
paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_
activation
_fuser.cc
浏览文件 @
e8ebb084
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_
relu
_fuser.h"
#include "paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_
activation
_fuser.h"
#include <memory>
#include <memory>
#include <vector>
#include <vector>
...
@@ -21,7 +21,7 @@ namespace lite {
...
@@ -21,7 +21,7 @@ namespace lite {
namespace
mir
{
namespace
mir
{
namespace
fusion
{
namespace
fusion
{
void
ConvElementwiseAdd
ReLU
Fuser
::
BuildPattern
()
{
void
ConvElementwiseAdd
Activation
Fuser
::
BuildPattern
()
{
// create input nodes.
// create input nodes.
auto
*
input
=
auto
*
input
=
VarNode
(
"input"
)
->
assert_is_op_input
(
conv_type_
,
"Input"
)
->
AsInput
();
VarNode
(
"input"
)
->
assert_is_op_input
(
conv_type_
,
"Input"
)
->
AsInput
();
...
@@ -36,7 +36,8 @@ void ConvElementwiseAddReLUFuser::BuildPattern() {
...
@@ -36,7 +36,8 @@ void ConvElementwiseAddReLUFuser::BuildPattern() {
auto
*
add
=
OpNode
(
"add"
,
"elementwise_add"
)
auto
*
add
=
OpNode
(
"add"
,
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
)
->
AsIntermediate
();
->
AsIntermediate
();
auto
*
relu
=
OpNode
(
"relu"
,
"relu"
)
->
assert_is_op
(
"relu"
)
->
AsIntermediate
();
auto
*
act
=
OpNode
(
"act"
,
act_type_
)
->
assert_is_op
(
act_type_
)
->
AsIntermediate
();
// create intermediate nodes
// create intermediate nodes
auto
*
conv2d_out
=
VarNode
(
"conv2d_out"
)
auto
*
conv2d_out
=
VarNode
(
"conv2d_out"
)
...
@@ -45,22 +46,23 @@ void ConvElementwiseAddReLUFuser::BuildPattern() {
...
@@ -45,22 +46,23 @@ void ConvElementwiseAddReLUFuser::BuildPattern() {
->
AsIntermediate
();
->
AsIntermediate
();
auto
*
add_out
=
VarNode
(
"add_out"
)
auto
*
add_out
=
VarNode
(
"add_out"
)
->
assert_is_op_output
(
"elementwise_add"
,
"Out"
)
->
assert_is_op_output
(
"elementwise_add"
,
"Out"
)
->
assert_is_op_input
(
"relu"
,
"X"
)
->
assert_is_op_input
(
act_type_
,
"X"
)
->
AsIntermediate
();
->
AsIntermediate
();
// create output node
// create output node
auto
*
out
=
VarNode
(
"output"
)
->
assert_is_op_output
(
"relu"
,
"Out"
)
->
AsOutput
();
auto
*
out
=
VarNode
(
"output"
)
->
assert_is_op_output
(
act_type_
,
"Out"
)
->
AsOutput
();
// create topology.
// create topology.
std
::
vector
<
PMNode
*>
conv2d_inputs
{
filter
,
input
};
std
::
vector
<
PMNode
*>
conv2d_inputs
{
filter
,
input
};
std
::
vector
<
PMNode
*>
add_inputs
{
conv2d_out
,
bias
};
std
::
vector
<
PMNode
*>
add_inputs
{
conv2d_out
,
bias
};
conv2d_inputs
>>
*
conv2d
>>
*
conv2d_out
;
conv2d_inputs
>>
*
conv2d
>>
*
conv2d_out
;
add_inputs
>>
*
add
>>
*
add_out
;
add_inputs
>>
*
add
>>
*
add_out
;
*
add_out
>>
*
relu
>>
*
out
;
*
add_out
>>
*
act
>>
*
out
;
}
}
void
ConvElementwiseAdd
ReLUFuser
::
InsertNewNode
(
SSAGraph
*
graph
,
void
ConvElementwiseAdd
ActivationFuser
::
InsertNewNode
(
const
key2nodes_t
&
matched
)
{
SSAGraph
*
graph
,
const
key2nodes_t
&
matched
)
{
auto
op_desc
=
GenOpDesc
(
matched
);
auto
op_desc
=
GenOpDesc
(
matched
);
auto
conv_op
=
LiteOpRegistry
::
Global
().
Create
(
conv_type_
);
auto
conv_op
=
LiteOpRegistry
::
Global
().
Create
(
conv_type_
);
auto
conv_old
=
matched
.
at
(
"conv2d"
)
->
stmt
()
->
op
;
auto
conv_old
=
matched
.
at
(
"conv2d"
)
->
stmt
()
->
op
;
...
@@ -76,7 +78,8 @@ void ConvElementwiseAddReLUFuser::InsertNewNode(SSAGraph* graph,
...
@@ -76,7 +78,8 @@ void ConvElementwiseAddReLUFuser::InsertNewNode(SSAGraph* graph,
IR_NODE_LINK_TO
(
new_op_node
,
matched
.
at
(
"output"
));
IR_NODE_LINK_TO
(
new_op_node
,
matched
.
at
(
"output"
));
}
}
cpp
::
OpDesc
ConvElementwiseAddReLUFuser
::
GenOpDesc
(
const
key2nodes_t
&
matched
)
{
cpp
::
OpDesc
ConvElementwiseAddActivationFuser
::
GenOpDesc
(
const
key2nodes_t
&
matched
)
{
auto
*
desc
=
matched
.
at
(
"conv2d"
)
->
stmt
()
->
op_info
();
auto
*
desc
=
matched
.
at
(
"conv2d"
)
->
stmt
()
->
op_info
();
cpp
::
OpDesc
op_desc
=
*
desc
;
cpp
::
OpDesc
op_desc
=
*
desc
;
...
@@ -97,6 +100,7 @@ cpp::OpDesc ConvElementwiseAddReLUFuser::GenOpDesc(const key2nodes_t& matched) {
...
@@ -97,6 +100,7 @@ cpp::OpDesc ConvElementwiseAddReLUFuser::GenOpDesc(const key2nodes_t& matched) {
op_desc
.
SetAttr
(
"paddings"
,
desc
->
GetAttr
<
std
::
vector
<
int
>>
(
"paddings"
));
op_desc
.
SetAttr
(
"paddings"
,
desc
->
GetAttr
<
std
::
vector
<
int
>>
(
"paddings"
));
op_desc
.
SetAttr
(
"groups"
,
desc
->
GetAttr
<
int
>
(
"groups"
));
op_desc
.
SetAttr
(
"groups"
,
desc
->
GetAttr
<
int
>
(
"groups"
));
op_desc
.
SetAttr
(
"dilations"
,
desc
->
GetAttr
<
std
::
vector
<
int
>>
(
"dilations"
));
op_desc
.
SetAttr
(
"dilations"
,
desc
->
GetAttr
<
std
::
vector
<
int
>>
(
"dilations"
));
// TODO(sangoly): support other activation types
op_desc
.
SetAttr
(
"fuse_relu"
,
true
);
op_desc
.
SetAttr
(
"fuse_relu"
,
true
);
return
op_desc
;
return
op_desc
;
}
}
...
...
paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include "paddle/fluid/lite/core/mir/pattern_matcher_high_api.h"
namespace
paddle
{
namespace
lite
{
namespace
mir
{
namespace
fusion
{
class
ConvElementwiseAddActivationFuser
:
public
FuseBase
{
public:
explicit
ConvElementwiseAddActivationFuser
(
const
std
::
string
&
conv_type
,
const
std
::
string
&
act_type
)
{
CHECK
(
act_type
==
"relu"
)
<<
"Only relu activation be supported now"
;
conv_type_
=
conv_type
;
act_type_
=
act_type
;
}
void
BuildPattern
()
override
;
void
InsertNewNode
(
SSAGraph
*
graph
,
const
key2nodes_t
&
matched
)
override
;
private:
cpp
::
OpDesc
GenOpDesc
(
const
key2nodes_t
&
matched
)
override
;
std
::
string
conv_type_
;
std
::
string
act_type_
;
};
}
// namespace fusion
}
// namespace mir
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.h"
#include <memory>
#include <vector>
namespace
paddle
{
namespace
lite
{
namespace
mir
{
namespace
fusion
{
void
ElementwiseAddActivationFuser
::
BuildPattern
()
{
// create input nodes.
auto
*
x
=
VarNode
(
"x"
)
->
assert_is_op_input
(
"elementwise_add"
,
"X"
)
->
AsInput
();
auto
*
y
=
VarNode
(
"y"
)
->
assert_is_op_input
(
"elementwise_add"
,
"Y"
)
->
AsInput
();
// create op nodes
auto
*
add
=
OpNode
(
"add"
,
"elementwise_add"
)
->
assert_is_op
(
"elementwise_add"
)
->
AsIntermediate
();
auto
*
act
=
OpNode
(
"act"
,
act_type_
)
->
assert_is_op
(
act_type_
)
->
AsIntermediate
();
// create intermediate nodes
auto
*
add_out
=
VarNode
(
"add_out"
)
->
assert_is_op_output
(
"elementwise_add"
,
"Out"
)
->
assert_is_op_input
(
act_type_
,
"X"
)
->
AsIntermediate
();
// create output node
auto
*
out
=
VarNode
(
"output"
)
->
assert_is_op_output
(
act_type_
,
"Out"
)
->
AsOutput
();
// create topology.
std
::
vector
<
PMNode
*>
add_inputs
{
x
,
y
};
add_inputs
>>
*
add
>>
*
add_out
;
*
add_out
>>
*
act
>>
*
out
;
}
void
ElementwiseAddActivationFuser
::
InsertNewNode
(
SSAGraph
*
graph
,
const
key2nodes_t
&
matched
)
{
auto
op_desc
=
GenOpDesc
(
matched
);
auto
op
=
LiteOpRegistry
::
Global
().
Create
(
"fusion_elementwise_add_activation"
);
auto
old_op
=
matched
.
at
(
"add"
)
->
stmt
()
->
op
;
auto
*
scope
=
old_op
->
scope
();
auto
&
valid_places
=
old_op
->
valid_places
();
op
->
Attach
(
op_desc
,
scope
);
auto
*
new_op_node
=
graph
->
GraphCreateInstructNode
(
op
,
valid_places
);
IR_NODE_LINK_TO
(
matched
.
at
(
"x"
),
new_op_node
);
IR_NODE_LINK_TO
(
matched
.
at
(
"y"
),
new_op_node
);
IR_NODE_LINK_TO
(
new_op_node
,
matched
.
at
(
"output"
));
}
cpp
::
OpDesc
ElementwiseAddActivationFuser
::
GenOpDesc
(
const
key2nodes_t
&
matched
)
{
auto
*
desc
=
matched
.
at
(
"add"
)
->
stmt
()
->
op_info
();
cpp
::
OpDesc
op_desc
;
op_desc
.
SetType
(
"fusion_elementwise_add_activation"
);
op_desc
.
SetInput
(
"X"
,
{
matched
.
at
(
"x"
)
->
arg
()
->
name
});
op_desc
.
SetInput
(
"Y"
,
{
matched
.
at
(
"y"
)
->
arg
()
->
name
});
op_desc
.
SetOutput
(
"Out"
,
{
matched
.
at
(
"output"
)
->
arg
()
->
name
});
op_desc
.
SetAttr
(
"axis"
,
desc
->
GetAttr
<
int
>
(
"axis"
));
op_desc
.
SetAttr
(
"act_type"
,
act_type_
);
return
op_desc
;
}
}
// namespace fusion
}
// namespace mir
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/core/mir/fusion/
conv_elementwise_add_relu
_fuser.h
→
paddle/fluid/lite/core/mir/fusion/
elementwise_add_activation
_fuser.h
浏览文件 @
e8ebb084
...
@@ -23,16 +23,16 @@ namespace lite {
...
@@ -23,16 +23,16 @@ namespace lite {
namespace
mir
{
namespace
mir
{
namespace
fusion
{
namespace
fusion
{
class
ConvElementwiseAddReLU
Fuser
:
public
FuseBase
{
class
ElementwiseAddActivation
Fuser
:
public
FuseBase
{
public:
public:
explicit
ConvElementwiseAddReLUFuser
(
const
std
::
string
&
conv
_type
)
explicit
ElementwiseAddActivationFuser
(
const
std
::
string
&
act
_type
)
:
conv_type_
(
conv
_type
)
{}
:
act_type_
(
act
_type
)
{}
void
BuildPattern
()
override
;
void
BuildPattern
()
override
;
void
InsertNewNode
(
SSAGraph
*
graph
,
const
key2nodes_t
&
matched
)
override
;
void
InsertNewNode
(
SSAGraph
*
graph
,
const
key2nodes_t
&
matched
)
override
;
private:
private:
cpp
::
OpDesc
GenOpDesc
(
const
key2nodes_t
&
matched
)
override
;
cpp
::
OpDesc
GenOpDesc
(
const
key2nodes_t
&
matched
)
override
;
std
::
string
conv
_type_
;
std
::
string
act
_type_
;
};
};
}
// namespace fusion
}
// namespace fusion
...
...
paddle/fluid/lite/core/mir/generate_program_pass.cc
浏览文件 @
e8ebb084
...
@@ -24,7 +24,7 @@ namespace lite {
...
@@ -24,7 +24,7 @@ namespace lite {
namespace
mir
{
namespace
mir
{
void
GenerateProgramPass
::
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
{
void
GenerateProgramPass
::
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
{
LOG
(
INFO
)
<<
"final program
\n
"
<<
Visualize
(
graph
.
get
());
VLOG
(
4
)
<<
"final program
\n
"
<<
Visualize
(
graph
.
get
());
for
(
auto
&
item
:
graph
->
StmtTopologicalOrder
())
{
for
(
auto
&
item
:
graph
->
StmtTopologicalOrder
())
{
if
(
item
->
IsStmt
())
{
if
(
item
->
IsStmt
())
{
auto
&
stmt
=
item
->
AsStmt
();
auto
&
stmt
=
item
->
AsStmt
();
...
...
paddle/fluid/lite/core/mir/ssa_graph.cc
浏览文件 @
e8ebb084
...
@@ -24,8 +24,10 @@ namespace lite {
...
@@ -24,8 +24,10 @@ namespace lite {
namespace
mir
{
namespace
mir
{
bool
SSAGraph
::
CheckBidirectionalConnection
()
{
bool
SSAGraph
::
CheckBidirectionalConnection
()
{
LOG
(
INFO
)
<<
"node count "
<<
node_storage_
.
size
();
VLOG
(
4
)
<<
"node count "
<<
node_storage_
.
size
();
for
(
auto
&
node
:
node_storage_
)
{
for
(
auto
&
node
:
node_storage_
)
{
if
(
node
.
IsStmt
())
VLOG
(
4
)
<<
node
.
AsStmt
().
op_info
()
->
Type
();
if
(
node
.
IsArg
())
VLOG
(
4
)
<<
node
.
AsArg
().
name
<<
" "
<<
node
.
AsArg
().
id
;
for
(
auto
*
in
:
node
.
inlinks
)
{
for
(
auto
*
in
:
node
.
inlinks
)
{
CHECK
(
in
->
outlinks
.
end
()
!=
CHECK
(
in
->
outlinks
.
end
()
!=
std
::
find
(
in
->
outlinks
.
begin
(),
in
->
outlinks
.
end
(),
&
node
));
std
::
find
(
in
->
outlinks
.
begin
(),
in
->
outlinks
.
end
(),
&
node
));
...
@@ -121,6 +123,7 @@ void SSAGraph::Build(const Program &program,
...
@@ -121,6 +123,7 @@ void SSAGraph::Build(const Program &program,
std
::
unordered_map
<
std
::
string
,
mir
::
Node
*>
arg_update_node_map_
;
std
::
unordered_map
<
std
::
string
,
mir
::
Node
*>
arg_update_node_map_
;
for
(
auto
&
op
:
program
.
ops
())
{
for
(
auto
&
op
:
program
.
ops
())
{
VLOG
(
3
)
<<
op
->
op_info
()
->
Type
();
auto
*
op_node
=
GraphCreateInstructNode
(
op
,
valid_places
);
auto
*
op_node
=
GraphCreateInstructNode
(
op
,
valid_places
);
for
(
const
std
::
string
&
name
:
op
->
op_info
()
->
input_names
())
{
for
(
const
std
::
string
&
name
:
op
->
op_info
()
->
input_names
())
{
mir
::
Node
*
arg_node
=
nullptr
;
mir
::
Node
*
arg_node
=
nullptr
;
...
...
paddle/fluid/lite/core/mir/ssa_graph_test.cc
浏览文件 @
e8ebb084
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
#include <memory>
#include <memory>
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
#include "paddle/fluid/lite/core/mir/passes.h"
#include "paddle/fluid/lite/core/mir/
use_
passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/program_fake_utils.h"
#include "paddle/fluid/lite/core/program_fake_utils.h"
...
...
paddle/fluid/lite/core/mir/passes.h
→
paddle/fluid/lite/core/mir/
use_
passes.h
浏览文件 @
e8ebb084
...
@@ -15,14 +15,6 @@
...
@@ -15,14 +15,6 @@
#pragma once
#pragma once
#include "paddle/fluid/lite/core/mir/pass_registry.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
mir
{}
// namespace mir
}
// namespace lite
}
// namespace paddle
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#endif
USE_MIR_PASS
(
demo
);
USE_MIR_PASS
(
demo
);
USE_MIR_PASS
(
static_kernel_pick_pass
);
USE_MIR_PASS
(
static_kernel_pick_pass
);
USE_MIR_PASS
(
variable_place_inference_pass
);
USE_MIR_PASS
(
variable_place_inference_pass
);
...
@@ -34,5 +26,6 @@ USE_MIR_PASS(runtime_context_assign_pass);
...
@@ -34,5 +26,6 @@ USE_MIR_PASS(runtime_context_assign_pass);
USE_MIR_PASS
(
lite_conv_bn_fuse_pass
);
USE_MIR_PASS
(
lite_conv_bn_fuse_pass
);
USE_MIR_PASS
(
graph_visualze
);
USE_MIR_PASS
(
graph_visualze
);
USE_MIR_PASS
(
lite_fc_fuse_pass
);
USE_MIR_PASS
(
lite_fc_fuse_pass
);
USE_MIR_PASS
(
lite_conv_elementwise_add_act_fuse_pass
);
USE_MIR_PASS
(
lite_conv_elementwise_add_activation_fuse_pass
);
USE_MIR_PASS
(
lite_elementwise_add_activation_fuse_pass
);
USE_MIR_PASS
(
lite_quant_dequant_fuse_pass
);
USE_MIR_PASS
(
lite_quant_dequant_fuse_pass
);
paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc
浏览文件 @
e8ebb084
...
@@ -13,7 +13,7 @@
...
@@ -13,7 +13,7 @@
// limitations under the License.
// limitations under the License.
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/fluid/lite/core/mir/passes.h"
#include "paddle/fluid/lite/core/mir/
use_
passes.h"
#include "paddle/fluid/lite/core/optimizer.h"
#include "paddle/fluid/lite/core/optimizer.h"
#include "paddle/fluid/lite/core/program_fake_utils.h"
#include "paddle/fluid/lite/core/program_fake_utils.h"
#include "paddle/fluid/lite/kernels/cuda/use_kernels.h"
#include "paddle/fluid/lite/kernels/cuda/use_kernels.h"
...
...
paddle/fluid/lite/core/op_lite.h
浏览文件 @
e8ebb084
...
@@ -54,9 +54,7 @@ class OpLite : public Registry {
...
@@ -54,9 +54,7 @@ class OpLite : public Registry {
OpLite
()
=
default
;
OpLite
()
=
default
;
explicit
OpLite
(
const
std
::
string
&
type
)
:
op_type_
(
type
)
{}
explicit
OpLite
(
const
std
::
string
&
type
)
:
op_type_
(
type
)
{}
explicit
OpLite
(
const
std
::
vector
<
Place
>
&
valid_places
)
explicit
OpLite
(
const
std
::
vector
<
Place
>
&
valid_places
)
:
valid_places_
(
valid_places
)
{
:
valid_places_
(
valid_places
)
{}
LOG
(
INFO
)
<<
"valid places "
<<
valid_places
.
size
();
}
void
SetValidPlaces
(
const
std
::
vector
<
Place
>
&
places
)
{
void
SetValidPlaces
(
const
std
::
vector
<
Place
>
&
places
)
{
VLOG
(
3
)
<<
"valid places "
<<
valid_places_
.
size
();
VLOG
(
3
)
<<
"valid places "
<<
valid_places_
.
size
();
...
...
paddle/fluid/lite/core/optimizer.h
浏览文件 @
e8ebb084
...
@@ -50,7 +50,10 @@ class Optimizer {
...
@@ -50,7 +50,10 @@ class Optimizer {
RunPasses
(
std
::
vector
<
std
::
string
>
{{
RunPasses
(
std
::
vector
<
std
::
string
>
{{
"lite_quant_dequant_fuse_pass"
,
//
"lite_quant_dequant_fuse_pass"
,
//
"lite_conv_bn_fuse_pass"
,
//
"lite_conv_bn_fuse_pass"
,
//
"lite_conv_elementwise_add_act_fuse_pass"
,
//
"lite_conv_elementwise_add_activation_fuse_pass"
,
//
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
"lite_elementwise_add_activation_fuse_pass"
,
//
#endif
"lite_fc_fuse_pass"
,
//
"lite_fc_fuse_pass"
,
//
"static_kernel_pick_pass"
,
//
"static_kernel_pick_pass"
,
//
"variable_place_inference_pass"
,
//
"variable_place_inference_pass"
,
//
...
@@ -60,8 +63,6 @@ class Optimizer {
...
@@ -60,8 +63,6 @@ class Optimizer {
"argument_type_display_pass"
,
//
"argument_type_display_pass"
,
//
"io_copy_kernel_pick_pass"
,
//
"io_copy_kernel_pick_pass"
,
//
"variable_place_inference_pass"
,
//
"variable_place_inference_pass"
,
//
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#endif
"runtime_context_assign_pass"
,
//
"runtime_context_assign_pass"
,
//
}});
}});
}
else
{
}
else
{
...
...
paddle/fluid/lite/core/optimizer_test.cc
浏览文件 @
e8ebb084
...
@@ -18,8 +18,8 @@
...
@@ -18,8 +18,8 @@
#include <utility>
#include <utility>
#include "paddle/fluid/lite/core/mir/generate_program_pass.h"
#include "paddle/fluid/lite/core/mir/generate_program_pass.h"
#include "paddle/fluid/lite/core/mir/pass_manager.h"
#include "paddle/fluid/lite/core/mir/pass_manager.h"
#include "paddle/fluid/lite/core/mir/passes.h"
#include "paddle/fluid/lite/core/mir/static_kernel_pick_pass.h"
#include "paddle/fluid/lite/core/mir/static_kernel_pick_pass.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/core/program_fake_utils.h"
#include "paddle/fluid/lite/core/program_fake_utils.h"
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/lite/core/profile/basic_profiler.cc
浏览文件 @
e8ebb084
...
@@ -19,7 +19,7 @@ namespace lite {
...
@@ -19,7 +19,7 @@ namespace lite {
namespace
profile
{
namespace
profile
{
const
int
BasicTimer
::
data_w
=
10
;
const
int
BasicTimer
::
data_w
=
10
;
const
int
BasicTimer
::
name_w
=
1
0
;
const
int
BasicTimer
::
name_w
=
1
5
;
}
// namespace profile
}
// namespace profile
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/core/tensor.h
浏览文件 @
e8ebb084
...
@@ -91,6 +91,18 @@ class DDimBase {
...
@@ -91,6 +91,18 @@ class DDimBase {
return
os
;
return
os
;
}
}
friend
bool
operator
==
(
const
DDimBase
&
a
,
const
DDimBase
&
b
)
{
if
(
a
.
size
()
!=
b
.
size
())
return
false
;
for
(
size_t
i
=
0
;
i
<
a
.
size
();
i
++
)
{
if
(
a
[
i
]
!=
b
[
i
])
return
false
;
}
return
true
;
}
friend
bool
operator
!=
(
const
DDimBase
&
a
,
const
DDimBase
&
b
)
{
return
!
(
a
==
b
);
}
private:
private:
DDimT
*
self
()
{
return
static_cast
<
DDimT
*>
(
this
);
}
DDimT
*
self
()
{
return
static_cast
<
DDimT
*>
(
this
);
}
const
DDimT
*
const_self
()
const
{
return
static_cast
<
const
DDimT
*>
(
this
);
}
const
DDimT
*
const_self
()
const
{
return
static_cast
<
const
DDimT
*>
(
this
);
}
...
@@ -154,6 +166,7 @@ class TensorBase {
...
@@ -154,6 +166,7 @@ class TensorBase {
const
void
*
raw_data
()
const
{
return
const_self
()
->
data
();
}
const
void
*
raw_data
()
const
{
return
const_self
()
->
data
();
}
size_t
data_size
()
const
{
return
const_self
()
->
dims
().
production
();
}
size_t
data_size
()
const
{
return
const_self
()
->
dims
().
production
();
}
size_t
memory_size
()
const
{
return
const_self
()
->
memory_size
();
}
void
ShareDataWith
(
const
TensorBase
&
other
)
{
self
()
->
ShareDataWith
(
other
);
}
void
ShareDataWith
(
const
TensorBase
&
other
)
{
self
()
->
ShareDataWith
(
other
);
}
void
CopyDataFrom
(
const
TensorBase
&
other
)
{
self
()
->
CopyDataFrom
(
other
);
}
void
CopyDataFrom
(
const
TensorBase
&
other
)
{
self
()
->
CopyDataFrom
(
other
);
}
...
@@ -175,5 +188,13 @@ class TensorBase {
...
@@ -175,5 +188,13 @@ class TensorBase {
}
}
};
};
template
<
typename
TensorT
>
bool
TensorCompareWith
(
const
TensorT
&
a
,
const
TensorT
&
b
)
{
if
(
a
.
dims
()
!=
b
.
dims
())
return
false
;
LOG
(
INFO
)
<<
"data_size: "
<<
a
.
data_size
();
if
(
memcmp
(
a
.
raw_data
(),
b
.
raw_data
(),
a
.
data_size
())
!=
0
)
return
false
;
return
true
;
}
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
paddle/fluid/lite/kernels/arm/CMakeLists.txt
浏览文件 @
e8ebb084
...
@@ -11,10 +11,12 @@ cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} math
...
@@ -11,10 +11,12 @@ cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} math
cc_library
(
softmax_compute_arm SRCS softmax_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
softmax_compute_arm SRCS softmax_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
conv_compute_arm SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
conv_compute_arm SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
batch_norm_compute_arm SRCS batch_norm_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
batch_norm_compute_arm SRCS batch_norm_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
elementwise_
add_compute_arm SRCS elementwise_add
_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
elementwise_
compute_arm SRCS elementwise
_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
pool_compute_arm SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
pool_compute_arm SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
split_compute_arm SRCS split_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
split_compute_arm SRCS split_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
concat_compute_arm SRCS concat_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
dropout_compute_arm SRCS dropout_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
dropout_compute_arm SRCS dropout_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
transpose_compute_arm SRCS transpose_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
lite_cc_test
(
test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm
)
lite_cc_test
(
test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm
)
lite_cc_test
(
test_activation_compute_arm SRCS activation_compute_test.cc DEPS activation_compute_arm
)
lite_cc_test
(
test_activation_compute_arm SRCS activation_compute_test.cc DEPS activation_compute_arm
)
...
@@ -22,11 +24,13 @@ lite_cc_test(test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_comput
...
@@ -22,11 +24,13 @@ lite_cc_test(test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_comput
lite_cc_test
(
test_softmax_compute_arm SRCS softmax_compute_test.cc DEPS softmax_compute_arm
)
lite_cc_test
(
test_softmax_compute_arm SRCS softmax_compute_test.cc DEPS softmax_compute_arm
)
lite_cc_test
(
test_conv_compute_arm SRCS conv_compute_test.cc DEPS conv_compute_arm
)
lite_cc_test
(
test_conv_compute_arm SRCS conv_compute_test.cc DEPS conv_compute_arm
)
lite_cc_test
(
test_batch_norm_compute_arm SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_arm
)
lite_cc_test
(
test_batch_norm_compute_arm SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_arm
)
lite_cc_test
(
test_elementwise_
add_compute_arm SRCS elementwise_add_compute_test.cc DEPS elementwise_add
_compute_arm
)
lite_cc_test
(
test_elementwise_
compute_arm SRCS elementwise_compute_test.cc DEPS elementwise
_compute_arm
)
lite_cc_test
(
test_pool_compute_arm SRCS pool_compute_test.cc DEPS pool_compute_arm
)
lite_cc_test
(
test_pool_compute_arm SRCS pool_compute_test.cc DEPS pool_compute_arm
)
lite_cc_test
(
test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm
)
lite_cc_test
(
test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm
)
lite_cc_test
(
test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm
)
lite_cc_test
(
test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm
)
lite_cc_test
(
test_concat_compute_arm SRCS concat_compute_test.cc DEPS concat_compute_arm
)
lite_cc_test
(
test_dropout_compute_arm SRCS dropout_compute_test.cc DEPS dropout_compute_arm
)
lite_cc_test
(
test_dropout_compute_arm SRCS dropout_compute_test.cc DEPS dropout_compute_arm
)
lite_cc_test
(
test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS transpose_compute_arm
)
set
(
arm_kernels
set
(
arm_kernels
fc_compute_arm
fc_compute_arm
...
@@ -36,10 +40,12 @@ set(arm_kernels
...
@@ -36,10 +40,12 @@ set(arm_kernels
softmax_compute_arm
softmax_compute_arm
conv_compute_arm
conv_compute_arm
batch_norm_compute_arm
batch_norm_compute_arm
elementwise_
add_
compute_arm
elementwise_compute_arm
pool_compute_arm
pool_compute_arm
split_compute_arm
split_compute_arm
concat_compute_arm
dropout_compute_arm
dropout_compute_arm
transpose_compute_arm
)
)
set
(
arm_kernels
"
${
arm_kernels
}
"
CACHE INTERNAL
"arm kernels"
)
set
(
arm_kernels
"
${
arm_kernels
}
"
CACHE INTERNAL
"arm kernels"
)
...
...
paddle/fluid/lite/kernels/arm/concat_compute.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/concat_compute.h"
#include <string>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
std
::
vector
<
size_t
>
stride_numel
(
const
DDim
&
ddim
)
{
std
::
vector
<
size_t
>
strides
(
ddim
.
size
());
strides
[
ddim
.
size
()
-
1
]
=
ddim
[
ddim
.
size
()
-
1
];
for
(
int
i
=
ddim
.
size
()
-
2
;
i
>=
0
;
--
i
)
{
strides
[
i
]
=
strides
[
i
+
1
]
*
ddim
[
i
];
}
return
strides
;
}
void
ConcatCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
ConcatParam
>
();
std
::
vector
<
lite
::
Tensor
*>
inputs
=
param
.
x
;
auto
*
out
=
param
.
output
;
int
axis
=
param
.
axis
;
out
->
mutable_data
<
float
>
();
/// Sometimes direct copies will be faster, this maybe need deeply analysis.
if
(
axis
==
0
&&
inputs
.
size
()
<
10
)
{
size_t
output_offset
=
0
;
for
(
auto
*
in
:
inputs
)
{
auto
in_stride
=
stride_numel
(
in
->
dims
());
auto
out_stride
=
stride_numel
(
out
->
dims
());
void
*
dst
=
out
->
mutable_data
<
float
>
()
+
output_offset
;
const
void
*
src
=
in
->
data
<
float
>
();
#if 0
LOG(INFO) << "out_stride.size():" << out_stride.size();
LOG(INFO) << "out_stride[0]" << out_stride[0];
for (int i=0; i < out_stride.size(); ++i) {
LOG(INFO) << "out_stride[" << i << "]:" << out_stride[i];
}
LOG(INFO) << "in_stride.size():" << in_stride.size();
for (int i=0; i < in_stride.size(); ++i) {
LOG(INFO) << "in_stride[" << i << "]:" << in_stride[i];
}
#endif
// src and dst tensor should have the same dims size.
CHECK
(
in_stride
.
size
()
==
out_stride
.
size
());
std
::
memcpy
(
dst
,
src
,
sizeof
(
float
)
*
in_stride
[
0
]);
output_offset
+=
in_stride
[
0
];
}
}
else
{
std
::
vector
<
lite
::
Tensor
*>
inputs_concat
(
inputs
.
size
());
for
(
int
j
=
0
;
j
<
inputs
.
size
();
++
j
)
{
inputs_concat
[
j
]
=
inputs
[
j
];
}
lite
::
arm
::
math
::
concat_func
(
inputs_concat
,
axis
,
out
);
}
return
;
}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
concat
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ConcatCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
paddle/fluid/lite/kernels/arm/concat_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/operators/concat_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
class
ConcatCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ConcatParam
;
void
Run
()
override
;
virtual
~
ConcatCompute
()
=
default
;
};
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/arm/concat_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/concat_compute.h"
#include <gtest/gtest.h>
#include <limits>
#include <string>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/lite_tensor.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
bool
infer_shape
(
const
operators
::
ConcatParam
&
param
)
{
std
::
vector
<
lite
::
DDim
>
input_dims
;
for
(
auto
p
:
param
.
x
)
{
input_dims
.
push_back
(
p
->
dims
());
}
size_t
axis
=
static_cast
<
size_t
>
(
param
.
axis
);
const
size_t
n
=
input_dims
.
size
();
CHECK_GT_OR_FALSE
(
n
,
0
);
auto
&
out_dims
=
input_dims
[
0
];
size_t
in_zero_dims_size
=
out_dims
.
size
();
for
(
size_t
i
=
1
;
i
<
n
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
in_zero_dims_size
;
j
++
)
{
if
(
j
==
axis
)
{
out_dims
[
axis
]
+=
input_dims
[
i
][
j
];
}
else
{
CHECK_EQ_OR_FALSE
(
out_dims
[
j
],
input_dims
[
i
][
j
]);
}
}
}
if
(
out_dims
[
axis
]
<
0
)
{
out_dims
[
axis
]
=
-
1
;
}
// Set output dims
param
.
output
->
Resize
(
lite
::
DDim
(
out_dims
));
return
true
;
}
void
concat_compute_ref
(
const
operators
::
ConcatParam
&
param
)
{
std
::
vector
<
lite
::
Tensor
*>
input
=
param
.
x
;
int
axis
=
param
.
axis
;
infer_shape
(
param
);
lite
::
Tensor
*
output
=
param
.
output
;
int
num
=
input
.
size
();
int
rows
=
1
;
auto
dim_0
=
input
[
0
]
->
dims
();
for
(
int
i
=
0
;
i
<
axis
;
++
i
)
{
rows
*=
dim_0
[
i
];
}
int
out_rows
=
rows
,
out_cols
=
0
;
std
::
vector
<
int
>
input_cols
(
input
.
size
());
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
int
input_i_numel
=
input
[
i
]
->
dims
().
size
()
==
0
?
0
:
1
;
for
(
int
didx
=
0
;
didx
<
input
[
i
]
->
dims
().
size
();
++
didx
)
{
input_i_numel
*=
input
[
i
]
->
dims
()[
didx
];
}
int
t_cols
=
input_i_numel
/
rows
;
out_cols
+=
t_cols
;
input_cols
[
i
]
=
t_cols
;
}
// computation
auto
output_data
=
output
->
mutable_data
<
float
>
();
int
col_idx
=
0
;
for
(
int
j
=
0
;
j
<
num
;
++
j
)
{
int
col_len
=
input_cols
[
j
];
auto
input_data
=
input
[
j
]
->
data
<
float
>
();
for
(
int
k
=
0
;
k
<
out_rows
;
++
k
)
{
memcpy
(
output_data
+
k
*
out_cols
+
col_idx
,
input_data
+
k
*
col_len
,
sizeof
(
float
)
*
col_len
);
}
col_idx
+=
col_len
;
}
}
TEST
(
concat_arm
,
init
)
{
ConcatCompute
concat
;
ASSERT_EQ
(
concat
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
concat
.
target
(),
TARGET
(
kARM
));
}
TEST
(
concat_arm
,
compute_input_single
)
{
ConcatCompute
concat
;
operators
::
ConcatParam
param
;
LOG
(
INFO
)
<<
"test concat start"
;
lite
::
Tensor
output
;
lite
::
Tensor
output_ref
;
lite
::
Tensor
tensorA
;
DDimLite
ddimA
({
10
,
4
,
3
,
2
});
tensorA
.
Resize
(
ddimA
);
for
(
int
i
=
0
;
i
<
ddimA
.
data
()[
0
]
*
ddimA
.
data
()[
1
]
*
ddimA
.
data
()[
2
]
*
ddimA
.
data
()[
3
];
i
++
)
{
tensorA
.
mutable_data
<
float
>
()[
i
]
=
i
;
}
param
.
x
.
push_back
(
&
tensorA
);
for
(
int
cur_axis
:
{
0
,
1
})
{
param
.
output
=
&
output
;
param
.
axis
=
cur_axis
;
CHECK
(
infer_shape
(
param
));
concat
.
SetParam
(
param
);
LOG
(
INFO
)
<<
"test concat start cur_axis:"
<<
cur_axis
;
concat
.
Run
();
LOG
(
INFO
)
<<
"concat.Run end"
;
param
.
output
=
&
output_ref
;
LOG
(
INFO
)
<<
"concat_compute_ref start"
;
concat_compute_ref
(
param
);
LOG
(
INFO
)
<<
"concat_compute_ref end"
;
auto
*
output_data
=
output
.
data
<
float
>
();
auto
*
output_ref_data
=
output_ref
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
(
ddimA
.
data
()[
0
])
*
ddimA
.
data
()[
1
]
*
ddimA
.
data
()[
2
]
*
ddimA
.
data
()[
3
];
i
++
)
{
// LOG(INFO) << "output[" << i << "]:" << output_data[i] << "
// output_ref_data[" << i << "]:" << output_ref_data[i];
EXPECT_NEAR
(
output_data
[
i
],
output_ref_data
[
i
],
1e-5
);
}
}
}
TEST
(
concat_arm
,
compute_input_multi
)
{
ConcatCompute
concat
;
operators
::
ConcatParam
param
;
LOG
(
INFO
)
<<
"test concat start"
;
// init param
// x: tensorA, tensorB, tensorC, tensorD
// axis: 0
lite
::
Tensor
output
;
lite
::
Tensor
output_ref
;
lite
::
Tensor
tensorA
;
lite
::
Tensor
tensorB
;
lite
::
Tensor
tensorC
;
lite
::
Tensor
tensorD
;
DDimLite
ddimA
({
10
,
4
,
3
,
2
});
DDimLite
ddimB
({
20
,
4
,
3
,
2
});
DDimLite
ddimC
({
30
,
4
,
3
,
2
});
DDimLite
ddimD
({
40
,
4
,
3
,
2
});
tensorA
.
Resize
(
ddimA
);
tensorB
.
Resize
(
ddimB
);
tensorC
.
Resize
(
ddimC
);
tensorD
.
Resize
(
ddimD
);
for
(
int
i
=
0
;
i
<
ddimA
.
data
()[
0
]
*
ddimA
.
data
()[
1
]
*
ddimA
.
data
()[
2
]
*
ddimA
.
data
()[
3
];
i
++
)
{
tensorA
.
mutable_data
<
float
>
()[
i
]
=
i
;
}
for
(
int
i
=
0
;
i
<
ddimB
.
data
()[
0
]
*
ddimB
.
data
()[
1
]
*
ddimB
.
data
()[
2
]
*
ddimB
.
data
()[
3
];
i
++
)
{
tensorB
.
mutable_data
<
float
>
()[
i
]
=
i
+
1
;
}
for
(
int
i
=
0
;
i
<
ddimC
.
data
()[
0
]
*
ddimC
.
data
()[
1
]
*
ddimC
.
data
()[
2
]
*
ddimC
.
data
()[
3
];
i
++
)
{
tensorC
.
mutable_data
<
float
>
()[
i
]
=
i
+
2
;
}
for
(
int
i
=
0
;
i
<
ddimD
.
data
()[
0
]
*
ddimD
.
data
()[
1
]
*
ddimD
.
data
()[
2
]
*
ddimD
.
data
()[
3
];
i
++
)
{
tensorD
.
mutable_data
<
float
>
()[
i
]
=
i
+
3
;
}
param
.
x
.
push_back
(
&
tensorA
);
param
.
x
.
push_back
(
&
tensorB
);
param
.
x
.
push_back
(
&
tensorC
);
param
.
x
.
push_back
(
&
tensorD
);
for
(
int
cur_axis
:
{
0
})
{
param
.
output
=
&
output
;
param
.
axis
=
cur_axis
;
CHECK
(
infer_shape
(
param
));
concat
.
SetParam
(
param
);
LOG
(
INFO
)
<<
"test concat start cur_axis:"
<<
cur_axis
;
concat
.
Run
();
LOG
(
INFO
)
<<
"concat.Run end"
;
param
.
output
=
&
output_ref
;
LOG
(
INFO
)
<<
"concat_compute_ref start"
;
concat_compute_ref
(
param
);
LOG
(
INFO
)
<<
"concat_compute_ref end"
;
auto
*
output_data
=
output
.
data
<
float
>
();
auto
*
output_ref_data
=
output_ref
.
data
<
float
>
();
int
elem_num
=
(
ddimA
.
data
()[
0
]
+
ddimB
.
data
()[
0
]
+
ddimC
.
data
()[
0
]
+
ddimD
.
data
()[
0
])
*
ddimA
.
data
()[
1
]
*
ddimA
.
data
()[
2
]
*
ddimA
.
data
()[
3
];
for
(
int
i
=
0
;
i
<
elem_num
;
i
++
)
{
// LOG(INFO) << "output[" << i << "]:" << output_data[i] << "
// output_ref_data[" << i << "]:" << output_ref_data[i];
EXPECT_NEAR
(
output_data
[
i
],
output_ref_data
[
i
],
1e-5
);
}
}
}
TEST
(
concat
,
retrive_op
)
{
auto
concat
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"concat"
);
ASSERT_FALSE
(
concat
.
empty
());
ASSERT_TRUE
(
concat
.
front
());
}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
concat
,
kARM
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/arm/conv_compute.cc
浏览文件 @
e8ebb084
...
@@ -28,6 +28,8 @@ void ConvCompute::PrepareForRun() {
...
@@ -28,6 +28,8 @@ void ConvCompute::PrepareForRun() {
auto
o_dims
=
param
.
output
->
dims
();
auto
o_dims
=
param
.
output
->
dims
();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
// TODO(xxx): make api and expose it
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
int
win
=
x_dims
[
3
];
// nchw
int
win
=
x_dims
[
3
];
// nchw
int
hin
=
x_dims
[
2
];
int
hin
=
x_dims
[
2
];
...
...
paddle/fluid/lite/kernels/arm/elementwise_
add_
compute.cc
→
paddle/fluid/lite/kernels/arm/elementwise_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,7 +12,8 @@
...
@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/elementwise_add_compute.h"
#include "paddle/fluid/lite/kernels/arm/elementwise_compute.h"
#include <string>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/arm/math/funcs.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -20,6 +21,30 @@ namespace lite {
...
@@ -20,6 +21,30 @@ namespace lite {
namespace
kernels
{
namespace
kernels
{
namespace
arm
{
namespace
arm
{
inline
bool
is_broadcast
(
const
DDim
&
x_dims
,
const
DDim
&
y_dims
,
int
axis
,
int
*
pre
,
int
*
n
,
int
*
post
)
{
if
(
axis
<
0
)
{
axis
=
x_dims
.
size
()
-
y_dims
.
size
();
}
if
(
x_dims
.
size
()
==
y_dims
.
size
())
{
return
false
;
}
*
pre
=
1
;
*
n
=
1
;
*
post
=
1
;
for
(
int
i
=
0
;
i
<
axis
;
++
i
)
{
(
*
pre
)
*=
x_dims
[
i
];
}
for
(
int
i
=
0
;
i
<
y_dims
.
size
();
++
i
)
{
CHECK_EQ
(
x_dims
[
i
+
axis
],
y_dims
[
i
])
<<
"Broadcast dimension mismatch."
;
(
*
n
)
*=
y_dims
[
i
];
}
for
(
int
i
=
axis
+
y_dims
.
size
();
i
<
x_dims
.
size
();
++
i
)
{
(
*
post
)
*=
x_dims
[
i
];
}
return
true
;
}
void
ElementwiseAddCompute
::
Run
()
{
void
ElementwiseAddCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
ElementwiseParam
>
();
auto
&
param
=
Param
<
operators
::
ElementwiseParam
>
();
const
float
*
x_data
=
param
.
X
->
data
<
float
>
();
const
float
*
x_data
=
param
.
X
->
data
<
float
>
();
...
@@ -28,27 +53,40 @@ void ElementwiseAddCompute::Run() {
...
@@ -28,27 +53,40 @@ void ElementwiseAddCompute::Run() {
int
axis
=
param
.
axis
;
int
axis
=
param
.
axis
;
auto
x_dims
=
param
.
X
->
dims
();
auto
x_dims
=
param
.
X
->
dims
();
auto
y_dims
=
param
.
Y
->
dims
();
auto
y_dims
=
param
.
Y
->
dims
();
if
(
axis
<
0
)
{
int
pre
,
n
,
post
;
axis
=
x_dims
.
size
()
-
y_dims
.
size
();
if
(
is_broadcast
(
x_dims
,
y_dims
,
axis
,
&
pre
,
&
n
,
&
post
))
{
}
lite
::
arm
::
math
::
elementwise_add_broadcast
(
x_data
,
y_data
,
out_data
,
pre
,
n
,
if
(
x_dims
.
size
()
==
y_dims
.
size
())
{
post
);
}
else
{
lite
::
arm
::
math
::
elementwise_add
(
x_data
,
y_data
,
out_data
,
lite
::
arm
::
math
::
elementwise_add
(
x_data
,
y_data
,
out_data
,
x_dims
.
production
());
x_dims
.
production
());
}
else
{
}
int
batch
=
1
;
}
int
channels
=
1
;
int
num
=
1
;
void
ElementwiseAddActivationCompute
::
Run
()
{
for
(
int
i
=
0
;
i
<
axis
;
++
i
)
{
auto
&
param
=
Param
<
operators
::
FusionElementwiseActivationParam
>
();
batch
*=
x_dims
[
i
];
const
float
*
x_data
=
param
.
X
->
data
<
float
>
();
}
const
float
*
y_data
=
param
.
Y
->
data
<
float
>
();
for
(
int
i
=
0
;
i
<
y_dims
.
size
();
++
i
)
{
float
*
out_data
=
param
.
Out
->
mutable_data
<
float
>
();
channels
*=
y_dims
[
i
];
int
axis
=
param
.
axis
;
std
::
string
act_type
=
param
.
act_type
;
auto
x_dims
=
param
.
X
->
dims
();
auto
y_dims
=
param
.
Y
->
dims
();
int
pre
,
n
,
post
;
if
(
is_broadcast
(
x_dims
,
y_dims
,
axis
,
&
pre
,
&
n
,
&
post
))
{
if
(
act_type
==
"relu"
)
{
lite
::
arm
::
math
::
elementwise_add_relu_broadcast
(
x_data
,
y_data
,
out_data
,
pre
,
n
,
post
);
}
else
{
LOG
(
FATAL
)
<<
"unsupported Activation type: "
<<
act_type
;
}
}
for
(
int
i
=
y_dims
.
size
()
+
axis
;
i
<
x_dims
.
size
();
++
i
)
{
}
else
{
num
*=
x_dims
[
i
];
if
(
act_type
==
"relu"
)
{
lite
::
arm
::
math
::
elementwise_add_relu
(
x_data
,
y_data
,
out_data
,
x_dims
.
production
());
}
else
{
LOG
(
FATAL
)
<<
"unsupported Activation type: "
<<
act_type
;
}
}
lite
::
arm
::
math
::
elementwise_add_axis
(
x_data
,
y_data
,
out_data
,
batch
,
channels
,
num
);
}
}
}
}
...
@@ -63,3 +101,11 @@ REGISTER_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW,
...
@@ -63,3 +101,11 @@ REGISTER_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW,
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
REGISTER_LITE_KERNEL
(
fusion_elementwise_add_activation
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ElementwiseAddActivationCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
paddle/fluid/lite/kernels/arm/elementwise_
add_
compute.h
→
paddle/fluid/lite/kernels/arm/elementwise_compute.h
浏览文件 @
e8ebb084
...
@@ -30,6 +30,14 @@ class ElementwiseAddCompute
...
@@ -30,6 +30,14 @@ class ElementwiseAddCompute
virtual
~
ElementwiseAddCompute
()
=
default
;
virtual
~
ElementwiseAddCompute
()
=
default
;
};
};
class
ElementwiseAddActivationCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
public:
void
Run
()
override
;
virtual
~
ElementwiseAddActivationCompute
()
=
default
;
};
}
// namespace arm
}
// namespace arm
}
// namespace kernels
}
// namespace kernels
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/kernels/arm/elementwise_
add_
compute_test.cc
→
paddle/fluid/lite/kernels/arm/elementwise_compute_test.cc
浏览文件 @
e8ebb084
...
@@ -12,8 +12,9 @@
...
@@ -12,8 +12,9 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/elementwise_
add_
compute.h"
#include "paddle/fluid/lite/kernels/arm/elementwise_compute.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
...
@@ -37,7 +38,9 @@ TEST(elementwise_add_arm, init) {
...
@@ -37,7 +38,9 @@ TEST(elementwise_add_arm, init) {
}
}
template
<
typename
dtype
>
template
<
typename
dtype
>
void
elementwise_add_compute_ref
(
const
operators
::
ElementwiseParam
&
param
)
{
void
elementwise_compute_ref
(
const
operators
::
ElementwiseParam
&
param
,
const
std
::
string
elt_type
,
const
std
::
string
act_type
)
{
const
dtype
*
x_data
=
param
.
X
->
data
<
const
dtype
>
();
const
dtype
*
x_data
=
param
.
X
->
data
<
const
dtype
>
();
const
dtype
*
y_data
=
param
.
Y
->
data
<
const
dtype
>
();
const
dtype
*
y_data
=
param
.
Y
->
data
<
const
dtype
>
();
dtype
*
out_data
=
param
.
Out
->
mutable_data
<
dtype
>
();
dtype
*
out_data
=
param
.
Out
->
mutable_data
<
dtype
>
();
...
@@ -59,17 +62,52 @@ void elementwise_add_compute_ref(const operators::ElementwiseParam& param) {
...
@@ -59,17 +62,52 @@ void elementwise_add_compute_ref(const operators::ElementwiseParam& param) {
for
(
int
i
=
y_dims
.
size
()
+
axis
;
i
<
x_dims
.
size
();
++
i
)
{
for
(
int
i
=
y_dims
.
size
()
+
axis
;
i
<
x_dims
.
size
();
++
i
)
{
num
*=
x_dims
[
i
];
num
*=
x_dims
[
i
];
}
}
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
// do elementwise add/sub/max...
for
(
int
j
=
0
;
j
<
channels
;
++
j
)
{
if
(
elt_type
==
"add"
)
{
int
offset
=
(
i
*
channels
+
j
)
*
num
;
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
const
dtype
*
din_ptr
=
x_data
+
offset
;
for
(
int
j
=
0
;
j
<
channels
;
++
j
)
{
const
dtype
diny_data
=
y_data
[
j
];
int
offset
=
(
i
*
channels
+
j
)
*
num
;
dtype
*
dout_ptr
=
out_data
+
offset
;
const
dtype
*
din_ptr
=
x_data
+
offset
;
for
(
int
k
=
0
;
k
<
num
;
++
k
)
{
const
dtype
diny_data
=
y_data
[
j
];
*
dout_ptr
=
*
din_ptr
+
diny_data
;
dtype
*
dout_ptr
=
out_data
+
offset
;
dout_ptr
++
;
for
(
int
k
=
0
;
k
<
num
;
++
k
)
{
din_ptr
++
;
*
dout_ptr
=
*
din_ptr
+
diny_data
;
dout_ptr
++
;
din_ptr
++
;
}
}
}
}
else
if
(
elt_type
==
"sub"
)
{
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int
j
=
0
;
j
<
channels
;
++
j
)
{
int
offset
=
(
i
*
channels
+
j
)
*
num
;
const
dtype
*
din_ptr
=
x_data
+
offset
;
const
dtype
diny_data
=
y_data
[
j
];
dtype
*
dout_ptr
=
out_data
+
offset
;
for
(
int
k
=
0
;
k
<
num
;
++
k
)
{
*
dout_ptr
=
*
din_ptr
-
diny_data
;
dout_ptr
++
;
din_ptr
++
;
}
}
}
}
else
{
LOG
(
FATAL
)
<<
"unsupported Elementwise type: "
<<
elt_type
;
}
// do activation relu/sigmod...
if
(
act_type
.
size
()
>
0
)
{
if
(
act_type
==
"relu"
)
{
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int
j
=
0
;
j
<
channels
;
++
j
)
{
dtype
*
dout_ptr
=
out_data
+
(
i
*
channels
+
j
)
*
num
;
for
(
int
k
=
0
;
k
<
num
;
++
k
)
{
*
dout_ptr
=
*
dout_ptr
>
0.0
f
?
*
dout_ptr
:
0.0
f
;
dout_ptr
++
;
}
}
}
}
}
else
{
LOG
(
FATAL
)
<<
"unsupported Activation type: "
<<
elt_type
;
}
}
}
}
}
}
...
@@ -123,7 +161,7 @@ TEST(elementwise_add, compute) {
...
@@ -123,7 +161,7 @@ TEST(elementwise_add, compute) {
elementwise_add
.
SetParam
(
param
);
elementwise_add
.
SetParam
(
param
);
elementwise_add
.
Run
();
elementwise_add
.
Run
();
param
.
Out
=
&
output_ref
;
param
.
Out
=
&
output_ref
;
elementwise_
add_compute_ref
<
float
>
(
param
);
elementwise_
compute_ref
<
float
>
(
param
,
"add"
,
""
);
for
(
int
i
=
0
;
i
<
output
.
dims
().
production
();
i
++
)
{
for
(
int
i
=
0
;
i
<
output
.
dims
().
production
();
i
++
)
{
EXPECT_NEAR
(
output_data
[
i
],
output_ref_data
[
i
],
1e-5
);
EXPECT_NEAR
(
output_data
[
i
],
output_ref_data
[
i
],
1e-5
);
}
}
...
@@ -135,9 +173,91 @@ TEST(elementwise_add, compute) {
...
@@ -135,9 +173,91 @@ TEST(elementwise_add, compute) {
}
}
}
}
TEST
(
fusion_elementwise_add_activation_arm
,
retrive_op
)
{
auto
fusion_elementwise_add_activation
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"fusion_elementwise_add_activation"
);
ASSERT_FALSE
(
fusion_elementwise_add_activation
.
empty
());
ASSERT_TRUE
(
fusion_elementwise_add_activation
.
front
());
}
TEST
(
fusion_elementwise_add_activation_arm
,
init
)
{
ElementwiseAddActivationCompute
fusion_elementwise_add_activation
;
ASSERT_EQ
(
fusion_elementwise_add_activation
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
fusion_elementwise_add_activation
.
target
(),
TARGET
(
kARM
));
}
TEST
(
fusion_elementwise_add_activation_arm
,
compute
)
{
ElementwiseAddActivationCompute
fusion_elementwise_add_activation
;
operators
::
FusionElementwiseActivationParam
param
;
lite
::
Tensor
x
,
y
,
output
,
output_ref
;
for
(
auto
act_type
:
{
"relu"
})
{
for
(
auto
n
:
{
1
,
3
,
4
,
11
})
{
for
(
auto
c
:
{
1
,
3
,
4
,
11
})
{
for
(
auto
h
:
{
1
,
3
,
4
,
11
})
{
for
(
auto
w
:
{
1
,
3
,
4
,
11
})
{
for
(
auto
axis
:
{
-
1
,
0
,
1
,
2
,
3
})
{
for
(
auto
yd
:
{
std
::
vector
<
int64_t
>
({
n
}),
std
::
vector
<
int64_t
>
({
c
}),
std
::
vector
<
int64_t
>
({
h
}),
std
::
vector
<
int64_t
>
({
w
}),
std
::
vector
<
int64_t
>
({
n
,
c
}),
std
::
vector
<
int64_t
>
({
c
,
h
}),
std
::
vector
<
int64_t
>
({
h
,
w
}),
std
::
vector
<
int64_t
>
({
n
,
c
,
h
}),
std
::
vector
<
int64_t
>
({
c
,
h
,
w
}),
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
})})
{
auto
x_dim
=
DDim
(
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
}));
auto
y_dim
=
DDim
(
yd
);
int
axis_t
=
axis
<
0
?
x_dim
.
size
()
-
y_dim
.
size
()
:
axis
;
if
(
axis_t
+
y_dim
.
size
()
>
4
)
continue
;
bool
flag
=
false
;
for
(
int
i
=
0
;
i
<
y_dim
.
size
();
i
++
)
{
if
(
x_dim
[
i
+
axis_t
]
!=
y_dim
[
i
])
flag
=
true
;
}
if
(
flag
)
continue
;
x
.
Resize
(
x_dim
);
y
.
Resize
(
y_dim
);
output
.
Resize
(
x_dim
);
output_ref
.
Resize
(
x_dim
);
auto
*
x_data
=
x
.
mutable_data
<
float
>
();
auto
*
y_data
=
y
.
mutable_data
<
float
>
();
auto
*
output_data
=
output
.
mutable_data
<
float
>
();
auto
*
output_ref_data
=
output_ref
.
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
x_dim
.
production
();
i
++
)
{
float
sign
=
i
%
3
==
0
?
-
1.0
f
:
1.0
f
;
x_data
[
i
]
=
i
*
sign
;
}
for
(
int
i
=
0
;
i
<
y_dim
.
production
();
i
++
)
{
float
sign
=
i
%
2
==
0
?
0.5
f
:
-
0.5
f
;
y_data
[
i
]
=
i
*
sign
;
}
param
.
X
=
&
x
;
param
.
Y
=
&
y
;
param
.
axis
=
axis
;
param
.
Out
=
&
output
;
param
.
act_type
=
act_type
;
fusion_elementwise_add_activation
.
SetParam
(
param
);
fusion_elementwise_add_activation
.
Run
();
param
.
Out
=
&
output_ref
;
elementwise_compute_ref
<
float
>
(
param
,
"add"
,
act_type
);
for
(
int
i
=
0
;
i
<
output
.
dims
().
production
();
i
++
)
{
EXPECT_NEAR
(
output_data
[
i
],
output_ref_data
[
i
],
1e-5
);
}
}
}
}
}
}
}
}
}
}
// namespace arm
}
// namespace arm
}
// namespace kernels
}
// namespace kernels
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
USE_LITE_KERNEL
(
elementwise_add
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
fusion_elementwise_add_activation
,
kARM
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/arm/fc_compute.cc
浏览文件 @
e8ebb084
...
@@ -27,6 +27,9 @@ void FcCompute::PrepareForRun() {
...
@@ -27,6 +27,9 @@ void FcCompute::PrepareForRun() {
auto
x_dims
=
param
.
input
->
dims
();
auto
x_dims
=
param
.
input
->
dims
();
auto
w_dims
=
param
.
w
->
dims
();
auto
w_dims
=
param
.
w
->
dims
();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
CHECK_GE
(
x_dims
.
size
(),
2UL
);
CHECK_GE
(
x_dims
.
size
(),
2UL
);
CHECK_EQ
(
w_dims
.
size
(),
2UL
);
CHECK_EQ
(
w_dims
.
size
(),
2UL
);
CHECK_EQ
(
param
.
output
->
dims
().
size
(),
2UL
);
CHECK_EQ
(
param
.
output
->
dims
().
size
(),
2UL
);
...
...
paddle/fluid/lite/kernels/arm/mul_compute.cc
浏览文件 @
e8ebb084
...
@@ -23,7 +23,8 @@ namespace kernels {
...
@@ -23,7 +23,8 @@ namespace kernels {
namespace
arm
{
namespace
arm
{
void
MulCompute
::
PrepareForRun
()
{
void
MulCompute
::
PrepareForRun
()
{
// TODO(TJ): transpose x or y if necessary
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
}
}
void
MulCompute
::
Run
()
{
void
MulCompute
::
Run
()
{
...
...
paddle/fluid/lite/kernels/arm/pool_compute.cc
浏览文件 @
e8ebb084
...
@@ -24,6 +24,11 @@ namespace lite {
...
@@ -24,6 +24,11 @@ namespace lite {
namespace
kernels
{
namespace
kernels
{
namespace
arm
{
namespace
arm
{
void
PoolCompute
::
PrepareForRun
()
{
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
ctx
.
SetRunMode
(
LITE_POWER_HIGH
,
4
);
}
void
PoolCompute
::
Run
()
{
void
PoolCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
PoolParam
>
();
auto
&
param
=
Param
<
operators
::
PoolParam
>
();
auto
&
in_dims
=
param
.
x
->
dims
();
auto
&
in_dims
=
param
.
x
->
dims
();
...
...
paddle/fluid/lite/kernels/arm/pool_compute.h
浏览文件 @
e8ebb084
...
@@ -26,6 +26,7 @@ class PoolCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
...
@@ -26,6 +26,7 @@ class PoolCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
public:
public:
using
param_t
=
operators
::
PoolParam
;
using
param_t
=
operators
::
PoolParam
;
void
PrepareForRun
()
override
;
void
Run
()
override
;
void
Run
()
override
;
TargetType
target
()
const
override
;
TargetType
target
()
const
override
;
...
...
paddle/fluid/lite/kernels/arm/transpose_compute.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/transpose_compute.h"
#include <string>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
bool
IsShuffleChannel
(
const
std
::
vector
<
int
>
&
axis
)
{
bool
is_shuffle_channel
=
true
;
if
(
axis
.
size
()
>
2
&&
axis
[
0
]
==
0
&&
axis
[
1
]
==
2
&&
axis
[
2
]
==
1
)
{
for
(
int
i
=
3
;
i
<
axis
.
size
();
++
i
)
{
if
(
axis
[
i
]
!=
i
)
{
is_shuffle_channel
=
false
;
break
;
}
}
}
else
{
return
false
;
}
return
is_shuffle_channel
;
}
template
<
typename
Dtype
>
void
ShuffleChannelCompute
(
const
std
::
vector
<
int
>
&
axis
,
const
lite
::
Tensor
*
input
,
lite
::
Tensor
*
output
)
{
const
Dtype
*
input_ptr
=
input
->
data
<
Dtype
>
();
Dtype
*
output_ptr
=
output
->
mutable_data
<
Dtype
>
();
// input and output's shape dimension must >= 2 && <= 6.
const
DDim
&
in_dim
=
input
->
dims
();
const
DDim
&
out_dim
=
output
->
dims
();
size_t
offset
=
1
;
for
(
int
i
=
3
;
i
<
axis
.
size
();
++
i
)
{
offset
*=
in_dim
[
i
];
}
#pragma omp parallel for collapse(3)
for
(
int
batch
=
0
;
batch
<
out_dim
[
0
];
++
batch
)
{
for
(
int
c1
=
0
;
c1
<
out_dim
[
1
];
++
c1
)
{
for
(
int
c2
=
0
;
c2
<
out_dim
[
2
];
++
c2
)
{
size_t
out_offset
=
((
batch
*
out_dim
[
1
]
+
c1
)
*
out_dim
[
2
]
+
c2
)
*
offset
;
size_t
in_offset
=
((
batch
*
in_dim
[
1
]
+
c2
)
*
in_dim
[
2
]
+
c1
)
*
offset
;
memcpy
(
output_ptr
+
out_offset
,
input_ptr
+
in_offset
,
offset
*
sizeof
(
Dtype
));
}
}
}
}
template
<
typename
Dtype
>
void
TransposeCompute_
(
const
std
::
vector
<
int
>
&
axis
,
const
lite
::
Tensor
*
input
,
lite
::
Tensor
*
output
)
{
// const Dtype *input_ptr = input->data<Dtype>();
const
Dtype
*
input_ptr
=
input
->
data
<
float
>
();
Dtype
*
output_ptr
=
output
->
mutable_data
<
Dtype
>
();
// input and output's shape dimension must >= 2 && <= 6.
const
DDim
&
in_dim
=
input
->
dims
();
const
DDim
&
out_dim
=
output
->
dims
();
// precompute inverted output dim and strides
size_t
rout_dim
[
6
],
strides
[
6
];
int
permute
=
axis
.
size
();
// permute must >=2 && <= 6.
for
(
int
i
=
0
;
i
<
permute
;
++
i
)
{
int
k
=
permute
-
1
-
i
;
strides
[
k
]
=
1
;
for
(
int
j
=
axis
[
i
]
+
1
;
j
<
permute
;
++
j
)
{
strides
[
k
]
*=
in_dim
[
j
];
}
rout_dim
[
k
]
=
out_dim
[
i
];
}
// unroll the first 2 dimensions
int
reamin_dim
=
1
;
for
(
int
i
=
2
;
i
<
out_dim
.
size
();
++
i
)
{
reamin_dim
*=
out_dim
[
i
];
}
#pragma omp parallel for collapse(2)
for
(
int
batch
=
0
;
batch
<
out_dim
[
0
];
++
batch
)
{
for
(
int
j
=
0
;
j
<
out_dim
[
1
];
++
j
)
{
size_t
offset
=
batch
*
strides
[
permute
-
1
]
+
j
*
strides
[
permute
-
2
];
Dtype
*
out_ptr
=
output_ptr
+
(
batch
*
out_dim
[
1
]
+
j
)
*
reamin_dim
;
int
indics
[
4
]
=
{
0
,
0
,
0
,
0
};
for
(
int
k
=
0
;
k
<
reamin_dim
;
++
k
)
{
out_ptr
[
k
]
=
input_ptr
[
offset
];
indics
[
0
]
+=
1
;
offset
+=
strides
[
0
];
for
(
int
p
=
0
;
p
<
permute
-
3
;
++
p
)
{
if
(
indics
[
p
]
==
rout_dim
[
p
])
{
indics
[
p
+
1
]
+=
1
;
indics
[
p
]
=
0
;
offset
+=
strides
[
p
+
1
];
offset
-=
rout_dim
[
p
]
*
strides
[
p
];
}
else
{
break
;
}
}
}
}
}
}
// Transpose
void
TransposeCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
TransposeParam
>
();
auto
*
input
=
param
.
x
;
auto
*
output
=
param
.
output
;
const
std
::
vector
<
int
>
axis
=
param
.
axis
;
bool
shuffle_channel
=
IsShuffleChannel
(
axis
);
if
(
shuffle_channel
)
{
ShuffleChannelCompute
<
float
>
(
axis
,
input
,
output
);
}
else
{
TransposeCompute_
<
float
>
(
axis
,
input
,
output
);
}
return
;
}
// Transpose2
void
Transpose2Compute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
TransposeParam
>
();
auto
*
input
=
param
.
x
;
auto
*
output
=
param
.
output
;
const
std
::
vector
<
int
>
axis
=
param
.
axis
;
bool
shuffle_channel
=
IsShuffleChannel
(
axis
);
if
(
shuffle_channel
)
{
ShuffleChannelCompute
<
float
>
(
axis
,
input
,
output
);
}
else
{
TransposeCompute_
<
float
>
(
axis
,
input
,
output
);
}
return
;
}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Transpose
REGISTER_LITE_KERNEL
(
transpose
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
TransposeCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
// Transpose2
REGISTER_LITE_KERNEL
(
transpose2
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
Transpose2Compute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
paddle/fluid/lite/kernels/arm/transpose_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/operators/transpose_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
// Transpose
class
TransposeCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
TransposeParam
;
void
Run
()
override
;
virtual
~
TransposeCompute
()
=
default
;
};
// Transpose2
class
Transpose2Compute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
TransposeParam
;
void
Run
()
override
;
virtual
~
Transpose2Compute
()
=
default
;
};
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/arm/transpose_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/transpose_compute.h"
#include <gtest/gtest.h>
#include <limits>
#include <string>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/lite_tensor.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
#define IN(n, c, h, w) \
input_data[w + h * input_w + c * input_h * input_w + \
n * input_c * input_h * input_w]
#define OUT(n, c, h, w) \
output_data[w + h * output_w + c * output_h * output_w + \
n * output_c * output_h * output_w]
void
transpose_compute_ref
(
const
operators
::
TransposeParam
&
param
)
{
const
lite
::
Tensor
*
input
=
param
.
x
;
lite
::
Tensor
*
output
=
param
.
output
;
std
::
vector
<
int
>
axis
=
param
.
axis
;
auto
*
input_data
=
input
->
data
<
float
>
();
auto
*
output_data
=
output
->
mutable_data
<
float
>
();
int
input_n
=
input
->
dims
()[
0
];
int
input_c
=
input
->
dims
()[
1
];
int
input_h
=
input
->
dims
()[
2
];
int
input_w
=
input
->
dims
()[
3
];
int
output_n
=
output
->
dims
()[
0
];
int
output_c
=
output
->
dims
()[
1
];
int
output_h
=
output
->
dims
()[
2
];
int
output_w
=
output
->
dims
()[
3
];
for
(
int
n
=
0
;
n
<
input_n
;
++
n
)
{
for
(
int
c
=
0
;
c
<
input_c
;
++
c
)
{
for
(
int
h
=
0
;
h
<
input_h
;
++
h
)
{
for
(
int
w
=
0
;
w
<
input_w
;
++
w
)
{
OUT
(
n
,
h
,
w
,
c
)
=
IN
(
n
,
c
,
h
,
w
);
}
}
}
}
}
// Transpose
TEST
(
transpose_arm
,
init
)
{
TransposeCompute
transpose
;
ASSERT_EQ
(
transpose
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
transpose
.
target
(),
TARGET
(
kARM
));
}
TEST
(
transpose_arm
,
compute_shape_nchw
)
{
TransposeCompute
transpose
;
operators
::
TransposeParam
param
;
std
::
vector
<
int
>
axis
{
0
,
2
,
3
,
1
};
param
.
axis
=
axis
;
lite
::
Tensor
input
;
lite
::
Tensor
output
;
lite
::
Tensor
output_ref
;
const
std
::
vector
<
int64_t
>
input_shape
{
1
,
24
,
2
,
2
};
const
std
::
vector
<
int64_t
>
output_shape
{
1
,
2
,
2
,
24
};
DDimLite
ddimInput
(
input_shape
);
DDimLite
ddimOutput
(
output_shape
);
input
.
Resize
(
ddimInput
);
output
.
Resize
(
ddimOutput
);
output_ref
.
Resize
(
ddimOutput
);
for
(
int
i
=
0
;
i
<
input_shape
[
0
]
*
input_shape
[
1
]
*
input_shape
[
2
]
*
input_shape
[
3
];
i
+=
4
)
{
input
.
mutable_data
<
float
>
()[
i
]
=
i
;
input
.
mutable_data
<
float
>
()[
i
+
1
]
=
i
+
1
;
input
.
mutable_data
<
float
>
()[
i
+
2
]
=
i
+
2
;
input
.
mutable_data
<
float
>
()[
i
+
3
]
=
i
+
3
;
}
for
(
int
i
=
0
;
i
<
input_shape
[
0
]
*
input_shape
[
1
]
*
input_shape
[
2
]
*
input_shape
[
3
];
i
+=
4
)
{
}
param
.
x
=
&
input
;
param
.
output
=
&
output
;
// run transpose_compute
transpose
.
SetParam
(
param
);
transpose
.
Run
();
// run transpose_compute_ref
param
.
output
=
&
output_ref
;
transpose_compute_ref
(
param
);
auto
*
output_data
=
output
.
data
<
float
>
();
auto
*
output_ref_data
=
output_ref
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
input_shape
[
0
]
*
input_shape
[
1
]
*
input_shape
[
2
]
*
input_shape
[
3
];
i
+=
4
)
{
EXPECT_NEAR
(
output_data
[
i
],
output_ref_data
[
i
],
1e-5
);
}
}
TEST
(
transpose
,
retrive_op
)
{
auto
transpose
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"transpose"
);
ASSERT_FALSE
(
transpose
.
empty
());
ASSERT_TRUE
(
transpose
.
front
());
}
// Transpose2
TEST
(
transpose2_arm
,
init
)
{
Transpose2Compute
transpose2
;
ASSERT_EQ
(
transpose2
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
transpose2
.
target
(),
TARGET
(
kARM
));
}
TEST
(
transpose2_arm
,
compute_shape_nchw
)
{
Transpose2Compute
transpose2
;
operators
::
TransposeParam
param
;
std
::
vector
<
int
>
axis
{
0
,
2
,
3
,
1
};
param
.
axis
=
axis
;
lite
::
Tensor
input
;
lite
::
Tensor
output
;
lite
::
Tensor
output_ref
;
const
std
::
vector
<
int64_t
>
input_shape
{
1
,
24
,
2
,
2
};
const
std
::
vector
<
int64_t
>
output_shape
{
1
,
2
,
2
,
24
};
DDimLite
ddimInput
(
input_shape
);
DDimLite
ddimOutput
(
output_shape
);
input
.
Resize
(
ddimInput
);
output
.
Resize
(
ddimOutput
);
output_ref
.
Resize
(
ddimOutput
);
for
(
int
i
=
0
;
i
<
input_shape
[
0
]
*
input_shape
[
1
]
*
input_shape
[
2
]
*
input_shape
[
3
];
i
+=
4
)
{
input
.
mutable_data
<
float
>
()[
i
]
=
i
;
input
.
mutable_data
<
float
>
()[
i
+
1
]
=
i
+
1
;
input
.
mutable_data
<
float
>
()[
i
+
2
]
=
i
+
2
;
input
.
mutable_data
<
float
>
()[
i
+
3
]
=
i
+
3
;
}
for
(
int
i
=
0
;
i
<
input_shape
[
0
]
*
input_shape
[
1
]
*
input_shape
[
2
]
*
input_shape
[
3
];
i
+=
4
)
{
}
param
.
x
=
&
input
;
param
.
output
=
&
output
;
// run transpose_compute
transpose2
.
SetParam
(
param
);
transpose2
.
Run
();
// run transpose_compute_ref
param
.
output
=
&
output_ref
;
transpose_compute_ref
(
param
);
auto
*
output_data
=
output
.
data
<
float
>
();
auto
*
output_ref_data
=
output_ref
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
input_shape
[
0
]
*
input_shape
[
1
]
*
input_shape
[
2
]
*
input_shape
[
3
];
i
+=
4
)
{
EXPECT_NEAR
(
output_data
[
i
],
output_ref_data
[
i
],
1e-5
);
}
}
TEST
(
transpose2
,
retrive_op
)
{
auto
transpose2
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"transpose2"
);
ASSERT_FALSE
(
transpose2
.
empty
());
ASSERT_TRUE
(
transpose2
.
front
());
}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
transpose
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
transpose2
,
kARM
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/arm/use_kernels.h
浏览文件 @
e8ebb084
...
@@ -19,6 +19,7 @@ USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
...
@@ -19,6 +19,7 @@ USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
concat
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
feed
,
kARM
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
feed
,
kARM
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kARM
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kARM
,
kAny
,
kAny
,
def
);
paddle/fluid/lite/kernels/use_kernels.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/*
* ATTENTION this header file can only include in .cc file.
*/
USE_LITE_KERNEL
(
feed
,
kHost
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kHost
,
kAny
,
kAny
,
def
);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
square
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_sub
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
concat
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
conv2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
depthwise_conv2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL
(
fc
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
conv2d
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
batch_norm
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
relu
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
depthwise_conv2d
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool2d
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kARM
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL
(
mul
,
kCUDA
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
host_to_device
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
device_to_host
);
#endif
paddle/fluid/lite/kernels/x86/CMakeLists.txt
浏览文件 @
e8ebb084
...
@@ -18,6 +18,18 @@ cc_library(concat_compute_x86 SRCS concat_compute.cc DEPS ${lite_kernel_deps} )
...
@@ -18,6 +18,18 @@ cc_library(concat_compute_x86 SRCS concat_compute.cc DEPS ${lite_kernel_deps} )
cc_library
(
conv_compute_x86 SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
blas im2col vol2col
)
cc_library
(
conv_compute_x86 SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
blas im2col vol2col
)
cc_library
(
pool_compute_x86 SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
pooling
)
cc_library
(
pool_compute_x86 SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
pooling
)
lite_cc_test
(
test_fc_compute_x86 SRCS fc_compute_test.cc DEPS fc_compute_x86
)
lite_cc_test
(
test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86
)
lite_cc_test
(
test_pool2d_compute_x86 SRCS pool_compute_test.cc DEPS pool_compute_x86
)
lite_cc_test
(
test_concat_compute_x86 SRCS concat_compute_test.cc DEPS concat_compute_x86
)
lite_cc_test
(
test_softmax_compute_x86 SRCS softmax_compute_test.cc DEPS softmax_compute_x86
)
lite_cc_test
(
test_elementwise_compute_x86 SRCS elementwise_compute_test.cc DEPS elementwise_compute_x86
)
lite_cc_test
(
test_relu_compute_x86 SRCS relu_compute_test.cc DEPS relu_compute_x86
)
lite_cc_test
(
test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86 operator
)
lite_cc_test
(
test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86
)
lite_cc_test
(
test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86
)
set
(
x86_kernels
set
(
x86_kernels
activation_compute_x86
activation_compute_x86
elementwise_compute_x86
elementwise_compute_x86
...
...
paddle/fluid/lite/kernels/x86/concat_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,88 +12,7 @@
...
@@ -12,88 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/kernels/x86/concat_compute.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
#include "paddle/fluid/operators/strided_memcpy.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
class
ConcatCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ConcatParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
int64_t
axis
=
static_cast
<
int64_t
>
(
param
.
axis
);
auto
out
=
param
.
output
;
if
(
axis
==
0
&&
param
.
x
.
size
()
<
10
)
{
size_t
output_offset
=
0
;
for
(
auto
*
in
:
param
.
x
)
{
if
(
!
in
||
in
->
dims
().
production
()
==
0UL
)
{
continue
;
}
auto
in_stride
=
framework
::
stride_numel
(
in
->
dims
().
data
());
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
().
data
());
paddle
::
operators
::
StridedNumelCopyWithAxis
<
T
>
(
platform
::
CPUDeviceContext
(),
axis
,
out
->
mutable_data
<
T
>
()
+
output_offset
,
out_stride
,
in
->
data
<
T
>
(),
in_stride
,
in_stride
[
axis
]);
output_offset
+=
in_stride
[
axis
];
}
}
else
{
std
::
vector
<
lite
::
Tensor
>
inputs
;
for
(
size_t
j
=
0
;
j
<
param
.
x
.
size
();
++
j
)
{
if
(
param
.
x
[
j
]
&&
param
.
x
[
j
]
->
dims
().
production
()
>
0
)
{
inputs
.
push_back
(
*
param
.
x
[
j
]);
}
else
{
continue
;
}
}
int
num
=
inputs
.
size
();
int
rows
=
1
;
auto
dim_0
=
inputs
[
0
].
dims
();
for
(
int
i
=
0
;
i
<
axis
;
++
i
)
{
rows
*=
dim_0
[
i
];
}
int
out_rows
=
rows
,
out_cols
=
0
;
std
::
vector
<
int64_t
>
input_cols
(
inputs
.
size
());
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
int
t_cols
=
inputs
[
i
].
dims
().
production
()
/
rows
;
out_cols
+=
t_cols
;
input_cols
[
i
]
=
t_cols
;
}
// computation
auto
output_data
=
param
.
output
->
template
mutable_data
<
T
>();
int
col_idx
=
0
;
for
(
int
j
=
0
;
j
<
num
;
++
j
)
{
int
col_len
=
input_cols
[
j
];
auto
input_data
=
inputs
[
j
].
data
<
float
>
();
for
(
int
k
=
0
;
k
<
out_rows
;
++
k
)
{
std
::
memcpy
(
output_data
+
k
*
out_cols
+
col_idx
,
input_data
+
k
*
col_len
,
sizeof
(
T
)
*
col_len
);
}
col_idx
+=
col_len
;
}
}
}
virtual
~
ConcatCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
concat
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
concat
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
ConcatCompute
<
float
>
,
def
)
paddle
::
lite
::
kernels
::
x86
::
ConcatCompute
<
float
>
,
def
)
...
...
paddle/fluid/lite/kernels/x86/concat_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/Core>
#include <vector>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
#include "paddle/fluid/operators/strided_memcpy.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
class
ConcatCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ConcatParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
int64_t
axis
=
static_cast
<
int64_t
>
(
param
.
axis
);
auto
out
=
param
.
output
;
if
(
axis
==
0
&&
param
.
x
.
size
()
<
10
)
{
size_t
output_offset
=
0
;
for
(
auto
*
in
:
param
.
x
)
{
if
(
!
in
||
in
->
dims
().
production
()
==
0UL
)
{
continue
;
}
auto
in_stride
=
framework
::
stride_numel
(
in
->
dims
().
data
());
auto
out_stride
=
framework
::
stride_numel
(
out
->
dims
().
data
());
paddle
::
operators
::
StridedNumelCopyWithAxis
<
T
>
(
platform
::
CPUDeviceContext
(),
axis
,
out
->
mutable_data
<
T
>
()
+
output_offset
,
out_stride
,
in
->
data
<
T
>
(),
in_stride
,
in_stride
[
axis
]);
output_offset
+=
in_stride
[
axis
];
}
}
else
{
std
::
vector
<
lite
::
Tensor
>
inputs
;
for
(
size_t
j
=
0
;
j
<
param
.
x
.
size
();
++
j
)
{
if
(
param
.
x
[
j
]
&&
param
.
x
[
j
]
->
dims
().
production
()
>
0
)
{
inputs
.
push_back
(
*
param
.
x
[
j
]);
}
else
{
continue
;
}
}
int
num
=
inputs
.
size
();
int
rows
=
1
;
auto
dim_0
=
inputs
[
0
].
dims
();
for
(
int
i
=
0
;
i
<
axis
;
++
i
)
{
rows
*=
dim_0
[
i
];
}
int
out_rows
=
rows
,
out_cols
=
0
;
std
::
vector
<
int64_t
>
input_cols
(
inputs
.
size
());
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
int
t_cols
=
inputs
[
i
].
dims
().
production
()
/
rows
;
out_cols
+=
t_cols
;
input_cols
[
i
]
=
t_cols
;
}
// computation
auto
output_data
=
param
.
output
->
template
mutable_data
<
T
>();
int
col_idx
=
0
;
for
(
int
j
=
0
;
j
<
num
;
++
j
)
{
int
col_len
=
input_cols
[
j
];
auto
input_data
=
inputs
[
j
].
data
<
float
>
();
for
(
int
k
=
0
;
k
<
out_rows
;
++
k
)
{
std
::
memcpy
(
output_data
+
k
*
out_cols
+
col_idx
,
input_data
+
k
*
col_len
,
sizeof
(
T
)
*
col_len
);
}
col_idx
+=
col_len
;
}
}
}
virtual
~
ConcatCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/concat_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/concat_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
concat_x86
,
retrive_op
)
{
auto
concat
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"concat"
);
ASSERT_FALSE
(
concat
.
empty
());
ASSERT_TRUE
(
concat
.
front
());
}
TEST
(
concat_x86
,
init
)
{
ConcatCompute
<
float
>
concat
;
ASSERT_EQ
(
concat
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
concat
.
target
(),
TARGET
(
kX86
));
}
TEST
(
concat_x86
,
run_test
)
{
lite
::
Tensor
x1
,
x2
,
out
;
constexpr
int
batch_size
=
1
;
std
::
vector
<
int64_t
>
x1_shape
{
batch_size
,
1
,
3
,
3
};
x1
.
Resize
(
lite
::
DDim
(
x1_shape
));
std
::
vector
<
int64_t
>
x2_shape
{
batch_size
,
1
,
3
,
3
};
x2
.
Resize
(
lite
::
DDim
(
x2_shape
));
std
::
vector
<
lite
::
Tensor
*>
x
=
{
&
x1
,
&
x2
};
std
::
vector
<
int64_t
>
out_shape
{
batch_size
,
2
,
3
,
3
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x1_data
=
x1
.
mutable_data
<
float
>
();
auto
x2_data
=
x2
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x1
.
dims
().
production
();
i
++
)
{
x1_data
[
i
]
=
1
;
x2_data
[
i
]
=
2
;
}
ConcatCompute
<
float
>
concat
;
operators
::
ConcatParam
param
;
param
.
x
=
x
;
param
.
output
=
&
out
;
param
.
axis
=
1
;
concat
.
SetParam
(
param
);
concat
.
Run
();
std
::
cout
<<
"output: "
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
std
::
cout
<<
out_data
[
i
]
<<
" "
;
}
std
::
cout
<<
std
::
endl
;
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
concat
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/x86/conv_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,144 +12,7 @@
...
@@ -12,144 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/kernels/x86/conv_compute.h"
#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
#include "paddle/fluid/lite/operators/conv_op.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/depthwise_conv.h"
#include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/operators/math/vol2col.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
inline
bool
IsExpand
(
const
std
::
vector
<
int64_t
>&
filter_dim
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
dilations
)
{
bool
filter_1
=
true
,
strides_1
=
true
,
padding_0
=
true
,
dilation_1
=
true
;
for
(
size_t
j
=
0
;
j
<
strides
.
size
();
++
j
)
{
filter_1
=
filter_1
&&
(
static_cast
<
int
>
(
filter_dim
[
j
+
2
])
==
1
);
strides_1
=
strides_1
&&
(
strides
[
j
]
==
1
);
padding_0
=
padding_0
&&
(
paddings
[
j
]
==
0
);
dilation_1
=
dilation_1
&&
(
dilations
[
j
]
==
1
);
}
return
!
(
filter_1
&&
strides_1
&&
padding_0
&&
dilation_1
);
}
template
<
typename
T
>
class
Conv2dCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ConvParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
ConvParam
>
();
lite
::
Tensor
filter
=
*
param
.
filter
;
param
.
output
->
template
mutable_data
<
T
>();
const
int
batch_size
=
static_cast
<
int
>
(
param
.
x
->
dims
()[
0
]);
std
::
vector
<
int64_t
>
filter_shape_vec
(
filter
.
dims
().
Vectorize
());
std
::
vector
<
int64_t
>
output_shape_vec
(
param
.
output
->
dims
().
Vectorize
());
size_t
data_dim
=
filter_shape_vec
.
size
()
-
2
;
std
::
vector
<
int64_t
>
col_shape_vec
(
1
+
2
*
data_dim
);
col_shape_vec
[
0
]
=
param
.
x
->
dims
()[
1
]
/
param
.
groups
;
for
(
size_t
j
=
0
;
j
<
data_dim
;
++
j
)
{
col_shape_vec
[
j
+
1
]
=
filter_shape_vec
[
j
+
2
];
col_shape_vec
[
j
+
1
+
data_dim
]
=
output_shape_vec
[
j
+
2
];
}
lite
::
DDim
col_shape
(
col_shape_vec
);
lite
::
DDim
col_matrix_shape
=
col_shape
.
Flattern2D
(
data_dim
+
1
);
bool
is_expand
=
IsExpand
(
filter_shape_vec
,
param
.
strides
,
param
.
paddings
,
param
.
dilations
);
lite
::
Tensor
col
;
lite
::
Tensor
col_matrix
;
if
(
is_expand
)
{
col
.
Resize
(
col_shape
);
col
.
mutable_data
<
T
>
();
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
lite
::
DDim
input_shape
=
param
.
x
->
dims
().
Slice
(
1
,
param
.
x
->
dims
().
size
());
lite
::
DDim
filter_matrix_shape
(
std
::
vector
<
int64_t
>
{
filter
.
dims
()[
0
],
filter
.
dims
().
production
()
/
filter
.
dims
()[
0
]});
filter
.
Resize
(
filter_matrix_shape
);
lite
::
DDim
output_matrix_shape
(
std
::
vector
<
int64_t
>
{
param
.
output
->
dims
()[
1
],
param
.
output
->
dims
().
production
()
/
(
param
.
output
->
dims
()[
0
]
*
param
.
output
->
dims
()[
1
])});
int
in_step
=
static_cast
<
int
>
(
param
.
x
->
dims
()[
1
])
/
param
.
groups
;
int
out_step
=
static_cast
<
int
>
(
param
.
output
->
dims
()[
1
])
/
param
.
groups
;
paddle
::
operators
::
math
::
Vol2ColFunctor
<
platform
::
CPUDeviceContext
,
T
>
vol2col
;
paddle
::
operators
::
math
::
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUDeviceContext
,
T
>
im2col
;
auto
blas
=
paddle
::
operators
::
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
platform
::
CPUDeviceContext
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
lite
::
Tensor
in_batch
;
in_batch
.
ShareDataWith
(
param
.
x
->
raw_tensor
().
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
.
data
()));
lite
::
Tensor
out_batch
;
out_batch
.
ShareDataWith
(
param
.
output
->
raw_tensor
().
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
.
data
()));
for
(
int
g
=
0
;
g
<
param
.
groups
;
g
++
)
{
lite
::
Tensor
in_slice
;
in_slice
.
ShareDataWith
(
in_batch
.
raw_tensor
().
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
));
if
(
!
is_expand
)
{
col
.
ShareDataWith
(
in_slice
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
else
if
(
data_dim
==
2U
)
{
// im2col
im2col
(
platform
::
CPUDeviceContext
(),
in_slice
.
raw_tensor
(),
param
.
dilations
,
param
.
strides
,
std
::
vector
<
int
>
{
param
.
paddings
[
0
],
param
.
paddings
[
1
],
param
.
paddings
[
0
],
param
.
paddings
[
1
]},
&
(
col
.
raw_tensor
()));
}
else
if
(
data_dim
==
3U
)
{
// vol2col
vol2col
(
platform
::
CPUDeviceContext
(),
in_slice
.
raw_tensor
(),
param
.
dilations
,
param
.
strides
,
param
.
paddings
,
&
(
col
.
raw_tensor
()));
}
// gemm
lite
::
Tensor
out_slice
;
out_slice
.
ShareDataWith
(
out_batch
.
raw_tensor
().
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
));
lite
::
Tensor
filter_slice
;
filter_slice
.
ShareDataWith
(
filter
.
raw_tensor
().
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
));
blas
.
MatMul
(
filter_slice
.
raw_tensor
(),
false
,
col_matrix
.
raw_tensor
(),
false
,
T
(
1.0
),
&
(
out_slice
.
raw_tensor
()),
T
(
0.0
));
}
}
}
virtual
~
Conv2dCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
conv2d
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
conv2d
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
Conv2dCompute
<
float
>
,
def
)
paddle
::
lite
::
kernels
::
x86
::
Conv2dCompute
<
float
>
,
def
)
...
...
paddle/fluid/lite/kernels/x86/conv_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/Core>
#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
#include "paddle/fluid/lite/operators/conv_op.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/depthwise_conv.h"
#include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/operators/math/vol2col.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
inline
bool
IsExpand
(
const
std
::
vector
<
int64_t
>&
filter_dim
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
dilations
)
{
bool
filter_1
=
true
,
strides_1
=
true
,
padding_0
=
true
,
dilation_1
=
true
;
for
(
size_t
j
=
0
;
j
<
strides
.
size
();
++
j
)
{
filter_1
=
filter_1
&&
(
static_cast
<
int
>
(
filter_dim
[
j
+
2
])
==
1
);
strides_1
=
strides_1
&&
(
strides
[
j
]
==
1
);
padding_0
=
padding_0
&&
(
paddings
[
j
]
==
0
);
dilation_1
=
dilation_1
&&
(
dilations
[
j
]
==
1
);
}
return
!
(
filter_1
&&
strides_1
&&
padding_0
&&
dilation_1
);
}
template
<
typename
T
>
class
Conv2dCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ConvParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
ConvParam
>
();
lite
::
Tensor
filter
=
*
param
.
filter
;
param
.
output
->
template
mutable_data
<
T
>();
const
int
batch_size
=
static_cast
<
int
>
(
param
.
x
->
dims
()[
0
]);
std
::
vector
<
int64_t
>
filter_shape_vec
(
filter
.
dims
().
Vectorize
());
std
::
vector
<
int64_t
>
output_shape_vec
(
param
.
output
->
dims
().
Vectorize
());
size_t
data_dim
=
filter_shape_vec
.
size
()
-
2
;
std
::
vector
<
int64_t
>
col_shape_vec
(
1
+
2
*
data_dim
);
col_shape_vec
[
0
]
=
param
.
x
->
dims
()[
1
]
/
param
.
groups
;
for
(
size_t
j
=
0
;
j
<
data_dim
;
++
j
)
{
col_shape_vec
[
j
+
1
]
=
filter_shape_vec
[
j
+
2
];
col_shape_vec
[
j
+
1
+
data_dim
]
=
output_shape_vec
[
j
+
2
];
}
lite
::
DDim
col_shape
(
col_shape_vec
);
lite
::
DDim
col_matrix_shape
=
col_shape
.
Flattern2D
(
data_dim
+
1
);
bool
is_expand
=
IsExpand
(
filter_shape_vec
,
param
.
strides
,
param
.
paddings
,
param
.
dilations
);
lite
::
Tensor
col
;
lite
::
Tensor
col_matrix
;
if
(
is_expand
)
{
col
.
Resize
(
col_shape
);
col
.
mutable_data
<
T
>
();
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
lite
::
DDim
input_shape
=
param
.
x
->
dims
().
Slice
(
1
,
param
.
x
->
dims
().
size
());
lite
::
DDim
filter_matrix_shape
(
std
::
vector
<
int64_t
>
{
filter
.
dims
()[
0
],
filter
.
dims
().
production
()
/
filter
.
dims
()[
0
]});
filter
.
Resize
(
filter_matrix_shape
);
lite
::
DDim
output_matrix_shape
(
std
::
vector
<
int64_t
>
{
param
.
output
->
dims
()[
1
],
param
.
output
->
dims
().
production
()
/
(
param
.
output
->
dims
()[
0
]
*
param
.
output
->
dims
()[
1
])});
int
in_step
=
static_cast
<
int
>
(
param
.
x
->
dims
()[
1
])
/
param
.
groups
;
int
out_step
=
static_cast
<
int
>
(
param
.
output
->
dims
()[
1
])
/
param
.
groups
;
paddle
::
operators
::
math
::
Vol2ColFunctor
<
platform
::
CPUDeviceContext
,
T
>
vol2col
;
paddle
::
operators
::
math
::
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUDeviceContext
,
T
>
im2col
;
auto
blas
=
paddle
::
operators
::
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
platform
::
CPUDeviceContext
());
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
lite
::
Tensor
in_batch
;
in_batch
.
ShareDataWith
(
param
.
x
->
raw_tensor
().
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
.
data
()));
lite
::
Tensor
out_batch
;
out_batch
.
ShareDataWith
(
param
.
output
->
raw_tensor
().
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
.
data
()));
for
(
int
g
=
0
;
g
<
param
.
groups
;
g
++
)
{
lite
::
Tensor
in_slice
;
in_slice
.
ShareDataWith
(
in_batch
.
raw_tensor
().
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
));
if
(
!
is_expand
)
{
col
.
ShareDataWith
(
in_slice
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
else
if
(
data_dim
==
2U
)
{
// im2col
im2col
(
platform
::
CPUDeviceContext
(),
in_slice
.
raw_tensor
(),
param
.
dilations
,
param
.
strides
,
std
::
vector
<
int
>
{
param
.
paddings
[
0
],
param
.
paddings
[
1
],
param
.
paddings
[
0
],
param
.
paddings
[
1
]},
&
(
col
.
raw_tensor
()));
}
else
if
(
data_dim
==
3U
)
{
// vol2col
vol2col
(
platform
::
CPUDeviceContext
(),
in_slice
.
raw_tensor
(),
param
.
dilations
,
param
.
strides
,
param
.
paddings
,
&
(
col
.
raw_tensor
()));
}
// gemm
lite
::
Tensor
out_slice
;
out_slice
.
ShareDataWith
(
out_batch
.
raw_tensor
().
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
));
lite
::
Tensor
filter_slice
;
filter_slice
.
ShareDataWith
(
filter
.
raw_tensor
().
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
));
blas
.
MatMul
(
filter_slice
.
raw_tensor
(),
false
,
col_matrix
.
raw_tensor
(),
false
,
T
(
1.0
),
&
(
out_slice
.
raw_tensor
()),
T
(
0.0
));
}
}
}
virtual
~
Conv2dCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/conv_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/conv_compute.h"
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
conv_x86
,
retrive_op
)
{
auto
conv2d
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"conv2d"
);
ASSERT_FALSE
(
conv2d
.
empty
());
ASSERT_TRUE
(
conv2d
.
front
());
}
TEST
(
conv2d_x86
,
init
)
{
Conv2dCompute
<
float
>
conv2d
;
ASSERT_EQ
(
conv2d
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
conv2d
.
target
(),
TARGET
(
kX86
));
}
TEST
(
conv2d_x86
,
run_test
)
{
lite
::
Tensor
x
,
filter
,
b
,
out
;
constexpr
int
batch_size
=
1
;
std
::
vector
<
int64_t
>
x_shape
{
batch_size
,
3
,
3
,
3
};
x
.
Resize
(
lite
::
DDim
(
x_shape
));
std
::
vector
<
int64_t
>
filter_shape
{
1
,
3
,
3
,
3
};
filter
.
Resize
(
lite
::
DDim
(
filter_shape
));
std
::
vector
<
int64_t
>
b_shape
{
1
,
3
,
1
,
1
};
b
.
Resize
(
lite
::
DDim
(
b_shape
));
std
::
vector
<
int64_t
>
out_shape
{
batch_size
,
1
,
1
,
1
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
filter_data
=
filter
.
mutable_data
<
float
>
();
auto
b_data
=
b
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
production
();
i
++
)
{
x_data
[
i
]
=
1
;
}
for
(
int64_t
i
=
0
;
i
<
filter
.
dims
().
production
();
i
++
)
{
filter_data
[
i
]
=
1
;
}
for
(
int64_t
i
=
0
;
i
<
b
.
dims
().
production
();
i
++
)
{
b_data
[
i
]
=
0
;
}
Conv2dCompute
<
float
>
conv2d
;
operators
::
ConvParam
param
;
param
.
x
=
&
x
;
param
.
filter
=
&
filter
;
param
.
bias
=
&
b
;
param
.
output
=
&
out
;
param
.
strides
=
{
1
,
1
};
param
.
paddings
=
{
0
,
0
};
param
.
groups
=
1
;
param
.
dilations
=
{
1
,
1
};
conv2d
.
SetParam
(
param
);
conv2d
.
Run
();
LOG
(
INFO
)
<<
"output: "
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
LOG
(
INFO
)
<<
out_data
[
i
]
<<
" "
;
}
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
conv2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/x86/dropout_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,72 +12,7 @@
...
@@ -12,72 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <random>
#include "paddle/fluid/lite/kernels/x86/dropout_compute.h"
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
T
>
class
DropoutCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
DropoutParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
DropoutParam
>
();
const
auto
*
x_data
=
param
.
x
->
data
<
T
>
();
auto
*
out_data
=
param
.
output
->
template
mutable_data
<
T
>();
if
(
!
param
.
is_test
)
{
auto
*
mask_data
=
param
.
mask
->
template
mutable_data
<
T
>();
std
::
random_device
rnd
;
std
::
minstd_rand
engine
;
int
seed
=
param
.
fix_seed
?
param
.
seed
:
rnd
();
engine
.
seed
(
seed
);
std
::
uniform_real_distribution
<
float
>
dist
(
0
,
1
);
size_t
size
=
framework
::
product
(
param
.
mask
->
dims
().
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
if
(
dist
(
engine
)
<
param
.
dropout_prob
)
{
mask_data
[
i
]
=
0
;
out_data
[
i
]
=
0
;
}
else
{
if
(
param
.
dropout_implementation
==
"upscale_in_train"
)
{
mask_data
[
i
]
=
1.0
f
/
static_cast
<
T
>
(
1.0
f
-
param
.
dropout_prob
);
out_data
[
i
]
=
x_data
[
i
]
/
static_cast
<
T
>
(
1.0
f
-
param
.
dropout_prob
);
}
else
{
mask_data
[
i
]
=
1
;
out_data
[
i
]
=
x_data
[
i
];
}
}
}
}
else
{
auto
X
=
EigenMatrix
<
T
>::
Reshape
(
param
.
x
->
raw_tensor
(),
1
);
auto
Y
=
EigenMatrix
<
T
>::
Reshape
(
param
.
output
->
raw_tensor
(),
1
);
auto
&
place
=
*
platform
::
CPUDeviceContext
().
eigen_device
();
if
(
param
.
dropout_implementation
==
"upscale_in_train"
)
{
Y
.
device
(
place
)
=
X
;
}
else
{
Y
.
device
(
place
)
=
X
*
static_cast
<
T
>
(
1.0
f
-
param
.
dropout_prob
);
}
}
}
virtual
~
DropoutCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
DropoutCompute
<
float
>
,
def
)
paddle
::
lite
::
kernels
::
x86
::
DropoutCompute
<
float
>
,
def
)
...
...
paddle/fluid/lite/kernels/x86/dropout_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <random>
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
T
>
class
DropoutCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
DropoutParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
DropoutParam
>
();
const
auto
*
x_data
=
param
.
x
->
data
<
T
>
();
auto
*
out_data
=
param
.
output
->
template
mutable_data
<
T
>();
if
(
!
param
.
is_test
)
{
auto
*
mask_data
=
param
.
mask
->
template
mutable_data
<
T
>();
std
::
random_device
rnd
;
std
::
minstd_rand
engine
;
int
seed
=
param
.
fix_seed
?
param
.
seed
:
rnd
();
engine
.
seed
(
seed
);
std
::
uniform_real_distribution
<
float
>
dist
(
0
,
1
);
size_t
size
=
framework
::
product
(
param
.
mask
->
dims
().
data
());
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
if
(
dist
(
engine
)
<
param
.
dropout_prob
)
{
mask_data
[
i
]
=
0
;
out_data
[
i
]
=
0
;
}
else
{
if
(
param
.
dropout_implementation
==
"upscale_in_train"
)
{
mask_data
[
i
]
=
1.0
f
/
static_cast
<
T
>
(
1.0
f
-
param
.
dropout_prob
);
out_data
[
i
]
=
x_data
[
i
]
/
static_cast
<
T
>
(
1.0
f
-
param
.
dropout_prob
);
}
else
{
mask_data
[
i
]
=
1
;
out_data
[
i
]
=
x_data
[
i
];
}
}
}
}
else
{
auto
X
=
EigenMatrix
<
T
>::
Reshape
(
param
.
x
->
raw_tensor
(),
1
);
auto
Y
=
EigenMatrix
<
T
>::
Reshape
(
param
.
output
->
raw_tensor
(),
1
);
auto
&
place
=
*
platform
::
CPUDeviceContext
().
eigen_device
();
if
(
param
.
dropout_implementation
==
"upscale_in_train"
)
{
Y
.
device
(
place
)
=
X
;
}
else
{
Y
.
device
(
place
)
=
X
*
static_cast
<
T
>
(
1.0
f
-
param
.
dropout_prob
);
}
}
}
virtual
~
DropoutCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/dropout_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/dropout_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
dropout_x86
,
retrive_op
)
{
auto
dropout
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"dropout"
);
ASSERT_FALSE
(
dropout
.
empty
());
ASSERT_TRUE
(
dropout
.
front
());
}
TEST
(
dropout_x86
,
init
)
{
DropoutCompute
<
float
>
dropout
;
ASSERT_EQ
(
dropout
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
dropout
.
target
(),
TARGET
(
kX86
));
}
TEST
(
dropout_x86
,
run_test
)
{
lite
::
Tensor
x
,
y
,
out
;
constexpr
int
batch_size
=
1
;
std
::
vector
<
int64_t
>
x_shape
{
batch_size
,
3
,
2
,
2
};
x
.
Resize
(
lite
::
DDim
(
x_shape
));
std
::
vector
<
int64_t
>
out_shape
{
batch_size
,
3
,
2
,
2
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
production
();
i
++
)
{
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
// DropoutCompute dropout;
DropoutCompute
<
float
>
dropout
;
operators
::
DropoutParam
param
;
param
.
x
=
&
x
;
param
.
dropout_prob
=
0.25
;
param
.
is_test
=
true
;
param
.
fix_seed
=
true
;
param
.
output
=
&
out
;
dropout
.
SetParam
(
param
);
dropout
.
Run
();
LOG
(
INFO
)
<<
"output: "
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
LOG
(
INFO
)
<<
out_data
[
i
];
}
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/x86/elementwise_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,113 +12,8 @@
...
@@ -12,113 +12,8 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/lite/kernels/x86/elementwise_compute.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
struct
SubFunctor
{
inline
HOSTDEVICE
T
operator
()(
T
a
,
T
b
)
const
{
return
a
-
b
;
}
};
template
<
typename
T
>
struct
AddFunctor
{
inline
HOSTDEVICE
T
operator
()(
T
a
,
T
b
)
const
{
return
a
+
b
;
}
};
template
<
typename
T
>
class
ElementwiseSubCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ElementwiseParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
());
param
.
Out
->
template
mutable_data
<
T
>();
paddle
::
operators
::
ElementwiseComputeEx
<
SubFunctor
<
T
>
,
platform
::
CPUDeviceContext
,
T
>
(
*
context
.
x86_execution_context
(),
&
param
.
X
->
raw_tensor
(),
&
param
.
Y
->
raw_tensor
(),
param
.
axis
,
SubFunctor
<
T
>
(),
&
param
.
Out
->
raw_tensor
());
}
virtual
~
ElementwiseSubCompute
()
=
default
;
};
template
<
typename
T
>
struct
SubGradDX
{
T
operator
()(
T
x
,
T
y
,
T
out
,
T
dout
)
const
{
return
dout
;
}
};
template
<
typename
T
>
struct
SubGradDY
{
T
operator
()(
T
x
,
T
y
,
T
out
,
T
dout
)
const
{
return
-
dout
;
}
};
template
<
typename
T
>
class
ElementwiseSubGradCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ElementwiseGradParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
());
param
.
X_grad
->
template
mutable_data
<
T
>();
param
.
Y_grad
->
template
mutable_data
<
T
>();
// skip out, x, y
auto
dout
=
param
.
Out_grad
->
raw_tensor
();
auto
dx
=
param
.
X_grad
->
raw_tensor
();
auto
dy
=
param
.
Y_grad
->
raw_tensor
();
auto
&
skip
=
dout
;
paddle
::
operators
::
ElemwiseExplicitGradCompute
<
platform
::
CPUDeviceContext
,
T
,
SubGradDX
<
T
>
,
SubGradDY
<
T
>>
(
*
context
.
x86_execution_context
(),
skip
,
skip
,
skip
,
dout
,
param
.
axis
,
&
dx
,
&
dy
,
SubGradDX
<
T
>
(),
SubGradDY
<
T
>
());
}
virtual
~
ElementwiseSubGradCompute
()
=
default
;
};
template
<
typename
T
>
class
ElementwiseAddCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ElementwiseParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
());
param
.
Out
->
template
mutable_data
<
T
>();
paddle
::
operators
::
ElementwiseComputeEx
<
AddFunctor
<
T
>
,
platform
::
CPUDeviceContext
,
T
>
(
*
context
.
x86_execution_context
(),
&
param
.
X
->
raw_tensor
(),
&
param
.
Y
->
raw_tensor
(),
param
.
axis
,
AddFunctor
<
T
>
(),
&
param
.
Out
->
raw_tensor
());
}
virtual
~
ElementwiseAddCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// float
REGISTER_LITE_KERNEL
(
elementwise_sub
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
elementwise_sub
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
ElementwiseSubCompute
<
float
>
,
paddle
::
lite
::
kernels
::
x86
::
ElementwiseSubCompute
<
float
>
,
def
)
def
)
...
...
paddle/fluid/lite/kernels/x86/elementwise_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
struct
SubFunctor
{
inline
HOSTDEVICE
T
operator
()(
T
a
,
T
b
)
const
{
return
a
-
b
;
}
};
template
<
typename
T
>
struct
AddFunctor
{
inline
HOSTDEVICE
T
operator
()(
T
a
,
T
b
)
const
{
return
a
+
b
;
}
};
template
<
typename
T
>
class
ElementwiseSubCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ElementwiseParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
());
param
.
Out
->
template
mutable_data
<
T
>();
paddle
::
operators
::
ElementwiseComputeEx
<
SubFunctor
<
T
>
,
platform
::
CPUDeviceContext
,
T
>
(
*
context
.
x86_execution_context
(),
&
param
.
X
->
raw_tensor
(),
&
param
.
Y
->
raw_tensor
(),
param
.
axis
,
SubFunctor
<
T
>
(),
&
param
.
Out
->
raw_tensor
());
}
virtual
~
ElementwiseSubCompute
()
=
default
;
};
template
<
typename
T
>
struct
SubGradDX
{
T
operator
()(
T
x
,
T
y
,
T
out
,
T
dout
)
const
{
return
dout
;
}
};
template
<
typename
T
>
struct
SubGradDY
{
T
operator
()(
T
x
,
T
y
,
T
out
,
T
dout
)
const
{
return
-
dout
;
}
};
template
<
typename
T
>
class
ElementwiseSubGradCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ElementwiseGradParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
());
param
.
X_grad
->
template
mutable_data
<
T
>();
param
.
Y_grad
->
template
mutable_data
<
T
>();
// skip out, x, y
auto
dout
=
param
.
Out_grad
->
raw_tensor
();
auto
dx
=
param
.
X_grad
->
raw_tensor
();
auto
dy
=
param
.
Y_grad
->
raw_tensor
();
auto
&
skip
=
dout
;
paddle
::
operators
::
ElemwiseExplicitGradCompute
<
platform
::
CPUDeviceContext
,
T
,
SubGradDX
<
T
>
,
SubGradDY
<
T
>>
(
*
context
.
x86_execution_context
(),
skip
,
skip
,
skip
,
dout
,
param
.
axis
,
&
dx
,
&
dy
,
SubGradDX
<
T
>
(),
SubGradDY
<
T
>
());
}
virtual
~
ElementwiseSubGradCompute
()
=
default
;
};
template
<
typename
T
>
class
ElementwiseAddCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ElementwiseParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
());
param
.
Out
->
template
mutable_data
<
T
>();
paddle
::
operators
::
ElementwiseComputeEx
<
AddFunctor
<
T
>
,
platform
::
CPUDeviceContext
,
T
>
(
*
context
.
x86_execution_context
(),
&
param
.
X
->
raw_tensor
(),
&
param
.
Y
->
raw_tensor
(),
param
.
axis
,
AddFunctor
<
T
>
(),
&
param
.
Out
->
raw_tensor
());
}
virtual
~
ElementwiseAddCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/elementwise_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/elementwise_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
elementwise_add_x86
,
retrive_op
)
{
auto
elementwise_add
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"elementwise_add"
);
ASSERT_FALSE
(
elementwise_add
.
empty
());
ASSERT_TRUE
(
elementwise_add
.
front
());
}
TEST
(
elementwise_add_x86
,
init
)
{
ElementwiseAddCompute
<
float
>
elementwise_add
;
ASSERT_EQ
(
elementwise_add
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
elementwise_add
.
target
(),
TARGET
(
kX86
));
}
TEST
(
elementwise_add_x86
,
run_test
)
{
lite
::
Tensor
x
,
y
,
out
;
constexpr
int
batch_size
=
1
;
std
::
vector
<
int64_t
>
x_shape
{
batch_size
,
3
,
2
,
2
};
x
.
Resize
(
lite
::
DDim
(
x_shape
));
std
::
vector
<
int64_t
>
y_shape
{
batch_size
,
3
,
2
,
2
};
y
.
Resize
(
lite
::
DDim
(
y_shape
));
std
::
vector
<
int64_t
>
out_shape
{
batch_size
,
3
,
2
,
2
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
y_data
=
y
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
production
();
i
++
)
{
x_data
[
i
]
=
1
;
}
for
(
int64_t
i
=
0
;
i
<
y
.
dims
().
production
();
i
++
)
{
y_data
[
i
]
=
2
;
}
// ElementwiseAddCompute elementwise_add;
ElementwiseAddCompute
<
float
>
elementwise_add
;
operators
::
ElementwiseParam
param
;
param
.
X
=
&
x
;
param
.
Y
=
&
y
;
param
.
Out
=
&
out
;
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
X86Context
>
();
elementwise_add
.
SetParam
(
param
);
elementwise_add
.
SetContext
(
std
::
move
(
ctx
));
elementwise_add
.
Run
();
LOG
(
INFO
)
<<
"output: "
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
LOG
(
INFO
)
<<
out_data
[
i
];
}
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
elementwise_add
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/x86/fc_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,89 +12,7 @@
...
@@ -12,89 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/kernels/x86/fc_compute.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/fc_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
void
fc_compute_eigen
(
const
T
*
x
,
int
x_h
,
int
x_w
,
//
const
T
*
w
,
int
w_h
,
int
w_w
,
//
const
T
*
b
,
//
T
*
out
)
{
using
matrix_t
=
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>
;
Eigen
::
Map
<
const
matrix_t
>
X
(
x
,
x_h
,
x_w
);
Eigen
::
Map
<
const
matrix_t
>
W
(
w
,
w_h
,
w_w
);
Eigen
::
Map
<
matrix_t
>
Out
(
out
,
x_h
,
w_w
);
Out
=
X
*
W
;
if
(
b
)
{
Eigen
::
Map
<
const
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
1
>>
B
(
b
,
w_w
);
Out
=
Out
.
array
().
rowwise
()
+
B
.
transpose
().
array
();
}
}
template
<
typename
T
>
void
fc_compute_naive
(
const
T
*
x
,
int
x_h
,
int
x_w
,
//
const
T
*
w
,
int
w_h
,
int
w_w
,
//
const
T
*
b
,
//
T
*
out
)
{
CHECK_EQ
(
x_w
,
w_h
);
// out shape: (x_h, w_w)
memset
(
out
,
0
,
x_h
*
w_w
*
sizeof
(
T
));
for
(
int
i
=
0
;
i
<
x_h
;
i
++
)
{
for
(
int
j
=
0
;
j
<
w_w
;
j
++
)
{
T
tmp
=
static_cast
<
T
>
(
0
);
for
(
int
k
=
0
;
k
<
x_w
;
k
++
)
{
tmp
+=
x
[
i
*
x_w
+
k
]
*
w
[
k
*
w_w
+
j
];
}
out
[
i
*
w_w
+
j
]
=
tmp
+
b
[
j
];
}
}
}
template
<
typename
T
>
class
FcCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
FcParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
CHECK_GE
(
param
.
input
->
dims
().
size
(),
2UL
);
CHECK_EQ
(
param
.
output
->
dims
().
size
(),
2UL
);
fc_compute_eigen
(
param
.
input
->
data
<
T
>
(),
// x
param
.
input
->
dims
().
Slice
(
0
,
param
.
in_num_col_dims
).
production
(),
param
.
input
->
dims
()
.
Slice
(
param
.
in_num_col_dims
,
param
.
input
->
dims
().
size
())
.
production
(),
param
.
w
->
data
<
T
>
(),
// w
param
.
w
->
dims
()[
0
],
// w_h
param
.
w
->
dims
()[
1
],
// w_w
param
.
bias
->
data
<
T
>
(),
// b
param
.
output
->
mutable_data
<
T
>
());
}
virtual
~
FcCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
FcCompute
<
float
>
,
def
)
paddle
::
lite
::
kernels
::
x86
::
FcCompute
<
float
>
,
def
)
...
...
paddle/fluid/lite/kernels/x86/fc_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/fc_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
void
fc_compute_eigen
(
const
T
*
x
,
int
x_h
,
int
x_w
,
//
const
T
*
w
,
int
w_h
,
int
w_w
,
//
const
T
*
b
,
//
T
*
out
)
{
using
matrix_t
=
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>
;
Eigen
::
Map
<
const
matrix_t
>
X
(
x
,
x_h
,
x_w
);
Eigen
::
Map
<
const
matrix_t
>
W
(
w
,
w_h
,
w_w
);
Eigen
::
Map
<
matrix_t
>
Out
(
out
,
x_h
,
w_w
);
Out
=
X
*
W
;
if
(
b
)
{
Eigen
::
Map
<
const
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
1
>>
B
(
b
,
w_w
);
Out
=
Out
.
array
().
rowwise
()
+
B
.
transpose
().
array
();
}
}
template
<
typename
T
>
void
fc_compute_naive
(
const
T
*
x
,
int
x_h
,
int
x_w
,
//
const
T
*
w
,
int
w_h
,
int
w_w
,
//
const
T
*
b
,
//
T
*
out
)
{
CHECK_EQ
(
x_w
,
w_h
);
// out shape: (x_h, w_w)
memset
(
out
,
0
,
x_h
*
w_w
*
sizeof
(
T
));
for
(
int
i
=
0
;
i
<
x_h
;
i
++
)
{
for
(
int
j
=
0
;
j
<
w_w
;
j
++
)
{
T
tmp
=
static_cast
<
T
>
(
0
);
for
(
int
k
=
0
;
k
<
x_w
;
k
++
)
{
tmp
+=
x
[
i
*
x_w
+
k
]
*
w
[
k
*
w_w
+
j
];
}
out
[
i
*
w_w
+
j
]
=
tmp
+
b
[
j
];
}
}
}
template
<
typename
T
>
class
FcCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
FcParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
CHECK_GE
(
param
.
input
->
dims
().
size
(),
2UL
);
CHECK_EQ
(
param
.
output
->
dims
().
size
(),
2UL
);
fc_compute_eigen
(
param
.
input
->
data
<
T
>
(),
// x
param
.
input
->
dims
().
Slice
(
0
,
param
.
in_num_col_dims
).
production
(),
param
.
input
->
dims
()
.
Slice
(
param
.
in_num_col_dims
,
param
.
input
->
dims
().
size
())
.
production
(),
param
.
w
->
data
<
T
>
(),
// w
param
.
w
->
dims
()[
0
],
// w_h
param
.
w
->
dims
()[
1
],
// w_w
param
.
bias
->
data
<
T
>
(),
// b
param
.
output
->
mutable_data
<
T
>
());
}
virtual
~
FcCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/fc_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/fc_compute.h"
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
fc_x86
,
retrive_op
)
{
auto
fc
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"fc"
);
ASSERT_FALSE
(
fc
.
empty
());
ASSERT_TRUE
(
fc
.
front
());
}
TEST
(
fc_x86
,
init
)
{
FcCompute
<
float
>
fc
;
ASSERT_EQ
(
fc
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
fc
.
target
(),
TARGET
(
kX86
));
}
TEST
(
fc_x86
,
run_test
)
{
lite
::
Tensor
x
,
w
,
b
,
out
;
constexpr
int
batch_size
=
2
;
std
::
vector
<
int64_t
>
x_shape
{
batch_size
,
3
};
x
.
Resize
(
lite
::
DDim
(
x_shape
));
std
::
vector
<
int64_t
>
w_shape
{
3
,
4
};
w
.
Resize
(
lite
::
DDim
(
w_shape
));
std
::
vector
<
int64_t
>
b_shape
{
1
,
4
};
b
.
Resize
(
lite
::
DDim
(
b_shape
));
std
::
vector
<
int64_t
>
out_shape
{
1
,
4
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
w_data
=
w
.
mutable_data
<
float
>
();
auto
b_data
=
b
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
production
();
i
++
)
{
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
for
(
int64_t
i
=
0
;
i
<
w
.
dims
().
production
();
i
++
)
{
w_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
for
(
int64_t
i
=
0
;
i
<
b
.
dims
().
production
();
i
++
)
{
b_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
/* lite::x86::math::fc_compute_eigen(x_data, batch_size, 3, //
w_data, 3, 4, //
b_data, ref_data); */
// FcCompute fc;
FcCompute
<
float
>
fc
;
operators
::
FcParam
param
;
param
.
in_num_col_dims
=
1
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
&
b
;
param
.
output
=
&
out
;
param
.
in_mat_dims
=
x
.
dims
();
// std::unique_ptr<KernelContext> ctx(new KernelContext);
// ctx->As<X86Context>();
fc
.
SetParam
(
param
);
// fc.SetContext(std::move(ctx));
fc
.
Run
();
VLOG
(
3
)
<<
"output vs ref"
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
VLOG
(
3
)
<<
out_data
[
i
];
}
/* for (int i = 0; i < out.dims().product(); ++i) {
EXPECT_NEAR(out_data[i], ref_data[i], 1e-5);
}*/
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/x86/mul_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,122 +12,7 @@
...
@@ -12,122 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/kernels/x86/mul_compute.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
>
class
MulCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
MulParam
;
void
Run
()
override
{
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulParam
>
();
CHECK
(
context
.
x86_device_context
());
param
.
output
->
template
mutable_data
<
T
>();
auto
*
x
=
&
param
.
x
->
raw_tensor
();
auto
*
y
=
&
param
.
y
->
raw_tensor
();
const
Tensor
x_matrix
=
x
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
x
,
param
.
x_num_col_dims
)
:
*
x
;
const
Tensor
y_matrix
=
y
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
y
,
param
.
y_num_col_dims
)
:
*
y
;
auto
*
z
=
&
param
.
output
->
raw_tensor
();
auto
z_dim
=
z
->
dims
();
if
(
z_dim
.
size
()
!=
2
)
{
z
->
Resize
({
x_matrix
.
dims
()[
0
],
y_matrix
.
dims
()[
1
]});
}
auto
blas
=
paddle
::
operators
::
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
*
context
.
x86_device_context
());
blas
.
MatMul
(
x_matrix
,
y_matrix
,
z
);
if
(
z_dim
.
size
()
!=
2
)
{
z
->
Resize
(
z_dim
);
}
}
virtual
~
MulCompute
()
=
default
;
};
template
<
typename
T
>
class
MulGradCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
void
Run
()
override
{
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulGradParam
>
();
CHECK
(
context
.
x86_device_context
());
auto
*
x
=
&
param
.
x
->
raw_tensor
();
auto
*
y
=
&
param
.
y
->
raw_tensor
();
auto
x_matrix
=
x
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
x
,
param
.
x_num_col_dims
)
:
static_cast
<
const
Tensor
&>
(
*
x
);
auto
y_matrix
=
y
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
y
,
param
.
y_num_col_dims
)
:
static_cast
<
const
Tensor
&>
(
*
y
);
auto
*
dout
=
&
param
.
output_grad
->
raw_tensor
();
Tensor
dout_mat
;
dout_mat
.
ShareDataWith
(
*
dout
);
dout_mat
.
Resize
(
{
framework
::
flatten_to_2d
(
x
->
dims
(),
param
.
x_num_col_dims
)[
0
],
framework
::
flatten_to_2d
(
y
->
dims
(),
param
.
y_num_col_dims
)[
1
]});
auto
*
dx
=
&
param
.
x_grad
->
raw_tensor
();
auto
*
dy
=
&
param
.
y_grad
->
raw_tensor
();
if
(
dx
!=
nullptr
)
{
dx
->
set_lod
(
x
->
lod
());
}
if
(
dy
!=
nullptr
)
{
dy
->
set_lod
(
y
->
lod
());
}
auto
blas
=
paddle
::
operators
::
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
*
context
.
x86_device_context
());
if
(
dx
)
{
// dx->mutable_data<T>(context.x86_device_context->GetPlace());
param
.
x_grad
->
template
mutable_data
<
T
>();
Tensor
dx_matrix
=
dx
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
dx
,
param
.
x_num_col_dims
)
:
*
dx
;
// dx = dout * y'. dx: M x K, dout : M x N, y : K x N
blas
.
MatMul
(
dout_mat
,
false
,
y_matrix
,
true
,
&
dx_matrix
);
}
if
(
dy
)
{
// dy->yutable_data<T>(context.x86_device_context->GetPlace());
param
.
y_grad
->
template
mutable_data
<
T
>();
Tensor
dy_matrix
=
dy
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
dy
,
param
.
y_num_col_dims
)
:
*
dy
;
// dy = x' * dout. dy K x N, dout : M x N, x : M x K
blas
.
MatMul
(
x_matrix
,
true
,
dout_mat
,
false
,
&
dy_matrix
);
}
}
virtual
~
MulGradCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
MulCompute
<
float
>
,
def
)
paddle
::
lite
::
kernels
::
x86
::
MulCompute
<
float
>
,
def
)
...
...
paddle/fluid/lite/kernels/x86/mul_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
>
class
MulCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
MulParam
;
void
Run
()
override
{
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulParam
>
();
CHECK
(
context
.
x86_device_context
());
param
.
output
->
template
mutable_data
<
T
>();
auto
*
x
=
&
param
.
x
->
raw_tensor
();
auto
*
y
=
&
param
.
y
->
raw_tensor
();
const
Tensor
x_matrix
=
x
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
x
,
param
.
x_num_col_dims
)
:
*
x
;
const
Tensor
y_matrix
=
y
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
y
,
param
.
y_num_col_dims
)
:
*
y
;
auto
*
z
=
&
param
.
output
->
raw_tensor
();
auto
z_dim
=
z
->
dims
();
if
(
z_dim
.
size
()
!=
2
)
{
z
->
Resize
({
x_matrix
.
dims
()[
0
],
y_matrix
.
dims
()[
1
]});
}
auto
blas
=
paddle
::
operators
::
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
*
context
.
x86_device_context
());
blas
.
MatMul
(
x_matrix
,
y_matrix
,
z
);
if
(
z_dim
.
size
()
!=
2
)
{
z
->
Resize
(
z_dim
);
}
}
virtual
~
MulCompute
()
=
default
;
};
template
<
typename
T
>
class
MulGradCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
void
Run
()
override
{
auto
&
context
=
ctx_
->
As
<
X86Context
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulGradParam
>
();
CHECK
(
context
.
x86_device_context
());
auto
*
x
=
&
param
.
x
->
raw_tensor
();
auto
*
y
=
&
param
.
y
->
raw_tensor
();
auto
x_matrix
=
x
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
x
,
param
.
x_num_col_dims
)
:
static_cast
<
const
Tensor
&>
(
*
x
);
auto
y_matrix
=
y
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
y
,
param
.
y_num_col_dims
)
:
static_cast
<
const
Tensor
&>
(
*
y
);
auto
*
dout
=
&
param
.
output_grad
->
raw_tensor
();
Tensor
dout_mat
;
dout_mat
.
ShareDataWith
(
*
dout
);
dout_mat
.
Resize
(
{
framework
::
flatten_to_2d
(
x
->
dims
(),
param
.
x_num_col_dims
)[
0
],
framework
::
flatten_to_2d
(
y
->
dims
(),
param
.
y_num_col_dims
)[
1
]});
auto
*
dx
=
&
param
.
x_grad
->
raw_tensor
();
auto
*
dy
=
&
param
.
y_grad
->
raw_tensor
();
if
(
dx
!=
nullptr
)
{
dx
->
set_lod
(
x
->
lod
());
}
if
(
dy
!=
nullptr
)
{
dy
->
set_lod
(
y
->
lod
());
}
auto
blas
=
paddle
::
operators
::
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
*
context
.
x86_device_context
());
if
(
dx
)
{
// dx->mutable_data<T>(context.x86_device_context->GetPlace());
param
.
x_grad
->
template
mutable_data
<
T
>();
Tensor
dx_matrix
=
dx
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
dx
,
param
.
x_num_col_dims
)
:
*
dx
;
// dx = dout * y'. dx: M x K, dout : M x N, y : K x N
blas
.
MatMul
(
dout_mat
,
false
,
y_matrix
,
true
,
&
dx_matrix
);
}
if
(
dy
)
{
// dy->yutable_data<T>(context.x86_device_context->GetPlace());
param
.
y_grad
->
template
mutable_data
<
T
>();
Tensor
dy_matrix
=
dy
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
dy
,
param
.
y_num_col_dims
)
:
*
dy
;
// dy = x' * dout. dy K x N, dout : M x N, x : M x K
blas
.
MatMul
(
x_matrix
,
true
,
dout_mat
,
false
,
&
dy_matrix
);
}
}
virtual
~
MulGradCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/mul_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/mul_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
mul_x86
,
retrive_op
)
{
auto
mul
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"mul"
);
ASSERT_FALSE
(
mul
.
empty
());
ASSERT_TRUE
(
mul
.
front
());
}
TEST
(
mul_x86
,
init
)
{
MulCompute
<
float
>
mul
;
ASSERT_EQ
(
mul
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
mul
.
target
(),
TARGET
(
kX86
));
}
TEST
(
mul_x86
,
run_test
)
{
lite
::
Tensor
x
,
y
,
out
;
constexpr
int
batch_size
=
1
;
std
::
vector
<
int64_t
>
x_shape
{
batch_size
,
3
};
x
.
Resize
(
lite
::
DDim
(
x_shape
));
std
::
vector
<
int64_t
>
y_shape
{
3
,
4
};
y
.
Resize
(
lite
::
DDim
(
y_shape
));
std
::
vector
<
int64_t
>
out_shape
{
batch_size
,
4
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
y_data
=
y
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
production
();
i
++
)
{
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
for
(
int64_t
i
=
0
;
i
<
y
.
dims
().
production
();
i
++
)
{
y_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
// MulCompute mul;
MulCompute
<
float
>
mul
;
operators
::
MulParam
param
;
param
.
x
=
&
x
;
param
.
y
=
&
y
;
param
.
output
=
&
out
;
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
X86Context
>
();
mul
.
SetContext
(
std
::
move
(
ctx
));
mul
.
SetParam
(
param
);
mul
.
Run
();
LOG
(
INFO
)
<<
"output: "
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
LOG
(
INFO
)
<<
out_data
[
i
];
}
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/x86/pool_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,69 +12,10 @@
...
@@ -12,69 +12,10 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/kernels/x86/pool_compute.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/pooling.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
class
PoolCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
PoolParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
if
(
param
.
global_pooling
)
{
for
(
size_t
i
=
0
;
i
<
param
.
ksize
.
size
();
++
i
)
{
param
.
paddings
[
i
]
=
0
;
param
.
ksize
[
i
]
=
static_cast
<
int
>
(
param
.
x
->
dims
()[
i
+
2
]);
}
}
switch
(
param
.
ksize
.
size
())
{
case
2
:
{
if
(
param
.
pooling_type
==
"max"
)
{
paddle
::
operators
::
math
::
Pool2dFunctor
<
platform
::
CPUDeviceContext
,
paddle
::
operators
::
math
::
MaxPool
<
T
>
,
T
>
pool2d_forward
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
pool2d_forward
(
platform
::
CPUDeviceContext
(),
param
.
x
->
raw_tensor
(),
param
.
ksize
,
param
.
strides
,
param
.
paddings
,
pool_process
,
true
,
false
,
&
(
param
.
output
->
raw_tensor
()));
}
else
if
(
param
.
pooling_type
==
"avg"
)
{
paddle
::
operators
::
math
::
Pool2dFunctor
<
platform
::
CPUDeviceContext
,
paddle
::
operators
::
math
::
AvgPool
<
T
>
,
T
>
pool2d_forward
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
pool2d_forward
(
platform
::
CPUDeviceContext
(),
param
.
x
->
raw_tensor
(),
param
.
ksize
,
param
.
strides
,
param
.
paddings
,
pool_process
,
param
.
exclusive
,
param
.
adaptive
,
&
(
param
.
output
->
raw_tensor
()));
}
}
break
;
case
3
:
{
}
break
;
}
}
virtual
~
PoolCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
pool2d
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
pool2d
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
PoolCompute
<
float
>
,
def
)
paddle
::
lite
::
kernels
::
x86
::
PoolCompute
<
float
>
,
def
)
.
BindInput
(
"
X
"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindInput
(
"
x
"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
Finalize
();
.
Finalize
();
paddle/fluid/lite/kernels/x86/pool_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/pooling.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
class
PoolCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
PoolParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
if
(
param
.
global_pooling
)
{
for
(
size_t
i
=
0
;
i
<
param
.
ksize
.
size
();
++
i
)
{
param
.
paddings
[
i
]
=
0
;
param
.
ksize
[
i
]
=
static_cast
<
int
>
(
param
.
x
->
dims
()[
i
+
2
]);
}
}
switch
(
param
.
ksize
.
size
())
{
case
2
:
{
if
(
param
.
pooling_type
==
"max"
)
{
paddle
::
operators
::
math
::
Pool2dFunctor
<
platform
::
CPUDeviceContext
,
paddle
::
operators
::
math
::
MaxPool
<
T
>
,
T
>
pool2d_forward
;
paddle
::
operators
::
math
::
MaxPool
<
T
>
pool_process
;
pool2d_forward
(
platform
::
CPUDeviceContext
(),
param
.
x
->
raw_tensor
(),
param
.
ksize
,
param
.
strides
,
param
.
paddings
,
pool_process
,
true
,
false
,
&
(
param
.
output
->
raw_tensor
()));
}
else
if
(
param
.
pooling_type
==
"avg"
)
{
paddle
::
operators
::
math
::
Pool2dFunctor
<
platform
::
CPUDeviceContext
,
paddle
::
operators
::
math
::
AvgPool
<
T
>
,
T
>
pool2d_forward
;
paddle
::
operators
::
math
::
AvgPool
<
T
>
pool_process
;
pool2d_forward
(
platform
::
CPUDeviceContext
(),
param
.
x
->
raw_tensor
(),
param
.
ksize
,
param
.
strides
,
param
.
paddings
,
pool_process
,
param
.
exclusive
,
param
.
adaptive
,
&
(
param
.
output
->
raw_tensor
()));
}
}
break
;
case
3
:
{
}
break
;
}
}
virtual
~
PoolCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/pool_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/pool_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
pool_x86
,
retrive_op
)
{
auto
pool2d
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"pool2d"
);
ASSERT_FALSE
(
pool2d
.
empty
());
ASSERT_TRUE
(
pool2d
.
front
());
}
TEST
(
pool2d_x86
,
init
)
{
PoolCompute
<
float
>
pool2d
;
ASSERT_EQ
(
pool2d
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
pool2d
.
target
(),
TARGET
(
kX86
));
}
TEST
(
pool2d_x86
,
run_test
)
{
lite
::
Tensor
x
,
out
;
constexpr
int
batch_size
=
1
;
std
::
vector
<
int64_t
>
x_shape
{
batch_size
,
3
,
4
,
4
};
x
.
Resize
(
lite
::
DDim
(
x_shape
));
std
::
vector
<
int64_t
>
out_shape
{
batch_size
,
3
,
2
,
2
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
production
();
i
++
)
{
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
PoolCompute
<
float
>
pool2d
;
operators
::
PoolParam
param
;
param
.
x
=
&
x
;
param
.
output
=
&
out
;
param
.
strides
=
{
2
,
2
};
param
.
paddings
=
{
0
,
0
};
param
.
ksize
=
{
2
,
2
};
param
.
pooling_type
=
"max"
;
pool2d
.
SetParam
(
param
);
pool2d
.
Run
();
LOG
(
INFO
)
<<
"output: "
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
LOG
(
INFO
)
<<
out_data
[
i
];
}
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
pool2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/x86/relu_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,42 +12,7 @@
...
@@ -12,42 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/kernels/x86/relu_compute.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
class
ReluCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ReluParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
n
=
param
.
input
->
dims
().
production
();
const
float
*
input
=
param
.
input
->
data
<
float
>
();
float
*
output
=
param
.
output
->
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
output
[
i
]
=
std
::
max
(
0.
f
,
input
[
i
]);
}
}
virtual
~
ReluCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
ReluCompute
<
float
>
,
def
)
paddle
::
lite
::
kernels
::
x86
::
ReluCompute
<
float
>
,
def
)
...
...
paddle/fluid/lite/kernels/x86/relu_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/Core>
#include <algorithm>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
class
ReluCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ReluParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
n
=
param
.
input
->
dims
().
production
();
const
float
*
input
=
param
.
input
->
data
<
float
>
();
float
*
output
=
param
.
output
->
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
output
[
i
]
=
std
::
max
(
0.
f
,
input
[
i
]);
}
}
virtual
~
ReluCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/relu_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/relu_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
relu_x86
,
retrive_op
)
{
auto
relu
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"relu"
);
ASSERT_FALSE
(
relu
.
empty
());
ASSERT_TRUE
(
relu
.
front
());
}
TEST
(
relu_x86
,
init
)
{
ReluCompute
<
float
>
relu
;
ASSERT_EQ
(
relu
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
relu
.
target
(),
TARGET
(
kX86
));
}
TEST
(
relu_x86
,
run_test
)
{
lite
::
Tensor
x
,
out
;
constexpr
int
batch_size
=
1
;
std
::
vector
<
int64_t
>
x_shape
{
batch_size
,
3
,
2
,
2
};
x
.
Resize
(
lite
::
DDim
(
x_shape
));
std
::
vector
<
int64_t
>
out_shape
{
batch_size
,
3
,
2
,
2
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
production
();
i
++
)
{
int
sign
=
i
%
2
==
0
?
1
:
-
1
;
x_data
[
i
]
=
static_cast
<
float
>
(
i
*
sign
);
}
// ReluCompute relu;
ReluCompute
<
float
>
relu
;
operators
::
ReluParam
param
;
param
.
input
=
&
x
;
param
.
output
=
&
out
;
relu
.
SetParam
(
param
);
relu
.
Run
();
LOG
(
INFO
)
<<
"output: "
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
LOG
(
INFO
)
<<
out_data
[
i
];
}
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/x86/scale_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,48 +12,7 @@
...
@@ -12,48 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/kernels/x86/scale_compute.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
void
scale_compute
(
const
T
*
x
,
T
*
out
,
int
size
,
float
scale
,
float
bias
,
bool
bias_before
)
{
if
(
bias_before
)
bias
*=
scale
;
for
(
int
i
=
0
;
i
<
size
;
i
++
)
{
out
[
i
]
=
x
[
i
]
*
scale
+
bias
;
}
}
template
<
typename
T
>
class
ScaleCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ScaleParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
scale_compute
(
param
.
x
->
data
<
T
>
(),
param
.
output
->
mutable_data
<
T
>
(),
param
.
x
->
dims
().
production
(),
param
.
scale
,
param
.
bias
,
param
.
bias_after_scale
);
}
virtual
~
ScaleCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
scale
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
scale
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
ScaleCompute
<
float
>
,
def
)
paddle
::
lite
::
kernels
::
x86
::
ScaleCompute
<
float
>
,
def
)
...
...
paddle/fluid/lite/kernels/x86/scale_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
void
scale_compute
(
const
T
*
x
,
T
*
out
,
int
size
,
float
scale
,
float
bias
,
bool
bias_before
)
{
if
(
bias_before
)
bias
*=
scale
;
for
(
int
i
=
0
;
i
<
size
;
i
++
)
{
out
[
i
]
=
x
[
i
]
*
scale
+
bias
;
}
}
template
<
typename
T
>
class
ScaleCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ScaleParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
scale_compute
(
param
.
x
->
data
<
T
>
(),
param
.
output
->
mutable_data
<
T
>
(),
param
.
x
->
dims
().
production
(),
param
.
scale
,
param
.
bias
,
param
.
bias_after_scale
);
}
virtual
~
ScaleCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/scale_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/scale_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
scale_x86
,
retrive_op
)
{
auto
scale
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"scale"
);
ASSERT_FALSE
(
scale
.
empty
());
ASSERT_TRUE
(
scale
.
front
());
}
TEST
(
scale_x86
,
init
)
{
ScaleCompute
<
float
>
scale
;
ASSERT_EQ
(
scale
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
scale
.
target
(),
TARGET
(
kX86
));
}
TEST
(
scale_x86
,
run_test
)
{
lite
::
Tensor
x
,
y
,
out
;
constexpr
int
batch_size
=
1
;
std
::
vector
<
int64_t
>
x_shape
{
batch_size
,
3
,
2
,
2
};
x
.
Resize
(
lite
::
DDim
(
x_shape
));
std
::
vector
<
int64_t
>
out_shape
{
batch_size
,
3
,
2
,
2
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
production
();
i
++
)
{
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
// ScaleCompute scale;
ScaleCompute
<
float
>
scale
;
operators
::
ScaleParam
param
;
param
.
x
=
&
x
;
param
.
scale
=
0.5
;
param
.
bias
=
0
;
param
.
output
=
&
out
;
scale
.
SetParam
(
param
);
scale
.
Run
();
LOG
(
INFO
)
<<
"output: "
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
LOG
(
INFO
)
<<
out_data
[
i
];
}
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
scale
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/x86/softmax_compute.cc
浏览文件 @
e8ebb084
...
@@ -12,76 +12,7 @@
...
@@ -12,76 +12,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/lite/kernels/x86/softmax_compute.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
static
inline
int
CanonicalAxis
(
const
int
axis
,
const
int
rank
)
{
if
(
axis
<
0
)
{
return
axis
+
rank
;
}
return
axis
;
}
static
inline
int
SizeToAxis
(
const
int
axis
,
lite
::
DDim
dims
)
{
int
size
=
1
;
for
(
int
i
=
0
;
i
<
axis
;
i
++
)
{
size
*=
dims
[
i
];
}
return
size
;
}
static
inline
int
SizeFromAxis
(
const
int
axis
,
lite
::
DDim
dims
)
{
int
size
=
1
;
for
(
int
i
=
axis
;
i
<
dims
.
size
();
i
++
)
{
size
*=
dims
[
i
];
}
return
size
;
}
template
<
typename
T
>
class
SoftmaxCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
SoftmaxParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
SoftmaxParam
>
();
// auto& context = context_->As<X86Context>();
CHECK
(
param
.
output
);
CHECK
(
param
.
x
);
const
int
rank
=
param
.
x
->
dims
().
size
();
const
int
axis
=
CanonicalAxis
(
param
.
axis
,
rank
);
int
axis_dim
=
param
.
x
->
dims
()[
axis
];
const
int
n
=
SizeToAxis
(
axis
,
param
.
x
->
dims
());
const
int
d
=
SizeFromAxis
(
axis
,
param
.
x
->
dims
());
std
::
vector
<
int64_t
>
shape
{
n
,
d
};
lite
::
Tensor
input_2d
,
out_2d
;
input_2d
.
ShareDataWith
(
*
param
.
x
);
input_2d
.
Resize
(
lite
::
DDim
(
shape
));
out_2d
.
ShareDataWith
(
*
param
.
output
);
out_2d
.
Resize
(
lite
::
DDim
(
shape
));
paddle
::
operators
::
math
::
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
T
,
true
>
()(
platform
::
CPUDeviceContext
(),
axis_dim
,
&
input_2d
.
raw_tensor
(),
&
out_2d
.
raw_tensor
());
}
virtual
~
SoftmaxCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
softmax
,
kX86
,
kFloat
,
kNCHW
,
REGISTER_LITE_KERNEL
(
softmax
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
SoftmaxCompute
<
float
>
,
def
)
paddle
::
lite
::
kernels
::
x86
::
SoftmaxCompute
<
float
>
,
def
)
...
...
paddle/fluid/lite/kernels/x86/softmax_compute.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/operators/math/softmax.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
static
inline
int
CanonicalAxis
(
const
int
axis
,
const
int
rank
)
{
if
(
axis
<
0
)
{
return
axis
+
rank
;
}
return
axis
;
}
static
inline
int
SizeToAxis
(
const
int
axis
,
lite
::
DDim
dims
)
{
int
size
=
1
;
for
(
int
i
=
0
;
i
<
axis
;
i
++
)
{
size
*=
dims
[
i
];
}
return
size
;
}
static
inline
int
SizeFromAxis
(
const
int
axis
,
lite
::
DDim
dims
)
{
int
size
=
1
;
for
(
size_t
i
=
axis
;
i
<
dims
.
size
();
i
++
)
{
size
*=
dims
[
i
];
}
return
size
;
}
template
<
typename
T
>
class
SoftmaxCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
SoftmaxParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
SoftmaxParam
>
();
// auto& context = context_->As<X86Context>();
CHECK
(
param
.
output
);
CHECK
(
param
.
x
);
const
int
rank
=
param
.
x
->
dims
().
size
();
const
int
axis
=
CanonicalAxis
(
param
.
axis
,
rank
);
int
axis_dim
=
param
.
x
->
dims
()[
axis
];
const
int
n
=
SizeToAxis
(
axis
,
param
.
x
->
dims
());
const
int
d
=
SizeFromAxis
(
axis
,
param
.
x
->
dims
());
std
::
vector
<
int64_t
>
shape
{
n
,
d
};
lite
::
Tensor
input_2d
,
out_2d
;
input_2d
.
ShareDataWith
(
*
param
.
x
);
input_2d
.
Resize
(
lite
::
DDim
(
shape
));
out_2d
.
ShareDataWith
(
*
param
.
output
);
out_2d
.
Resize
(
lite
::
DDim
(
shape
));
paddle
::
operators
::
math
::
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
T
,
true
>
()(
platform
::
CPUDeviceContext
(),
axis_dim
,
&
input_2d
.
raw_tensor
(),
&
out_2d
.
raw_tensor
());
}
virtual
~
SoftmaxCompute
()
=
default
;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/x86/softmax_compute_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/x86/softmax_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
TEST
(
softmax_x86
,
retrive_op
)
{
auto
softmax
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
(
"softmax"
);
ASSERT_FALSE
(
softmax
.
empty
());
ASSERT_TRUE
(
softmax
.
front
());
}
TEST
(
softmax_x86
,
init
)
{
SoftmaxCompute
<
float
>
softmax
;
ASSERT_EQ
(
softmax
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
softmax
.
target
(),
TARGET
(
kX86
));
}
TEST
(
softmax_x86
,
run_test
)
{
lite
::
Tensor
x
,
out
;
constexpr
int
batch_size
=
1
;
std
::
vector
<
int64_t
>
x_shape
{
batch_size
,
3
,
3
,
3
};
x
.
Resize
(
lite
::
DDim
(
x_shape
));
std
::
vector
<
int64_t
>
out_shape
{
batch_size
,
3
,
3
,
3
};
out
.
Resize
(
lite
::
DDim
(
out_shape
));
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
production
();
i
++
)
{
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
SoftmaxCompute
<
float
>
softmax
;
operators
::
SoftmaxParam
param
;
param
.
x
=
&
x
;
param
.
output
=
&
out
;
softmax
.
SetParam
(
param
);
softmax
.
Run
();
LOG
(
INFO
)
<<
"output: "
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
LOG
(
INFO
)
<<
out_data
[
i
];
}
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
softmax
,
kX86
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/model_parser/CMakeLists.txt
浏览文件 @
e8ebb084
#cc_library(runtime_lite SRCS runtime.cc)
#cc_library(runtime_lite SRCS runtime.cc)
#TODO(Superjomn) enable it again.
#TODO(Superjomn) enable it again.
if
(
NOT LITE_
WITH_LIGHT_WEIGHT_FRAMEWORK
)
if
(
NOT LITE_
ON_MOBILE
)
lite_cc_test
(
test_model_parser_lite SRCS model_parser_test.cc
lite_cc_test
(
test_model_parser_lite SRCS model_parser_test.cc
DEPS model_parser_lite framework_proto_lite
DEPS model_parser_lite framework_proto_lite
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
)
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
)
...
@@ -13,18 +13,15 @@ endif()
...
@@ -13,18 +13,15 @@ endif()
cc_library
(
compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite framework_proto_lite var_desc_lite
)
cc_library
(
compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite framework_proto_lite var_desc_lite
)
set
(
model_parser_deps variable_lite scope_lite
${
tensor_lite
}
scope_lite
lite_cc_library
(
model_parser_lite SRCS model_parser.cc DEPS
target_wrapper_host
variable_lite scope_lite
${
tensor_lite
}
scope_lite
compatible_pb_lite
target_wrapper_host
memory_lite
compatible_pb_lite
)
memory_lite
if
(
LITE_WITH_CUDA
)
CUDA_DEPS target_wrapper_cuda
)
set
(
model_parser_deps
${
model_parser_deps
}
target_wrapper_cuda
)
endif
()
cc_library
(
model_parser_lite SRCS model_parser.cc DEPS
${
model_parser_deps
}
)
lite_cc_test
(
test_op_desc_lite SRCS op_desc_test.cc DEPS cpp_op_desc_lite op_desc_lite compatible_pb_lite
)
lite_cc_test
(
test_op_desc_lite SRCS op_desc_test.cc DEPS cpp_op_desc_lite op_desc_lite compatible_pb_lite
)
add_subdirectory
(
pb
)
add_subdirectory
(
pb
)
add_subdirectory
(
cpp
)
add_subdirectory
(
cpp
)
paddle/fluid/lite/model_parser/model_parser.cc
浏览文件 @
e8ebb084
...
@@ -209,7 +209,7 @@ void TensorToStream(std::ostream &os, const lite::Tensor &tensor) {
...
@@ -209,7 +209,7 @@ void TensorToStream(std::ostream &os, const lite::Tensor &tensor) {
os
.
write
(
out
.
data
(),
size
);
os
.
write
(
out
.
data
(),
size
);
}
}
{
// the 3rd field, tensor data
{
// the 3rd field, tensor data
uint64_t
size
=
tensor
.
data
_size
();
uint64_t
size
=
tensor
.
memory
_size
();
CHECK_LT
(
size
,
std
::
numeric_limits
<
std
::
streamsize
>::
max
())
CHECK_LT
(
size
,
std
::
numeric_limits
<
std
::
streamsize
>::
max
())
<<
"Index overflow when writing tensor"
;
<<
"Index overflow when writing tensor"
;
...
...
paddle/fluid/lite/operators/CMakeLists.txt
浏览文件 @
e8ebb084
...
@@ -14,13 +14,15 @@ cc_library(fetch_op_lite SRCS fetch_op.cc DEPS ${op_DEPS})
...
@@ -14,13 +14,15 @@ cc_library(fetch_op_lite SRCS fetch_op.cc DEPS ${op_DEPS})
cc_library
(
io_copy_op_lite SRCS io_copy_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
io_copy_op_lite SRCS io_copy_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
activation_ops_lite SRCS activation_ops.cc DEPS
${
op_DEPS
}
)
cc_library
(
activation_ops_lite SRCS activation_ops.cc DEPS
${
op_DEPS
}
)
cc_library
(
elementwise_ops_lite SRCS elementwise_ops.cc DEPS
${
op_DEPS
}
)
cc_library
(
elementwise_ops_lite SRCS elementwise_ops.cc DEPS
${
op_DEPS
}
)
cc_library
(
fusion_elementwise_activation_ops_lite SRCS fusion_elementwise_activation_ops.cc DEPS elementwise_ops_lite
${
op_DEPS
}
)
cc_library
(
mean_op_lite SRCS mean_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
mean_op_lite SRCS mean_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
fill_constant_op_lite SRCS fill_constant_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
fill_constant_op_lite SRCS fill_constant_op.cc DEPS
${
op_DEPS
}
)
#cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS})
#cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS})
cc_library
(
op_params_lite SRCS op_params.cc DEPS
${
tensor_lite
}
any_lite framework_proto_lite
)
cc_library
(
op_params_lite SRCS op_params.cc DEPS
${
tensor_lite
}
any_lite framework_proto_lite
)
cc_library
(
dropout_op_lite SRCS dropout_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
dropout_op_lite SRCS dropout_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
concat_op_lite SRCS concat_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
concat_op_lite SRCS concat_op.cc DEPS
${
op_DEPS
}
)
# cc_library(split_op_lite SRCS split_op.cc DEPS ${op_DEPS})
cc_library
(
split_op_lite SRCS split_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
transpose_op_lite SRCS transpose_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS
${
op_DEPS
}
)
cc_library
(
fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS
${
op_DEPS
}
)
cc_library
(
fake_dequant SRCS fake_dequantize_max_abs.cc DEPS
${
op_DEPS
}
)
cc_library
(
fake_dequant SRCS fake_dequantize_max_abs.cc DEPS
${
op_DEPS
}
)
...
@@ -38,12 +40,14 @@ set(ops_lite
...
@@ -38,12 +40,14 @@ set(ops_lite
fetch_op_lite
fetch_op_lite
io_copy_op_lite
io_copy_op_lite
elementwise_ops_lite
elementwise_ops_lite
fusion_elementwise_activation_ops_lite
mean_op_lite
mean_op_lite
fill_constant_op_lite
fill_constant_op_lite
activation_ops_lite
activation_ops_lite
dropout_op_lite
dropout_op_lite
concat_op_lite
concat_op_lite
#split_op_lite
split_op_lite
transpose_op_lite
fake_quant
fake_quant
fake_dequant
fake_dequant
PARENT_SCOPE
)
PARENT_SCOPE
)
...
@@ -60,3 +64,7 @@ lite_cc_test(test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite m
...
@@ -60,3 +64,7 @@ lite_cc_test(test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite m
lite_cc_test
(
test_reshape_op_lite SRCS reshape_op_test.cc DEPS reshape_op_lite memory_lite
)
lite_cc_test
(
test_reshape_op_lite SRCS reshape_op_test.cc DEPS reshape_op_lite memory_lite
)
lite_cc_test
(
test_batch_norm_op_lite SRCS batch_norm_op_test.cc DEPS batch_norm_op_lite memory_lite
)
lite_cc_test
(
test_batch_norm_op_lite SRCS batch_norm_op_test.cc DEPS batch_norm_op_lite memory_lite
)
lite_cc_test
(
test_concat_op_lite SRCS concat_op_test.cc DEPS concat_op_lite memory_lite
)
lite_cc_test
(
test_concat_op_lite SRCS concat_op_test.cc DEPS concat_op_lite memory_lite
)
lite_cc_test
(
test_fusion_elementwise_activation_ops_lite
SRCS fusion_elementwise_activation_ops_test.cc
DEPS fusion_elementwise_activation_ops_lite memory_lite
)
lite_cc_test
(
test_transpose_op_lite SRCS transpose_op_test.cc DEPS transpose_op_lite memory_lite
)
paddle/fluid/lite/operators/dropout_op.cc
浏览文件 @
e8ebb084
...
@@ -52,7 +52,7 @@ class DropoutOpLite : public OpLite {
...
@@ -52,7 +52,7 @@ class DropoutOpLite : public OpLite {
param_
.
mask
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
Mask
);
param_
.
mask
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
Mask
);
param_
.
dropout_prob
=
op_desc
.
GetAttr
<
float
>
(
"dropout_prob"
);
param_
.
dropout_prob
=
op_desc
.
GetAttr
<
float
>
(
"dropout_prob"
);
if
(
op_desc
.
HasAttr
(
"
axis
"
))
{
if
(
op_desc
.
HasAttr
(
"
is_test
"
))
{
param_
.
is_test
=
op_desc
.
GetAttr
<
bool
>
(
"is_test"
);
param_
.
is_test
=
op_desc
.
GetAttr
<
bool
>
(
"is_test"
);
}
}
param_
.
fix_seed
=
op_desc
.
GetAttr
<
bool
>
(
"fix_seed"
);
param_
.
fix_seed
=
op_desc
.
GetAttr
<
bool
>
(
"fix_seed"
);
...
...
paddle/fluid/lite/operators/elementwise_ops.cc
浏览文件 @
e8ebb084
...
@@ -12,92 +12,67 @@
...
@@ -12,92 +12,67 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/lite/
core/op_lite
.h"
#include "paddle/fluid/lite/
operators/elementwise_ops
.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
namespace
operators
{
namespace
operators
{
class
ElementwiseOp
:
public
OpLite
{
bool
ElementwiseOp
::
CheckShape
()
const
{
public:
CHECK_OR_FALSE
(
param_
.
X
);
explicit
ElementwiseOp
(
const
std
::
string
&
type
)
:
OpLite
(
type
)
{}
CHECK_OR_FALSE
(
param_
.
Y
);
CHECK_OR_FALSE
(
param_
.
Out
);
bool
CheckShape
()
const
override
{
return
true
;
CHECK_OR_FALSE
(
param_
.
X
);
}
CHECK_OR_FALSE
(
param_
.
Y
);
CHECK_OR_FALSE
(
param_
.
Out
);
bool
ElementwiseOp
::
InferShape
()
const
{
return
true
;
CHECK_OR_FALSE
(
param_
.
X
->
dims
().
size
()
>=
param_
.
Y
->
dims
().
size
());
}
param_
.
Out
->
Resize
(
param_
.
X
->
dims
());
return
true
;
bool
InferShape
()
const
override
{
}
CHECK_OR_FALSE
(
param_
.
X
->
dims
().
size
()
>=
param_
.
Y
->
dims
().
size
());
param_
.
Out
->
Resize
(
param_
.
X
->
dims
());
bool
ElementwiseOp
::
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
{
return
true
;
auto
X_name
=
opdesc
.
Input
(
"X"
).
front
();
}
auto
Y_name
=
opdesc
.
Input
(
"Y"
).
front
();
auto
Out_name
=
opdesc
.
Output
(
"Out"
).
front
();
bool
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
{
auto
X_name
=
opdesc
.
Input
(
"X"
).
front
();
param_
.
X
=
GetVar
<
lite
::
Tensor
>
(
scope
,
X_name
);
auto
Y_name
=
opdesc
.
Input
(
"Y"
).
front
();
param_
.
Y
=
GetVar
<
lite
::
Tensor
>
(
scope
,
Y_name
);
auto
Out_name
=
opdesc
.
Output
(
"Out"
).
front
();
param_
.
Out
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
Out_name
);
param_
.
axis
=
opdesc
.
GetAttr
<
int
>
(
"axis"
);
param_
.
X
=
GetVar
<
lite
::
Tensor
>
(
scope
,
X_name
);
return
true
;
param_
.
Y
=
GetVar
<
lite
::
Tensor
>
(
scope
,
Y_name
);
}
param_
.
Out
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
Out_name
);
param_
.
axis
=
opdesc
.
GetAttr
<
int
>
(
"axis"
);
return
true
;
}
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
std
::
string
DebugString
()
const
override
{
return
"elementwise_op"
;
}
private:
mutable
operators
::
ElementwiseParam
param_
;
};
#ifdef LITE_WITH_X86
#ifdef LITE_WITH_X86
class
ElementwiseGradExplicitOp
:
public
OpLite
{
bool
ElementwiseGradExplicitOp
::
CheckShape
()
const
{
public:
CHECK_OR_FALSE
(
param_
.
Y
);
explicit
ElementwiseGradExplicitOp
(
const
std
::
string
&
type
)
:
OpLite
(
type
)
{}
CHECK_OR_FALSE
(
param_
.
X_grad
);
CHECK_OR_FALSE
(
param_
.
Y_grad
);
bool
CheckShape
()
const
override
{
CHECK_OR_FALSE
(
param_
.
Out_grad
);
CHECK_OR_FALSE
(
param_
.
Y
);
return
true
;
CHECK_OR_FALSE
(
param_
.
X_grad
);
}
CHECK_OR_FALSE
(
param_
.
Y_grad
);
CHECK_OR_FALSE
(
param_
.
Out_grad
);
bool
ElementwiseGradExplicitOp
::
InferShape
()
const
{
return
true
;
param_
.
X_grad
->
Resize
(
param_
.
Out_grad
->
dims
());
}
param_
.
Y_grad
->
Resize
(
param_
.
Y
->
dims
());
return
true
;
bool
InferShape
()
const
override
{
}
param_
.
X_grad
->
Resize
(
param_
.
Out_grad
->
dims
());
param_
.
Y_grad
->
Resize
(
param_
.
Y
->
dims
());
bool
ElementwiseGradExplicitOp
::
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
return
true
;
lite
::
Scope
*
scope
)
{
}
CHECK_EQ
(
opdesc
.
InputArgumentNames
().
size
(),
1UL
);
auto
Out_name
=
opdesc
.
Input
(
framework
::
GradVarName
(
"Out"
)).
front
();
bool
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
{
auto
X_name
=
opdesc
.
Output
(
framework
::
GradVarName
(
"X"
)).
front
();
CHECK_EQ
(
opdesc
.
InputArgumentNames
().
size
(),
1UL
);
auto
Y_name
=
opdesc
.
Output
(
framework
::
GradVarName
(
"Y"
)).
front
();
auto
Out_name
=
opdesc
.
Input
(
framework
::
GradVarName
(
"Out"
)).
front
();
auto
X_name
=
opdesc
.
Output
(
framework
::
GradVarName
(
"X"
)).
front
();
param_
.
Out_grad
=
GetVar
<
lite
::
Tensor
>
(
scope
,
Out_name
);
auto
Y_name
=
opdesc
.
Output
(
framework
::
GradVarName
(
"Y"
)).
front
();
param_
.
X_grad
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
X_name
);
param_
.
Y_grad
=
GetMutableVar
<
Tensor
>
(
scope
,
Y_name
);
param_
.
Out_grad
=
GetVar
<
lite
::
Tensor
>
(
scope
,
Out_name
);
param_
.
axis
=
opdesc
.
GetAttr
<
int
>
(
"axis"
);
param_
.
X_grad
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
X_name
);
param_
.
Y_grad
=
GetMutableVar
<
Tensor
>
(
scope
,
Y_name
);
return
true
;
param_
.
axis
=
opdesc
.
GetAttr
<
int
>
(
"axis"
);
}
return
true
;
}
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
std
::
string
DebugString
()
const
override
{
return
"elementwise_grad_explicit_op"
;
}
private:
mutable
operators
::
ElementwiseGradParam
param_
;
};
#endif
#endif
}
// namespace operators
}
// namespace operators
...
...
paddle/fluid/lite/operators/elementwise_ops.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/op_lite.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
class
ElementwiseOp
:
public
OpLite
{
public:
explicit
ElementwiseOp
(
const
std
::
string
&
op_type
)
:
OpLite
(
op_type
)
{}
bool
CheckShape
()
const
override
;
bool
InferShape
()
const
override
;
bool
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
std
::
string
DebugString
()
const
override
{
return
"elementwise_op"
;
}
private:
mutable
operators
::
ElementwiseParam
param_
;
};
#ifdef LITE_WITH_X86
class
ElementwiseGradExplicitOp
:
public
OpLite
{
public:
explicit
ElementwiseGradExplicitOp
(
const
std
::
string
&
type
)
:
OpLite
(
type
)
{}
bool
CheckShape
()
const
override
;
bool
InferShape
()
const
override
;
bool
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
std
::
string
DebugString
()
const
override
{
return
"elementwise_grad_explicit_op"
;
}
private:
mutable
operators
::
ElementwiseGradParam
param_
;
};
#endif
}
// namespace operators
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/operators/fusion_elementwise_activation_ops.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/operators/fusion_elementwise_activation_ops.h"
#include <string>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
bool
FusionElementwiseActivationOp
::
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
{
ElementwiseOp
::
AttachImpl
(
opdesc
,
scope
);
param_
.
act_type
=
opdesc
.
GetAttr
<
std
::
string
>
(
"act_type"
);
// TODO(sangoly): support more activation types.
CHECK
(
param_
.
act_type
==
"relu"
)
<<
"Only relu activation be supported now"
;
return
true
;
}
#ifdef LITE_WITH_X86
bool
FusionElementwiseActivationGradExplicitOp
::
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
{
ElementwiseGradExplicitOp
::
AttachImpl
(
opdesc
,
scope
);
param_
.
act_type
=
opdesc
.
GetAttr
<
std
::
string
>
(
"act_type"
);
// TODO(sangoly): support more activation types.
CHECK
(
param_
.
act_type
==
"relu"
)
<<
"Only relu activation be supported now"
;
return
true
;
}
#endif
}
// namespace operators
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_OP
(
fusion_elementwise_sub_activation
,
paddle
::
lite
::
operators
::
FusionElementwiseActivationOp
);
#ifdef LITE_WITH_X86
REGISTER_LITE_OP
(
fusion_elementwise_sub_activation_grad
,
paddle
::
lite
::
operators
::
FusionElementwiseActivationGradExplicitOp
);
#endif
REGISTER_LITE_OP
(
fusion_elementwise_add_activation
,
paddle
::
lite
::
operators
::
FusionElementwiseActivationOp
);
paddle/fluid/lite/operators/fusion_elementwise_activation_ops.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/operators/elementwise_ops.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
class
FusionElementwiseActivationOp
:
public
ElementwiseOp
{
public:
explicit
FusionElementwiseActivationOp
(
const
std
::
string
&
type
)
:
ElementwiseOp
(
type
)
{}
bool
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
std
::
string
DebugString
()
const
override
{
return
"fusion_elementwise_activation_op"
;
}
private:
mutable
operators
::
FusionElementwiseActivationParam
param_
;
};
#ifdef LITE_WITH_X86
class
FusionElementwiseActivationGradExplicitOp
:
public
ElementwiseGradExplicitOp
{
public:
explicit
FusionElementwiseActivationGradExplicitOp
(
const
std
::
string
&
type
)
:
ElementwiseGradExplicitOp
(
type
)
{}
bool
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
std
::
string
DebugString
()
const
override
{
return
"fusion_elementwise_activation_grad_explicit_op"
;
}
private:
mutable
operators
::
FusionElementwiseActivationGradParam
param_
;
};
#endif
}
// namespace operators
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/operators/fusion_elementwise_activation_ops_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/operators/fusion_elementwise_activation_ops.h"
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
TEST
(
fusion_elementwise_activation_op_lite
,
test
)
{
// prepare variables
lite
::
Scope
scope
;
auto
*
x
=
scope
.
Var
(
"x"
)
->
GetMutable
<
lite
::
Tensor
>
();
auto
*
y
=
scope
.
Var
(
"y"
)
->
GetMutable
<
lite
::
Tensor
>
();
auto
*
out
=
scope
.
Var
(
"out"
)
->
GetMutable
<
lite
::
Tensor
>
();
x
->
Resize
(
lite
::
DDim
(
std
::
vector
<
int64_t
>
({
10
,
20
})));
y
->
Resize
(
lite
::
DDim
(
std
::
vector
<
int64_t
>
({
10
,
20
})));
out
->
Resize
(
lite
::
DDim
(
std
::
vector
<
int64_t
>
{
10
,
20
}));
// set data
for
(
int
i
=
0
;
i
<
10
*
20
;
i
++
)
{
x
->
mutable_data
<
float
>
()[
i
]
=
i
;
}
for
(
int
i
=
0
;
i
<
10
*
20
;
i
++
)
{
y
->
mutable_data
<
float
>
()[
i
]
=
i
;
}
for
(
int
i
=
0
;
i
<
10
*
20
;
i
++
)
{
out
->
mutable_data
<
float
>
()[
i
]
=
0.
;
}
// prepare op desc
cpp
::
OpDesc
desc
;
desc
.
SetType
(
"fusion_elementwise_add_activation"
);
desc
.
SetInput
(
"X"
,
{
"x"
});
desc
.
SetInput
(
"Y"
,
{
"y"
});
desc
.
SetOutput
(
"Out"
,
{
"out"
});
desc
.
SetAttr
(
"axis"
,
static_cast
<
int
>
(
1
));
desc
.
SetAttr
(
"act_type"
,
std
::
string
(
"relu"
));
FusionElementwiseActivationOp
fuse_op
(
"fusion_elementwise_add_activation"
);
fuse_op
.
SetValidPlaces
({
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
fuse_op
.
Attach
(
desc
,
&
scope
);
}
}
// namespace operators
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/operators/op_params.h
浏览文件 @
e8ebb084
...
@@ -203,6 +203,15 @@ struct SplitParam {
...
@@ -203,6 +203,15 @@ struct SplitParam {
std
::
vector
<
int
>
sections
;
std
::
vector
<
int
>
sections
;
};
};
// For Transpose op
struct
TransposeParam
{
const
lite
::
Tensor
*
x
{};
lite
::
Tensor
*
output
{};
std
::
vector
<
int
>
axis
;
bool
use_mkldnn
{
false
};
std
::
string
data_format
{
"AnyLayout"
};
};
/// ----------------------- element wise operators ----------------------
/// ----------------------- element wise operators ----------------------
struct
ElementwiseParam
{
struct
ElementwiseParam
{
const
lite
::
Tensor
*
X
{};
const
lite
::
Tensor
*
X
{};
...
@@ -219,6 +228,14 @@ struct ElementwiseGradParam {
...
@@ -219,6 +228,14 @@ struct ElementwiseGradParam {
int
axis
{
-
1
};
// for broadcasting.
int
axis
{
-
1
};
// for broadcasting.
};
};
struct
FusionElementwiseActivationParam
:
public
ElementwiseParam
{
std
::
string
act_type
;
};
struct
FusionElementwiseActivationGradParam
:
public
ElementwiseGradParam
{
std
::
string
act_type
;
};
/// ----------------------- activation operators ----------------------
/// ----------------------- activation operators ----------------------
struct
ActivationParam
{
struct
ActivationParam
{
const
lite
::
Tensor
*
X
{};
const
lite
::
Tensor
*
X
{};
...
...
paddle/fluid/lite/operators/split_op.cc
浏览文件 @
e8ebb084
...
@@ -48,7 +48,7 @@ bool SplitOp::InferShape() const {
...
@@ -48,7 +48,7 @@ bool SplitOp::InferShape() const {
outs_dims
.
push_back
(
dim
);
outs_dims
.
push_back
(
dim
);
}
}
}
else
if
(
sections
.
size
()
>
0
)
{
}
else
if
(
sections
.
size
()
>
0
)
{
for
(
size_
t
i
=
0
;
i
<
outs_number
;
++
i
)
{
for
(
in
t
i
=
0
;
i
<
outs_number
;
++
i
)
{
auto
dim
=
in_dims
;
auto
dim
=
in_dims
;
dim
[
axis
]
=
sections
[
i
];
dim
[
axis
]
=
sections
[
i
];
outs_dims
.
push_back
(
dim
);
outs_dims
.
push_back
(
dim
);
...
@@ -66,9 +66,9 @@ bool SplitOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
...
@@ -66,9 +66,9 @@ bool SplitOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
param_
.
axis
=
opdesc
.
GetAttr
<
int
>
(
"axis"
);
param_
.
axis
=
opdesc
.
GetAttr
<
int
>
(
"axis"
);
param_
.
num
=
opdesc
.
GetAttr
<
int
>
(
"num"
);
param_
.
num
=
opdesc
.
GetAttr
<
int
>
(
"num"
);
param_
.
sections
=
opdesc
.
GetAttr
<
std
::
vector
<
int
>>
(
"sections"
);
param_
.
sections
=
opdesc
.
GetAttr
<
std
::
vector
<
int
>>
(
"sections"
);
param_
.
x
=
const_cast
<
lite
::
Tensor
*>
(
auto
input
=
opdesc
.
Input
(
"Input"
).
front
();
&
scope
->
FindVar
(
opdesc
.
Input
(
"X"
).
front
())
->
Get
<
lite
::
Tensor
>
());
auto
outs
=
opdesc
.
Output
(
"Out"
);
auto
outs
=
opdesc
.
Output
(
"Out"
);
param_
.
x
=
scope
->
FindVar
(
input
)
->
GetMutable
<
lite
::
Tensor
>
();
for
(
auto
var
:
outs
)
{
for
(
auto
var
:
outs
)
{
param_
.
output
.
push_back
(
scope
->
FindVar
(
var
)
->
GetMutable
<
lite
::
Tensor
>
());
param_
.
output
.
push_back
(
scope
->
FindVar
(
var
)
->
GetMutable
<
lite
::
Tensor
>
());
}
}
...
...
paddle/fluid/lite/operators/transpose_op.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/operators/transpose_op.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
// Transpose
bool
TransposeOp
::
CheckShape
()
const
{
CHECK_OR_FALSE
(
param_
.
x
);
CHECK_OR_FALSE
(
param_
.
output
);
auto
x_dims
=
param_
.
x
->
dims
();
auto
x_rank
=
x_dims
.
size
();
std
::
vector
<
int
>
axis
=
param_
.
axis
;
size_t
axis_size
=
axis
.
size
();
// "The input tensor's rank(%d) should be equal to the axis's size(%d)",
// x_rank, axis_size
CHECK_OR_FALSE
(
x_rank
==
axis_size
);
std
::
vector
<
int
>
count
(
axis_size
,
0
);
for
(
size_t
i
=
0
;
i
<
axis_size
;
i
++
)
{
// Each element of Attribute axis should be a unique value
// range from 0 to (dims - 1),
// where the dims is the axis's size
CHECK_OR_FALSE
(
axis
[
i
]
<
static_cast
<
int
>
(
axis_size
)
&&
++
count
[
axis
[
i
]]
==
1
);
}
return
true
;
}
bool
TransposeOp
::
InferShape
()
const
{
CHECK_OR_FALSE
(
param_
.
x
);
CHECK_OR_FALSE
(
param_
.
output
);
auto
x_dims
=
param_
.
x
->
dims
();
auto
x_rank
=
x_dims
.
size
();
std
::
vector
<
int
>
axis
=
param_
.
axis
;
size_t
axis_size
=
axis
.
size
();
// "The input tensor's rank(%d) should be equal to the axis's size(%d)",
// x_rank, axis_size
CHECK_OR_FALSE
(
x_rank
==
axis_size
);
std
::
vector
<
int
>
count
(
axis_size
,
0
);
for
(
size_t
i
=
0
;
i
<
axis_size
;
i
++
)
{
// Each element of Attribute axis should be a unique value
// range from 0 to (dims - 1),
// where the dims is the axis's size
CHECK_OR_FALSE
(
axis
[
i
]
<
static_cast
<
int
>
(
axis_size
)
&&
++
count
[
axis
[
i
]]
==
1
);
}
lite
::
DDim
out_dims
(
x_dims
);
for
(
size_t
i
=
0
;
i
<
axis_size
;
i
++
)
{
out_dims
[
i
]
=
x_dims
[
axis
[
i
]];
}
param_
.
output
->
Resize
(
out_dims
);
return
true
;
}
bool
TransposeOp
::
AttachImpl
(
const
cpp
::
OpDesc
&
op_desc
,
lite
::
Scope
*
scope
)
{
auto
x
=
op_desc
.
Input
(
"X"
).
front
();
auto
out
=
op_desc
.
Output
(
"Out"
).
front
();
CHECK
(
scope
->
FindVar
(
x
));
CHECK
(
scope
->
FindVar
(
out
));
param_
.
x
=
GetVar
<
lite
::
Tensor
>
(
scope
,
x
);
param_
.
output
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
out
);
param_
.
axis
=
op_desc
.
GetAttr
<
std
::
vector
<
int
>>
(
"axis"
);
if
(
op_desc
.
HasAttr
(
"use_mkldnn"
))
{
param_
.
use_mkldnn
=
op_desc
.
GetAttr
<
bool
>
(
"use_mkldnn"
);
}
if
(
op_desc
.
HasAttr
(
"data_format"
))
{
param_
.
data_format
=
op_desc
.
GetAttr
<
std
::
string
>
(
"data_format"
);
}
return
true
;
}
// Transpose2
bool
Transpose2Op
::
CheckShape
()
const
{
CHECK_OR_FALSE
(
param_
.
x
);
CHECK_OR_FALSE
(
param_
.
output
);
auto
x_dims
=
param_
.
x
->
dims
();
auto
x_rank
=
x_dims
.
size
();
std
::
vector
<
int
>
axis
=
param_
.
axis
;
size_t
axis_size
=
axis
.
size
();
// "The input tensor's rank(%d) should be equal to the axis's size(%d)",
// x_rank, axis_size
CHECK_OR_FALSE
(
x_rank
==
axis_size
);
std
::
vector
<
int
>
count
(
axis_size
,
0
);
for
(
size_t
i
=
0
;
i
<
axis_size
;
i
++
)
{
// Each element of Attribute axis should be a unique value
// range from 0 to (dims - 1),
// where the dims is the axis's size
CHECK_OR_FALSE
(
axis
[
i
]
<
static_cast
<
int
>
(
axis_size
)
&&
++
count
[
axis
[
i
]]
==
1
);
}
return
true
;
}
bool
Transpose2Op
::
InferShape
()
const
{
CHECK_OR_FALSE
(
param_
.
x
);
CHECK_OR_FALSE
(
param_
.
output
);
auto
x_dims
=
param_
.
x
->
dims
();
auto
x_rank
=
x_dims
.
size
();
std
::
vector
<
int
>
axis
=
param_
.
axis
;
size_t
axis_size
=
axis
.
size
();
// "The input tensor's rank(%d) should be equal to the axis's size(%d)",
// x_rank, axis_size
CHECK_OR_FALSE
(
x_rank
==
axis_size
);
std
::
vector
<
int
>
count
(
axis_size
,
0
);
for
(
size_t
i
=
0
;
i
<
axis_size
;
i
++
)
{
// Each element of Attribute axis should be a unique value
// range from 0 to (dims - 1),
// where the dims is the axis's size
CHECK_OR_FALSE
(
axis
[
i
]
<
static_cast
<
int
>
(
axis_size
)
&&
++
count
[
axis
[
i
]]
==
1
);
}
lite
::
DDim
out_dims
(
x_dims
);
for
(
size_t
i
=
0
;
i
<
axis_size
;
i
++
)
{
out_dims
[
i
]
=
x_dims
[
axis
[
i
]];
}
param_
.
output
->
Resize
(
out_dims
);
return
true
;
}
bool
Transpose2Op
::
AttachImpl
(
const
cpp
::
OpDesc
&
op_desc
,
lite
::
Scope
*
scope
)
{
auto
x
=
op_desc
.
Input
(
"X"
).
front
();
auto
out
=
op_desc
.
Output
(
"Out"
).
front
();
CHECK
(
scope
->
FindVar
(
x
));
CHECK
(
scope
->
FindVar
(
out
));
param_
.
x
=
GetVar
<
lite
::
Tensor
>
(
scope
,
x
);
param_
.
output
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
out
);
param_
.
axis
=
op_desc
.
GetAttr
<
std
::
vector
<
int
>>
(
"axis"
);
if
(
op_desc
.
HasAttr
(
"use_mkldnn"
))
{
param_
.
use_mkldnn
=
op_desc
.
GetAttr
<
bool
>
(
"use_mkldnn"
);
}
if
(
op_desc
.
HasAttr
(
"data_format"
))
{
param_
.
data_format
=
op_desc
.
GetAttr
<
std
::
string
>
(
"data_format"
);
}
return
true
;
}
}
// namespace operators
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_OP
(
transpose
,
paddle
::
lite
::
operators
::
TransposeOp
);
REGISTER_LITE_OP
(
transpose2
,
paddle
::
lite
::
operators
::
Transpose2Op
);
paddle/fluid/lite/operators/transpose_op.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/scope.h"
#include "paddle/fluid/lite/utils/all.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
// Transpose
class
TransposeOp
:
public
OpLite
{
public:
TransposeOp
()
{}
explicit
TransposeOp
(
const
std
::
string
&
op_type
)
:
OpLite
(
op_type
)
{}
bool
CheckShape
()
const
override
;
bool
InferShape
()
const
override
;
bool
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
std
::
string
DebugString
()
const
override
{
return
"transpose"
;
}
private:
mutable
TransposeParam
param_
;
};
// Transpose2
class
Transpose2Op
:
public
OpLite
{
public:
Transpose2Op
()
{}
explicit
Transpose2Op
(
const
std
::
string
&
op_type
)
:
OpLite
(
op_type
)
{}
bool
CheckShape
()
const
override
;
bool
InferShape
()
const
override
;
bool
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
std
::
string
DebugString
()
const
override
{
return
"transpose2"
;
}
private:
mutable
TransposeParam
param_
;
};
}
// namespace operators
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/operators/transpose_op_test.cc
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/operators/transpose_op.h"
#include <gtest/gtest.h>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
// Transpose
TEST
(
transpose_op_lite
,
test
)
{
// prepare variables
Scope
scope
;
auto
*
x
=
scope
.
Var
(
"x"
)
->
GetMutable
<
Tensor
>
();
auto
*
output
=
scope
.
Var
(
"output"
)
->
GetMutable
<
Tensor
>
();
const
int
h
=
10
;
const
int
w
=
20
;
x
->
Resize
(
DDim
(
std
::
vector
<
int64_t
>
({
h
,
w
})));
output
->
Resize
(
DDim
(
std
::
vector
<
int64_t
>
{
w
,
h
}));
// set data
for
(
int
i
=
0
;
i
<
h
*
w
;
i
++
)
{
x
->
mutable_data
<
float
>
()[
i
]
=
i
;
}
for
(
int
i
=
0
;
i
<
w
*
h
;
i
++
)
{
output
->
mutable_data
<
float
>
()[
i
]
=
0.
;
}
// prepare op desc
cpp
::
OpDesc
desc
;
desc
.
SetType
(
"transpose"
);
desc
.
SetInput
(
"X"
,
{
"x"
});
desc
.
SetOutput
(
"Out"
,
{
"output"
});
// axis change for shape in mobilenetssd: [1, 24, 2, 2] => [1, 2, 2, 24]
std
::
vector
<
int
>
axis
{
0
,
2
,
3
,
1
};
desc
.
SetAttr
(
"axis"
,
axis
);
TransposeOp
transpose
(
"transpose"
);
transpose
.
SetValidPlaces
({
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
transpose
.
Attach
(
desc
,
&
scope
);
}
// Transpose2
TEST
(
transpose2_op_lite
,
test
)
{
// prepare variables
Scope
scope
;
auto
*
x
=
scope
.
Var
(
"x"
)
->
GetMutable
<
Tensor
>
();
auto
*
output
=
scope
.
Var
(
"output"
)
->
GetMutable
<
Tensor
>
();
const
int
h
=
10
;
const
int
w
=
20
;
x
->
Resize
(
DDim
(
std
::
vector
<
int64_t
>
({
h
,
w
})));
output
->
Resize
(
DDim
(
std
::
vector
<
int64_t
>
{
w
,
h
}));
// set data
for
(
int
i
=
0
;
i
<
h
*
w
;
i
++
)
{
x
->
mutable_data
<
float
>
()[
i
]
=
i
;
}
for
(
int
i
=
0
;
i
<
w
*
h
;
i
++
)
{
output
->
mutable_data
<
float
>
()[
i
]
=
0.
;
}
// prepare op desc
cpp
::
OpDesc
desc
;
desc
.
SetType
(
"transpose2"
);
desc
.
SetInput
(
"X"
,
{
"x"
});
desc
.
SetOutput
(
"Out"
,
{
"output"
});
// axis change for shape in mobilenetssd: [1, 24, 2, 2] => [1, 2, 2, 24]
std
::
vector
<
int
>
axis
{
0
,
2
,
3
,
1
};
desc
.
SetAttr
(
"axis"
,
axis
);
Transpose2Op
transpose2
(
"transpose2"
);
transpose2
.
SetValidPlaces
({
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
transpose2
.
Attach
(
desc
,
&
scope
);
}
}
// namespace operators
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/operators/use_ops.h
0 → 100644
浏览文件 @
e8ebb084
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/*
* ATTENTION this header file can only include in .cc file.
*/
USE_LITE_OP
(
mul
);
USE_LITE_OP
(
fc
);
USE_LITE_OP
(
relu
);
USE_LITE_OP
(
scale
);
USE_LITE_OP
(
feed
);
USE_LITE_OP
(
fetch
);
USE_LITE_OP
(
io_copy
);
USE_LITE_OP
(
elementwise_add
)
USE_LITE_OP
(
elementwise_sub
)
USE_LITE_OP
(
square
)
USE_LITE_OP
(
softmax
)
USE_LITE_OP
(
dropout
)
USE_LITE_OP
(
concat
)
USE_LITE_OP
(
conv2d
)
USE_LITE_OP
(
depthwise_conv2d
)
USE_LITE_OP
(
pool2d
)
USE_LITE_OP
(
batch_norm
)
paddle/fluid/lite/tools/build.sh
浏览文件 @
e8ebb084
...
@@ -56,7 +56,8 @@ function check_style {
...
@@ -56,7 +56,8 @@ function check_style {
function
cmake_arm
{
function
cmake_arm
{
# $1: ARM_TARGET_OS in "android" , "armlinux"
# $1: ARM_TARGET_OS in "android" , "armlinux"
# $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a" ,"armeabi-v7a-hf"
# $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf"
# $3: ARM_TARGET_LANG in "gcc" "clang"
cmake ..
\
cmake ..
\
-DWITH_GPU
=
OFF
\
-DWITH_GPU
=
OFF
\
-DWITH_MKL
=
OFF
\
-DWITH_MKL
=
OFF
\
...
@@ -66,7 +67,7 @@ function cmake_arm {
...
@@ -66,7 +67,7 @@ function cmake_arm {
-DLITE_WITH_ARM
=
ON
\
-DLITE_WITH_ARM
=
ON
\
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK
=
ON
\
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK
=
ON
\
-DWITH_TESTING
=
ON
\
-DWITH_TESTING
=
ON
\
-DARM_TARGET_OS
=
$1
-DARM_TARGET_ARCH_ABI
=
$2
-DARM_TARGET_OS
=
$1
-DARM_TARGET_ARCH_ABI
=
$2
-DARM_TARGET_LANG
=
$3
}
}
function
build_single
{
function
build_single
{
...
@@ -75,7 +76,7 @@ function build_single {
...
@@ -75,7 +76,7 @@ function build_single {
}
}
function
build
{
function
build
{
make lite_compile_deps
-j
$NUM_CORES_FOR_COMPILE
make lite_compile_deps
-j
$NUM_CORES_FOR_COMPILE
}
}
# It will eagerly test all lite related unittests.
# It will eagerly test all lite related unittests.
...
@@ -113,22 +114,91 @@ function test_arm_android {
...
@@ -113,22 +114,91 @@ function test_arm_android {
echo
"test name:
${
test_name
}
"
echo
"test name:
${
test_name
}
"
adb_work_dir
=
"/data/local/tmp"
adb_work_dir
=
"/data/local/tmp"
skip_list
=
"test_model_parser_lite"
# add more with space
[[
$skip_list
=
~
(
^|[[:space:]]
)
$test_name
(
$|
[[
:space:]]
)
]]
&&
continue
||
echo
'skip $test_name'
skip_list
=(
"test_model_parser_lite"
"test_cxx_api_lite"
)
for
skip_name
in
${
skip_list
[@]
}
;
do
[[
$skip_name
=
~
(
^|[[:space:]]
)
$test_name
(
$|
[[
:space:]]
)
]]
&&
echo
"skip
$test_name
"
&&
return
done
testpath
=
$(
find ./paddle/fluid
-name
${
test_name
}
)
testpath
=
$(
find ./paddle/fluid
-name
${
test_name
}
)
adb
-s
emulator-
${
port
}
push
${
testpath
}
${
adb_work_dir
}
adb
-s
emulator-
${
port
}
push
${
testpath
}
${
adb_work_dir
}
adb
-s
emulator-
${
port
}
shell
chmod
+x
"
${
adb_work_dir
}
/
${
test_name
}
"
adb
-s
emulator-
${
port
}
shell
chmod
+x
"
${
adb_work_dir
}
/
${
test_name
}
"
adb
-s
emulator-
${
port
}
shell
"./
${
adb_work_dir
}
/
${
test_name
}
"
adb
-s
emulator-
${
port
}
shell
"./
${
adb_work_dir
}
/
${
test_name
}
"
}
}
function
test_arm_model
{
local
test_name
=
$1
local
port
=
$2
local
model_dir
=
$3
if
[[
"
${
test_name
}
x"
==
"x"
]]
;
then
echo
"test_name can not be empty"
exit
1
fi
if
[[
"
${
port
}
x"
==
"x"
]]
;
then
echo
"Port can not be empty"
exit
1
fi
if
[[
"
${
model_dir
}
x"
==
"x"
]]
;
then
echo
"Model dir can not be empty"
exit
1
fi
echo
"test name:
${
test_name
}
"
adb_work_dir
=
"/data/local/tmp"
testpath
=
$(
find ./paddle/fluid
-name
${
test_name
}
)
adb
-s
emulator-
${
port
}
push
${
model_dir
}
${
adb_work_dir
}
adb
-s
emulator-
${
port
}
push
${
testpath
}
${
adb_work_dir
}
adb
-s
emulator-
${
port
}
shell
chmod
+x
"
${
adb_work_dir
}
/
${
test_name
}
"
local
adb_model_path
=
"./
${
adb_work_dir
}
/
`
basename
${
model_dir
}
`
"
adb
-s
emulator-
${
port
}
shell
"./
${
adb_work_dir
}
/
${
test_name
}
--eval_model_dir=
$adb_model_path
"
}
# Build the code and run lite arm tests. This is executed in the CI system.
# Build the code and run lite arm tests. This is executed in the CI system.
function
build_test_arm
{
function
build_test_arm
{
# 1. Build goes first
cur_dir
=
$(
pwd
)
for
lang
in
"gcc"
"clang"
;
do
for
os
in
"android"
"armlinux"
;
do
if
[[
${
os
}
==
"armlinux"
&&
${
lang
}
==
"clang"
]]
;
then
continue
fi
for
abi
in
"armv8"
"armv7"
"armv7hf"
;
do
# TODO(hongming): enable compile armv7 and armv7hf on armlinux
if
[[
${
abi
}
==
"armv7hf"
]]
;
then
echo
"armv7hf is not supported on both android and armlinux yet"
continue
fi
# TODO(hongming): enable armv7 on armlinux
if
[[
${
os
}
==
"armlinux"
&&
${
abi
}
==
"armv7"
]]
;
then
echo
"armv7 is not supported on armlinux yet"
continue
fi
if
[[
${
os
}
==
"android"
&&
${
abi
}
==
"armv7hf"
]]
;
then
echo
"android do not need armv7hf"
continue
fi
build_dir
=
$cur_dir
/build.lite.
${
os
}
.
${
abi
}
.
${
lang
}
mkdir
-p
$build_dir
cd
$build_dir
cmake_arm
${
os
}
${
abi
}
${
lang
}
build
$TESTS_FILE
done
done
done
# 2. Then test
port_armv8
=
5554
port_armv8
=
5554
port_armv7
=
5556
port_armv7
=
5556
adb kill-server
adb kill-server
adb devices |
grep
emulator |
cut
-f1
|
while
read
line
;
do
adb
-s
$line
emu
kill
;
done
adb devices |
grep
emulator |
cut
-f1
|
while
read
line
;
do
adb
-s
$line
emu
kill
;
done
# start android arm
64-v8a armeabi-v7a
emulators first
# start android arm
v8 and armv7
emulators first
echo
n | avdmanager create avd
-f
-n
paddle-armv8
-k
"system-images;android-24;google_apis;arm64-v8a"
echo
n | avdmanager create avd
-f
-n
paddle-armv8
-k
"system-images;android-24;google_apis;arm64-v8a"
echo
-ne
'\n'
|
${
ANDROID_HOME
}
/emulator/emulator
-avd
paddle-armv8
-noaudio
-no-window
-gpu
off
-verbose
-port
${
port_armv8
}
&
echo
-ne
'\n'
|
${
ANDROID_HOME
}
/emulator/emulator
-avd
paddle-armv8
-noaudio
-no-window
-gpu
off
-verbose
-port
${
port_armv8
}
&
sleep
1m
sleep
1m
...
@@ -136,55 +206,37 @@ function build_test_arm {
...
@@ -136,55 +206,37 @@ function build_test_arm {
echo
-ne
'\n'
|
${
ANDROID_HOME
}
/emulator/emulator
-avd
paddle-armv7
-noaudio
-no-window
-gpu
off
-verbose
-port
${
port_armv7
}
&
echo
-ne
'\n'
|
${
ANDROID_HOME
}
/emulator/emulator
-avd
paddle-armv7
-noaudio
-no-window
-gpu
off
-verbose
-port
${
port_armv7
}
&
sleep
1m
sleep
1m
cur_dir
=
$(
pwd
)
# now can only test android.
for
lang
in
"gcc"
"clang"
;
do
for
os
in
"android"
"armlinux"
;
do
for
abi
in
"armv8"
"armv7"
;
do
for
abi
in
"arm64-v8a"
"armeabi-v7a"
"armeabi-v7a-hf"
;
do
# TODO(yuanshuai): enable armv7 on android
# TODO(TJ): enable compile on v7-hf on andorid and all v7 on armlinux
if
[[
${
abi
}
==
"armv7"
]]
;
then
if
[[
${
abi
}
==
"armeabi-v7a-hf"
]]
;
then
echo
"armeabi-v7a-hf is not supported on both android and armlinux"
continue
fi
if
[[
${
os
}
==
"armlinux"
&&
${
abi
}
==
"armeabi-v7a"
]]
;
then
echo
"armeabi-v7a is not supported on armlinux yet"
continue
continue
fi
fi
build_dir
=
$cur_dir
/build.lite.
${
os
}
.
${
abi
}
build_dir
=
$cur_dir
/build.lite.android.
${
abi
}
.
${
lang
}
mkdir
-p
$build_dir
cd
$build_dir
cd
$build_dir
cmake_arm
${
os
}
${
abi
}
local
port
=
build
$TESTS_FILE
if
[[
${
abi
}
==
"armv7"
]]
;
then
port
=
${
port_armv7
}
# armlinux need in another docker
fi
# TODO(TJ): enable test with armlinux
if
[[
${
os
}
==
"android"
]]
;
then
adb_abi
=
${
abi
}
if
[[
${
adb_abi
}
==
"armeabi-v7a-hf"
]]
;
then
adb_abi
=
"armeabi-v7a"
fi
if
[[
${
adb_abi
}
==
"armeabi-v7a"
]]
;
then
# skip all armv7 tests
# TODO(TJ): enable test with armv7
continue
fi
local
port
=
if
[[
${
adb_abi
}
==
"armeabi-v7a"
]]
;
then
port
=
${
port_armv7
}
fi
if
[[
${
adb_abi
}
==
"arm64-v8a"
]]
;
then
if
[[
${
abi
}
==
"armv8"
]]
;
then
port
=
${
port_armv8
}
port
=
${
port_armv8
}
fi
echo
"test file:
${
TESTS_FILE
}
"
for
_test
in
$(
cat
$TESTS_FILE
)
;
do
test_arm_android
$_test
$port
done
fi
fi
echo
"test file:
${
TESTS_FILE
}
"
for
_test
in
$(
cat
$TESTS_FILE
)
;
do
test_arm_android
$_test
$port
done
# TODO(sangoly): refine this
test_arm_model
"test_cxx_api_lite"
$port
"./third_party/install/mobilenet_v2_relu"
done
done
done
done
# armlinux need in another docker
# TODO(hongming): enable test armlinux on armv8, armv7 and armv7hf
adb devices |
grep
emulator |
cut
-f1
|
while
read
line
;
do
adb
-s
$line
emu
kill
;
done
adb devices |
grep
emulator |
cut
-f1
|
while
read
line
;
do
adb
-s
$line
emu
kill
;
done
echo
"Done"
echo
"Done"
}
}
...
...
paddle/fluid/lite/tools/mobile_readme.md
浏览文件 @
e8ebb084
...
@@ -17,8 +17,16 @@ $ git checkout incubate/lite
...
@@ -17,8 +17,16 @@ $ git checkout incubate/lite
### 主要cmake选项
### 主要cmake选项
-
`ARM_TARGET_OS`
代表目标操作系统, 目前支持 "android" "armlinux", 模型是Android
-
`ARM_TARGET_OS`
代表目标操作系统, 目前支持 "android" "armlinux", 默认是Android
-
`ARM_TARGET_ARCH_ABI`
代表ARCH, 目前支持 "arm64-v8a" "armeabi-v7a"。 模型是arm64-v8a
-
`ARM_TARGET_ARCH_ABI`
代表ARCH,支持输入"armv8"和"armv7",针对OS不一样选择不一样。
-
`-DARM_TARGET_OS="android"`
时
-
"armv8", 等效于 "arm64-v8a"。 default值为这个。
-
"armv7", 等效于 "armeabi-v7a"。
-
`-DARM_TARGET_OS="armlinux"`
时
-
"armv8", 等效于 "arm64"。 default值为这个。
-
"armv7hf", 等效于使用
`eabihf`
且
`-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 `
。
-
"armv7", 等效于使用
`eabi`
且
`-march=armv7-a -mfloat-abi=softfp -mfpu=neon-vfpv4`
。
-
`ARM_TARGET_LANG`
代表目标编译的语言, 默认为gcc,支持 gcc和clang两种。
### 编译
### 编译
...
...
paddle/fluid/lite/x86/CMakeLists.txt
浏览文件 @
e8ebb084
...
@@ -3,5 +3,3 @@ if (NOT LITE_WITH_X86)
...
@@ -3,5 +3,3 @@ if (NOT LITE_WITH_X86)
endif
()
endif
()
cc_library
(
target_wrapper_x86 SRCS target_wrapper.cc
)
cc_library
(
target_wrapper_x86 SRCS target_wrapper.cc
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录