Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
a29db744
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a29db744
编写于
3月 14, 2019
作者:
xiebaiyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove mali
上级
ad3844d6
变更
66
隐藏空白更改
内联
并排
Showing
66 changed file
with
38 addition
and
3569 deletion
+38
-3569
CMakeLists.txt
CMakeLists.txt
+0
-26
doc/build.md
doc/build.md
+0
-1
doc/design_doc.md
doc/design_doc.md
+0
-5
src/framework/load_ops.h
src/framework/load_ops.h
+21
-31
src/framework/op_registry.h
src/framework/op_registry.h
+0
-3
src/framework/operator.h
src/framework/operator.h
+0
-10
src/operators/activation_op.cpp
src/operators/activation_op.cpp
+0
-3
src/operators/batchnorm_op.cpp
src/operators/batchnorm_op.cpp
+0
-3
src/operators/bilinear_interp_op.cpp
src/operators/bilinear_interp_op.cpp
+1
-2
src/operators/box_coder_op.cpp
src/operators/box_coder_op.cpp
+0
-2
src/operators/concat_op.cpp
src/operators/concat_op.cpp
+1
-3
src/operators/conv_op.cpp
src/operators/conv_op.cpp
+1
-3
src/operators/conv_transpose_op.cpp
src/operators/conv_transpose_op.cpp
+1
-2
src/operators/crf_op.cpp
src/operators/crf_op.cpp
+1
-2
src/operators/elementwise_add_op.cpp
src/operators/elementwise_add_op.cpp
+0
-3
src/operators/elementwise_mul_op.cpp
src/operators/elementwise_mul_op.cpp
+0
-3
src/operators/elementwise_sub_op.cpp
src/operators/elementwise_sub_op.cpp
+0
-3
src/operators/feed_op.cpp
src/operators/feed_op.cpp
+0
-3
src/operators/fetch_op.cpp
src/operators/fetch_op.cpp
+1
-3
src/operators/fusion_conv_add_op.cpp
src/operators/fusion_conv_add_op.cpp
+0
-3
src/operators/fusion_deconv_add_bn_op.cpp
src/operators/fusion_deconv_add_bn_op.cpp
+1
-2
src/operators/fusion_deconv_add_bn_relu_op.cpp
src/operators/fusion_deconv_add_bn_relu_op.cpp
+1
-2
src/operators/fusion_deconv_add_op.cpp
src/operators/fusion_deconv_add_op.cpp
+1
-2
src/operators/fusion_deconv_add_relu_op.cpp
src/operators/fusion_deconv_add_relu_op.cpp
+1
-2
src/operators/fusion_deconv_bn_relu_op.cpp
src/operators/fusion_deconv_bn_relu_op.cpp
+1
-2
src/operators/fusion_deconv_relu_op.cpp
src/operators/fusion_deconv_relu_op.cpp
+1
-2
src/operators/fusion_elementwise_add_relu_op.cpp
src/operators/fusion_elementwise_add_relu_op.cpp
+0
-4
src/operators/fusion_fc_op.cpp
src/operators/fusion_fc_op.cpp
+0
-3
src/operators/fusion_fc_relu_op.cpp
src/operators/fusion_fc_relu_op.cpp
+0
-3
src/operators/gru_unit_op.cpp
src/operators/gru_unit_op.cpp
+1
-2
src/operators/increment_op.cpp
src/operators/increment_op.cpp
+1
-2
src/operators/is_empty_op.cpp
src/operators/is_empty_op.cpp
+1
-2
src/operators/kernel/mali/ACL_Android
src/operators/kernel/mali/ACL_Android
+0
-1
src/operators/kernel/mali/acl_operator.cc
src/operators/kernel/mali/acl_operator.cc
+0
-220
src/operators/kernel/mali/acl_operator.h
src/operators/kernel/mali/acl_operator.h
+0
-1145
src/operators/kernel/mali/acl_tensor.cc
src/operators/kernel/mali/acl_tensor.cc
+0
-160
src/operators/kernel/mali/acl_tensor.h
src/operators/kernel/mali/acl_tensor.h
+0
-128
src/operators/kernel/mali/batchnorm_kernel.cpp
src/operators/kernel/mali/batchnorm_kernel.cpp
+0
-164
src/operators/kernel/mali/concat_kernel.cpp
src/operators/kernel/mali/concat_kernel.cpp
+0
-137
src/operators/kernel/mali/conv_add_kernel.cpp
src/operators/kernel/mali/conv_add_kernel.cpp
+0
-232
src/operators/kernel/mali/conv_kernel.cpp
src/operators/kernel/mali/conv_kernel.cpp
+0
-230
src/operators/kernel/mali/elementwise_add_kernel.cpp
src/operators/kernel/mali/elementwise_add_kernel.cpp
+0
-52
src/operators/kernel/mali/feed_kernel.cpp
src/operators/kernel/mali/feed_kernel.cpp
+0
-36
src/operators/kernel/mali/fetch_kernel.cpp
src/operators/kernel/mali/fetch_kernel.cpp
+0
-36
src/operators/kernel/mali/fushion_fc_kernel.cpp
src/operators/kernel/mali/fushion_fc_kernel.cpp
+0
-75
src/operators/kernel/mali/lrn_kernel.cpp
src/operators/kernel/mali/lrn_kernel.cpp
+0
-157
src/operators/kernel/mali/mul_kernel.cpp
src/operators/kernel/mali/mul_kernel.cpp
+0
-59
src/operators/kernel/mali/pool_kernel.cpp
src/operators/kernel/mali/pool_kernel.cpp
+0
-220
src/operators/kernel/mali/relu_kernel.cpp
src/operators/kernel/mali/relu_kernel.cpp
+0
-134
src/operators/kernel/mali/reshape_kernel.cpp
src/operators/kernel/mali/reshape_kernel.cpp
+0
-61
src/operators/kernel/mali/softmax_kernel.cpp
src/operators/kernel/mali/softmax_kernel.cpp
+0
-139
src/operators/lookup_op.cpp
src/operators/lookup_op.cpp
+1
-2
src/operators/lrn_op.cpp
src/operators/lrn_op.cpp
+0
-3
src/operators/mul_op.cpp
src/operators/mul_op.cpp
+0
-3
src/operators/norm_op.cpp
src/operators/norm_op.cpp
+1
-2
src/operators/pool_op.cpp
src/operators/pool_op.cpp
+0
-3
src/operators/prelu_op.cpp
src/operators/prelu_op.cpp
+0
-3
src/operators/prior_box_op.cpp
src/operators/prior_box_op.cpp
+0
-2
src/operators/reshape2_op.cpp
src/operators/reshape2_op.cpp
+0
-3
src/operators/reshape_op.cpp
src/operators/reshape_op.cpp
+0
-3
src/operators/resize_op.cpp
src/operators/resize_op.cpp
+0
-3
src/operators/scale_op.cpp
src/operators/scale_op.cpp
+0
-3
src/operators/shape_op.cpp
src/operators/shape_op.cpp
+0
-2
src/operators/slice_op.cpp
src/operators/slice_op.cpp
+0
-3
src/operators/softmax_op.cpp
src/operators/softmax_op.cpp
+0
-3
src/operators/sum_op.cpp
src/operators/sum_op.cpp
+0
-3
未找到文件。
CMakeLists.txt
浏览文件 @
a29db744
...
@@ -9,7 +9,6 @@ option(WITH_TEST "build with unit tests" ON)
...
@@ -9,7 +9,6 @@ option(WITH_TEST "build with unit tests" ON)
# select the platform to build
# select the platform to build
option
(
CPU
"build with arm CPU support"
ON
)
option
(
CPU
"build with arm CPU support"
ON
)
option
(
GPU_MALI
"build with arm mali GPU support"
OFF
)
option
(
GPU_CL
"build with OpenCL support"
OFF
)
option
(
GPU_CL
"build with OpenCL support"
OFF
)
option
(
FPGA
"build with FPGA support"
OFF
)
option
(
FPGA
"build with FPGA support"
OFF
)
if
(
FPGA
)
if
(
FPGA
)
...
@@ -97,31 +96,6 @@ else()
...
@@ -97,31 +96,6 @@ else()
endforeach
()
endforeach
()
endif
()
endif
()
if
(
GPU_MALI
)
add_definitions
(
-DPADDLE_MOBILE_MALI_GPU
)
add_definitions
(
-DUSE_ACL=1
)
add_definitions
(
-DUSE_OPENCL
)
set
(
ACL_ROOT
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/operators/kernel/mali/ACL_Android
)
include_directories
(
${
ACL_ROOT
}
${
ACL_ROOT
}
/include
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-L
${
ACL_ROOT
}
/build"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-larm_compute"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-larm_compute_core"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-larm_compute_graph"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-L
${
ACL_ROOT
}
/build/opencl-1.2-stubs"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-lOpenCL"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-DUSE_ACL=1"
)
else
()
file
(
GLOB_RECURSE _tmp_list src/operators/kernel/mali/*.cpp src/operators/kernel/mali/*.cc
)
foreach
(
f
${
_tmp_list
}
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
f
}
)
endforeach
()
file
(
GLOB_RECURSE _tmp_list_h src/operators/kernel/mali/*.h
)
foreach
(
f
${
_tmp_list_h
}
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
f
}
)
endforeach
()
endif
()
if
(
FPGA
)
if
(
FPGA
)
add_definitions
(
-DPADDLE_MOBILE_FPGA
)
add_definitions
(
-DPADDLE_MOBILE_FPGA
)
file
(
GLOB_RECURSE _tmp_list src/operators/math/*.cpp src/operators/kernel/fpga/*.cc
)
file
(
GLOB_RECURSE _tmp_list src/operators/math/*.cpp src/operators/kernel/fpga/*.cc
)
...
...
doc/build.md
浏览文件 @
a29db744
...
@@ -46,7 +46,6 @@ root@5affd29d4fc5:/ # ccmake .
...
@@ -46,7 +46,6 @@ root@5affd29d4fc5:/ # ccmake .
DEBUGING ON
DEBUGING ON
FPGA OFF
FPGA OFF
LOG_PROFILE ON
LOG_PROFILE ON
MALI_GPU OFF
NET googlenet
NET googlenet
USE_EXCEPTION ON
USE_EXCEPTION ON
USE_OPENMP OFF
USE_OPENMP OFF
...
...
doc/design_doc.md
浏览文件 @
a29db744
...
@@ -109,11 +109,6 @@ USE_OP_CPU(conv2d);
...
@@ -109,11 +109,6 @@ USE_OP_CPU(conv2d);
REGISTER_OPERATOR_CPU
(
conv2d
,
ops
::
ConvOp
);
REGISTER_OPERATOR_CPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
USE_OP_MALI_GPU
(
conv2d
);
REGISTER_OPERATOR_MALI_GPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA
(
conv2d
);
USE_OP_FPGA
(
conv2d
);
REGISTER_OPERATOR_FPGA
(
conv2d
,
ops
::
ConvOp
);
REGISTER_OPERATOR_FPGA
(
conv2d
,
ops
::
ConvOp
);
...
...
src/framework/load_ops.h
浏览文件 @
a29db744
...
@@ -23,15 +23,6 @@ limitations under the License. */
...
@@ -23,15 +23,6 @@ limitations under the License. */
#define LOAD_CPU_OP(op_type)
#define LOAD_CPU_OP(op_type)
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#define LOAD_MALI_GPU_OP(op_type) \
extern int TouchOpRegistrar_##op_type##_##mali_gpu(); \
static int use_op_itself_##op_type##_##mali_gpu __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_##mali_gpu()
#else
#define LOAD_MALI_GPU_OP(op_type)
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#define LOAD_FPGA_OP(op_type) \
#define LOAD_FPGA_OP(op_type) \
extern int TouchOpRegistrar_##op_type##_##fpga(); \
extern int TouchOpRegistrar_##op_type##_##fpga(); \
...
@@ -46,9 +37,8 @@ limitations under the License. */
...
@@ -46,9 +37,8 @@ limitations under the License. */
static int use_fusion_matcher_itself_##op_type __attribute__((unused)) = \
static int use_fusion_matcher_itself_##op_type __attribute__((unused)) = \
TouchFusionMatcherRegistrar_##op_type();
TouchFusionMatcherRegistrar_##op_type();
#define LOAD_OP(op_type) \
#define LOAD_OP(op_type) \
LOAD_CPU_OP(op_type); \
LOAD_CPU_OP(op_type); \
LOAD_MALI_GPU_OP(op_type); \
LOAD_FPGA_OP(op_type);
LOAD_FPGA_OP(op_type);
#define LOAD_OP1(op_type, device_type) LOAD_##device_type##_OP(op_type);
#define LOAD_OP1(op_type, device_type) LOAD_##device_type##_OP(op_type);
...
@@ -68,7 +58,7 @@ LOAD_OP(fetch)
...
@@ -68,7 +58,7 @@ LOAD_OP(fetch)
LOAD_OP
(
fill_constant
)
LOAD_OP
(
fill_constant
)
#endif
#endif
#ifdef BATCHNORM_OP
#ifdef BATCHNORM_OP
LOAD_OP
2
(
batch_norm
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
batch_norm
,
C
PU
);
#endif
#endif
#ifdef BILINEAR_INTERP_OP
#ifdef BILINEAR_INTERP_OP
LOAD_OP1
(
bilinear_interp
,
CPU
);
LOAD_OP1
(
bilinear_interp
,
CPU
);
...
@@ -77,40 +67,40 @@ LOAD_OP1(bilinear_interp, CPU);
...
@@ -77,40 +67,40 @@ LOAD_OP1(bilinear_interp, CPU);
LOAD_OP1
(
box_coder
,
CPU
);
LOAD_OP1
(
box_coder
,
CPU
);
#endif
#endif
#ifdef CONCAT_OP
#ifdef CONCAT_OP
LOAD_OP
3
(
concat
,
CPU
,
MALI_G
PU
,
FPGA
);
LOAD_OP
2
(
concat
,
C
PU
,
FPGA
);
#endif
#endif
#ifdef CONV_OP
#ifdef CONV_OP
LOAD_OP
3
(
conv2d
,
CPU
,
MALI_G
PU
,
FPGA
);
LOAD_OP
2
(
conv2d
,
C
PU
,
FPGA
);
#endif
#endif
#ifdef LRN_OP
#ifdef LRN_OP
LOAD_OP
2
(
lrn
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
lrn
,
C
PU
);
#endif
#endif
#ifdef SIGMOID_OP
#ifdef SIGMOID_OP
LOAD_OP1
(
sigmoid
,
CPU
);
LOAD_OP1
(
sigmoid
,
CPU
);
#endif
#endif
#ifdef FUSION_FC_RELU_OP
#ifdef FUSION_FC_RELU_OP
LOAD_OP3
(
fusion_fc_relu
,
CPU
,
MALI_GPU
,
FPGA
);
LOAD_OP3
(
fusion_fc_relu
,
CPU
,
FPGA
);
LOAD_FUSION_MATCHER
(
fusion_fc_relu
);
LOAD_FUSION_MATCHER
(
fusion_fc_relu
);
#endif
#endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP
#ifdef FUSION_ELEMENTWISEADDRELU_OP
LOAD_OP3
(
fusion_elementwise_add_relu
,
CPU
,
MALI_GPU
,
FPGA
);
LOAD_OP3
(
fusion_elementwise_add_relu
,
CPU
,
FPGA
);
LOAD_FUSION_MATCHER
(
fusion_elementwise_add_relu
);
LOAD_FUSION_MATCHER
(
fusion_elementwise_add_relu
);
#endif
#endif
#ifdef SPLIT_OP
#ifdef SPLIT_OP
LOAD_OP1
(
split
,
CPU
);
LOAD_OP1
(
split
,
CPU
);
#endif
#endif
#ifdef RESIZE_OP
#ifdef RESIZE_OP
LOAD_OP
2
(
resize
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
resize
,
C
PU
);
#endif
#endif
#ifdef FUSION_CONVADDBNRELU_OP
#ifdef FUSION_CONVADDBNRELU_OP
LOAD_OP2
(
fusion_conv_add_bn_relu
,
CPU
,
FPGA
);
LOAD_OP2
(
fusion_conv_add_bn_relu
,
CPU
,
FPGA
);
LOAD_FUSION_MATCHER
(
fusion_conv_add_bn_relu
);
LOAD_FUSION_MATCHER
(
fusion_conv_add_bn_relu
);
#endif
#endif
#ifdef RESHAPE_OP
#ifdef RESHAPE_OP
LOAD_OP
2
(
reshape
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
reshape
,
C
PU
);
#endif
#endif
#ifdef RESHAPE2_OP
#ifdef RESHAPE2_OP
LOAD_OP
2
(
reshape2
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
reshape2
,
C
PU
);
#endif
#endif
#ifdef TRANSPOSE_OP
#ifdef TRANSPOSE_OP
LOAD_OP1
(
transpose
,
CPU
);
LOAD_OP1
(
transpose
,
CPU
);
...
@@ -126,11 +116,11 @@ LOAD_OP2(fusion_conv_add_relu, CPU, FPGA);
...
@@ -126,11 +116,11 @@ LOAD_OP2(fusion_conv_add_relu, CPU, FPGA);
LOAD_FUSION_MATCHER
(
fusion_conv_add_relu
);
LOAD_FUSION_MATCHER
(
fusion_conv_add_relu
);
#endif
#endif
#ifdef FUSION_CONVADD_OP
#ifdef FUSION_CONVADD_OP
LOAD_OP
2
(
fusion_conv_add
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
fusion_conv_add
,
C
PU
);
LOAD_FUSION_MATCHER
(
fusion_conv_add
);
LOAD_FUSION_MATCHER
(
fusion_conv_add
);
#endif
#endif
#ifdef SOFTMAX_OP
#ifdef SOFTMAX_OP
LOAD_OP
2
(
softmax
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
softmax
,
C
PU
);
#endif
#endif
#ifdef SHAPE_OP
#ifdef SHAPE_OP
LOAD_OP1
(
shape
,
CPU
);
LOAD_OP1
(
shape
,
CPU
);
...
@@ -142,13 +132,13 @@ LOAD_OP1(depthwise_conv2d, CPU);
...
@@ -142,13 +132,13 @@ LOAD_OP1(depthwise_conv2d, CPU);
LOAD_OP1
(
conv2d_transpose
,
CPU
);
LOAD_OP1
(
conv2d_transpose
,
CPU
);
#endif
#endif
#ifdef SCALE_OP
#ifdef SCALE_OP
LOAD_OP
2
(
scale
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
scale
,
C
PU
);
#endif
#endif
#ifdef ELEMENTWISEADD_OP
#ifdef ELEMENTWISEADD_OP
LOAD_OP
2
(
elementwise_add
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
elementwise_add
,
C
PU
);
#endif
#endif
#ifdef PRELU_OP
#ifdef PRELU_OP
LOAD_OP
2
(
prelu
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
prelu
,
C
PU
);
#endif
#endif
#ifdef FLATTEN_OP
#ifdef FLATTEN_OP
LOAD_OP1
(
flatten
,
CPU
);
LOAD_OP1
(
flatten
,
CPU
);
...
@@ -182,13 +172,13 @@ LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
...
@@ -182,13 +172,13 @@ LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
LOAD_OP1
(
crf_decoding
,
CPU
);
LOAD_OP1
(
crf_decoding
,
CPU
);
#endif
#endif
#ifdef MUL_OP
#ifdef MUL_OP
LOAD_OP
2
(
mul
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
mul
,
C
PU
);
#endif
#endif
#ifdef NORM_OP
#ifdef NORM_OP
LOAD_OP1
(
norm
,
CPU
);
LOAD_OP1
(
norm
,
CPU
);
#endif
#endif
#ifdef RELU_OP
#ifdef RELU_OP
LOAD_OP
2
(
relu
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
relu
,
C
PU
);
LOAD_OP1
(
relu6
,
CPU
);
LOAD_OP1
(
relu6
,
CPU
);
#endif
#endif
#ifdef IM2SEQUENCE_OP
#ifdef IM2SEQUENCE_OP
...
@@ -198,11 +188,11 @@ LOAD_OP1(im2sequence, CPU);
...
@@ -198,11 +188,11 @@ LOAD_OP1(im2sequence, CPU);
LOAD_OP1
(
lookup_table
,
CPU
);
LOAD_OP1
(
lookup_table
,
CPU
);
#endif
#endif
#ifdef FUSION_FC_OP
#ifdef FUSION_FC_OP
LOAD_OP
3
(
fusion_fc
,
CPU
,
MALI_G
PU
,
FPGA
);
LOAD_OP
2
(
fusion_fc
,
C
PU
,
FPGA
);
LOAD_FUSION_MATCHER
(
fusion_fc
);
LOAD_FUSION_MATCHER
(
fusion_fc
);
#endif
#endif
#ifdef POOL_OP
#ifdef POOL_OP
LOAD_OP
3
(
pool2d
,
CPU
,
MALI_G
PU
,
FPGA
);
LOAD_OP
2
(
pool2d
,
C
PU
,
FPGA
);
#endif
#endif
#ifdef MULTICLASSNMS_OP
#ifdef MULTICLASSNMS_OP
LOAD_OP1
(
multiclass_nms
,
CPU
);
LOAD_OP1
(
multiclass_nms
,
CPU
);
...
@@ -217,7 +207,7 @@ LOAD_OP1(sum, CPU);
...
@@ -217,7 +207,7 @@ LOAD_OP1(sum, CPU);
LOAD_OP1
(
elementwise_mul
,
CPU
);
LOAD_OP1
(
elementwise_mul
,
CPU
);
#endif
#endif
#ifdef SLICE_OP
#ifdef SLICE_OP
LOAD_OP
2
(
slice
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
slice
,
C
PU
);
#endif
#endif
#ifdef FUSION_CONVBN_OP
#ifdef FUSION_CONVBN_OP
LOAD_OP2
(
fusion_conv_bn
,
CPU
,
FPGA
);
LOAD_OP2
(
fusion_conv_bn
,
CPU
,
FPGA
);
...
...
src/framework/op_registry.h
浏览文件 @
a29db744
...
@@ -115,9 +115,6 @@ class OpRegistry {
...
@@ -115,9 +115,6 @@ class OpRegistry {
#define REGISTER_OPERATOR_CPU(op_type, op_class) \
#define REGISTER_OPERATOR_CPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU);
REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU);
#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI);
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA);
REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA);
...
...
src/framework/operator.h
浏览文件 @
a29db744
...
@@ -130,13 +130,6 @@ class OpKernelBase {
...
@@ -130,13 +130,6 @@ class OpKernelBase {
}
}
#endif
#endif
#ifdef PADDLE_McOBILE_MALI_GPU
OpKernelBase
()
{
acl_op_
=
nullptr
;
}
void
*
GetAclOp
()
const
{
return
acl_op_
;
}
void
SetAclOp
(
void
*
op
,
void
*
ob
)
const
{
reinterpret_cast
<
OpKernelBase
<
Dtype
,
P
>
*>
(
ob
)
->
acl_op_
=
op
;
}
#endif
virtual
void
Compute
(
const
P
&
para
)
=
0
;
virtual
void
Compute
(
const
P
&
para
)
=
0
;
virtual
bool
Init
(
P
*
para
)
{
return
true
;
}
virtual
bool
Init
(
P
*
para
)
{
return
true
;
}
virtual
~
OpKernelBase
()
=
default
;
virtual
~
OpKernelBase
()
=
default
;
...
@@ -147,9 +140,6 @@ class OpKernelBase {
...
@@ -147,9 +140,6 @@ class OpKernelBase {
#endif
#endif
private:
private:
#ifdef PADDLE_MOBILE_MALI_GPU
void
*
acl_op_
;
#endif
};
};
class
FusionOpMatcher
{
class
FusionOpMatcher
{
...
...
src/operators/activation_op.cpp
浏览文件 @
a29db744
...
@@ -55,9 +55,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -55,9 +55,6 @@ namespace ops = paddle_mobile::operators;
REGISTER_OPERATOR_CPU
(
relu
,
ops
::
ReluOp
);
REGISTER_OPERATOR_CPU
(
relu
,
ops
::
ReluOp
);
REGISTER_OPERATOR_CPU
(
relu6
,
ops
::
Relu6Op
);
REGISTER_OPERATOR_CPU
(
relu6
,
ops
::
Relu6Op
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
relu
,
ops
::
ReluOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
relu
,
ops
::
ReluOp
);
REGISTER_OPERATOR_FPGA
(
relu
,
ops
::
ReluOp
);
#endif
#endif
...
...
src/operators/batchnorm_op.cpp
浏览文件 @
a29db744
...
@@ -34,9 +34,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -34,9 +34,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
batch_norm
,
ops
::
BatchNormOp
);
REGISTER_OPERATOR_CPU
(
batch_norm
,
ops
::
BatchNormOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
batch_norm
,
ops
::
BatchNormOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
src/operators/bilinear_interp_op.cpp
浏览文件 @
a29db744
...
@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
bilinear_interp
,
ops
::
BilinearOp
);
REGISTER_OPERATOR_CPU
(
bilinear_interp
,
ops
::
BilinearOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
src/operators/box_coder_op.cpp
浏览文件 @
a29db744
...
@@ -58,8 +58,6 @@ REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp);
...
@@ -58,8 +58,6 @@ REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp);
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
box_coder
,
ops
::
BoxCoderOp
);
REGISTER_OPERATOR_CL
(
box_coder
,
ops
::
BoxCoderOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
src/operators/concat_op.cpp
浏览文件 @
a29db744
...
@@ -69,9 +69,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp);
...
@@ -69,9 +69,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp);
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
concat
,
ops
::
ConcatOp
);
REGISTER_OPERATOR_CL
(
concat
,
ops
::
ConcatOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
concat
,
ops
::
ConcatOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
concat
,
ops
::
ConcatOp
);
REGISTER_OPERATOR_FPGA
(
concat
,
ops
::
ConcatOp
);
#endif
#endif
...
...
src/operators/conv_op.cpp
浏览文件 @
a29db744
...
@@ -55,9 +55,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -55,9 +55,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
conv2d
,
ops
::
ConvOp
);
REGISTER_OPERATOR_CPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
conv2d
,
ops
::
ConvOp
);
REGISTER_OPERATOR_FPGA
(
conv2d
,
ops
::
ConvOp
);
#endif
#endif
...
...
src/operators/conv_transpose_op.cpp
浏览文件 @
a29db744
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
conv2d_transpose
,
ops
::
ConvOpTranspose
);
REGISTER_OPERATOR_CPU
(
conv2d_transpose
,
ops
::
ConvOpTranspose
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
conv2d_transpose
,
ops
::
ConvOpTranspose
);
REGISTER_OPERATOR_FPGA
(
conv2d_transpose
,
ops
::
ConvOpTranspose
);
#endif
#endif
...
...
src/operators/crf_op.cpp
浏览文件 @
a29db744
...
@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
crf_decoding
,
ops
::
CrfOp
);
REGISTER_OPERATOR_CPU
(
crf_decoding
,
ops
::
CrfOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
src/operators/elementwise_add_op.cpp
浏览文件 @
a29db744
...
@@ -33,9 +33,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -33,9 +33,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
REGISTER_OPERATOR_CPU
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
#endif
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
REGISTER_OPERATOR_CL
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
...
...
src/operators/elementwise_mul_op.cpp
浏览文件 @
a29db744
...
@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
elementwise_mul
,
ops
::
ElementwiseMulOp
);
REGISTER_OPERATOR_CPU
(
elementwise_mul
,
ops
::
ElementwiseMulOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
elementwise_mul
,
ops
::
ElementwiseMulOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
elementwise_mul
,
ops
::
ElementwiseMulOp
);
REGISTER_OPERATOR_FPGA
(
elementwise_mul
,
ops
::
ElementwiseMulOp
);
#endif
#endif
...
...
src/operators/elementwise_sub_op.cpp
浏览文件 @
a29db744
...
@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
elementwise_sub
,
ops
::
ElementwiseSubOp
);
REGISTER_OPERATOR_CPU
(
elementwise_sub
,
ops
::
ElementwiseSubOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
elementwise_sub
,
ops
::
ElementwiseSubOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
src/operators/feed_op.cpp
浏览文件 @
a29db744
...
@@ -38,9 +38,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -38,9 +38,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
feed
,
ops
::
FeedOp
);
REGISTER_OPERATOR_CPU
(
feed
,
ops
::
FeedOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
feed
,
ops
::
FeedOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
feed
,
ops
::
FeedOp
);
REGISTER_OPERATOR_FPGA
(
feed
,
ops
::
FeedOp
);
#endif
#endif
...
...
src/operators/fetch_op.cpp
浏览文件 @
a29db744
...
@@ -30,9 +30,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -30,9 +30,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
fetch
,
ops
::
FetchOp
);
REGISTER_OPERATOR_CPU
(
fetch
,
ops
::
FetchOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
fetch
,
ops
::
FetchOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fetch
,
ops
::
FetchOp
);
REGISTER_OPERATOR_FPGA
(
fetch
,
ops
::
FetchOp
);
#endif
#endif
...
...
src/operators/fusion_conv_add_op.cpp
浏览文件 @
a29db744
...
@@ -54,9 +54,6 @@ REGISTER_FUSION_MATCHER(fusion_conv_add, ops::FusionConvAddMatcher);
...
@@ -54,9 +54,6 @@ REGISTER_FUSION_MATCHER(fusion_conv_add, ops::FusionConvAddMatcher);
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
fusion_conv_add
,
ops
::
FusionConvAddOp
);
REGISTER_OPERATOR_CPU
(
fusion_conv_add
,
ops
::
FusionConvAddOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
fusion_conv_add
,
ops
::
FusionConvAddOp
);
#endif
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
fusion_conv_add
,
ops
::
FusionConvAddOp
);
REGISTER_OPERATOR_CL
(
fusion_conv_add
,
ops
::
FusionConvAddOp
);
...
...
src/operators/fusion_deconv_add_bn_op.cpp
浏览文件 @
a29db744
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER
(
fusion_deconv_add_bn
,
ops
::
FusionDeconvAddBNMatcher
);
REGISTER_FUSION_MATCHER
(
fusion_deconv_add_bn
,
ops
::
FusionDeconvAddBNMatcher
);
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add_bn
,
ops
::
FusionDeconvAddBNOp
);
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add_bn
,
ops
::
FusionDeconvAddBNOp
);
#endif
#endif
...
...
src/operators/fusion_deconv_add_bn_relu_op.cpp
浏览文件 @
a29db744
...
@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu,
...
@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu,
ops
::
FusionDeconvAddBNReluMatcher
);
ops
::
FusionDeconvAddBNReluMatcher
);
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add_bn_relu
,
ops
::
FusionDeconvAddBNReluOp
);
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add_bn_relu
,
ops
::
FusionDeconvAddBNReluOp
);
#endif
#endif
...
...
src/operators/fusion_deconv_add_op.cpp
浏览文件 @
a29db744
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER
(
fusion_deconv_add
,
ops
::
FusionDeconvAddMatcher
);
REGISTER_FUSION_MATCHER
(
fusion_deconv_add
,
ops
::
FusionDeconvAddMatcher
);
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add
,
ops
::
FusionDeconvAddOp
);
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add
,
ops
::
FusionDeconvAddOp
);
#endif
#endif
...
...
src/operators/fusion_deconv_add_relu_op.cpp
浏览文件 @
a29db744
...
@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_relu,
...
@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_relu,
ops
::
FusionDeconvAddReluMatcher
);
ops
::
FusionDeconvAddReluMatcher
);
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add_relu
,
ops
::
FusionDeconvAddReluOp
);
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add_relu
,
ops
::
FusionDeconvAddReluOp
);
#endif
#endif
...
...
src/operators/fusion_deconv_bn_relu_op.cpp
浏览文件 @
a29db744
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER
(
fusion_deconv_bn_relu
,
ops
::
FusionDeconvBNReluMatcher
);
REGISTER_FUSION_MATCHER
(
fusion_deconv_bn_relu
,
ops
::
FusionDeconvBNReluMatcher
);
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_bn_relu
,
ops
::
FusionDeconvBNReluOp
);
REGISTER_OPERATOR_FPGA
(
fusion_deconv_bn_relu
,
ops
::
FusionDeconvBNReluOp
);
#endif
#endif
...
...
src/operators/fusion_deconv_relu_op.cpp
浏览文件 @
a29db744
...
@@ -23,8 +23,7 @@ namespace operators {}
...
@@ -23,8 +23,7 @@ namespace operators {}
namespace
ops
=
paddle_mobile
::
operators
;
namespace
ops
=
paddle_mobile
::
operators
;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_relu
,
ops
::
FusionDeconvReluOp
);
REGISTER_OPERATOR_FPGA
(
fusion_deconv_relu
,
ops
::
FusionDeconvReluOp
);
#endif
#endif
...
...
src/operators/fusion_elementwise_add_relu_op.cpp
浏览文件 @
a29db744
...
@@ -36,10 +36,6 @@ REGISTER_FUSION_MATCHER(fusion_elementwise_add_relu,
...
@@ -36,10 +36,6 @@ REGISTER_FUSION_MATCHER(fusion_elementwise_add_relu,
// REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
// REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
// ops::FusionElementwiseAddReluOp);
// ops::FusionElementwiseAddReluOp);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
// REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu,
// ops::FusionElementwiseAddReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_elementwise_add_relu
,
REGISTER_OPERATOR_FPGA
(
fusion_elementwise_add_relu
,
ops
::
FusionElementwiseAddReluOp
);
ops
::
FusionElementwiseAddReluOp
);
...
...
src/operators/fusion_fc_op.cpp
浏览文件 @
a29db744
...
@@ -63,9 +63,6 @@ REGISTER_OPERATOR_CPU(fusion_fc, ops::FusionFcOp);
...
@@ -63,9 +63,6 @@ REGISTER_OPERATOR_CPU(fusion_fc, ops::FusionFcOp);
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
fusion_fc
,
ops
::
FusionFcOp
);
REGISTER_OPERATOR_CL
(
fusion_fc
,
ops
::
FusionFcOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
fusion_fc
,
ops
::
FusionFcOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_fc
,
ops
::
FusionFcOp
);
REGISTER_OPERATOR_FPGA
(
fusion_fc
,
ops
::
FusionFcOp
);
#endif
#endif
...
...
src/operators/fusion_fc_relu_op.cpp
浏览文件 @
a29db744
...
@@ -60,9 +60,6 @@ REGISTER_FUSION_MATCHER(fusion_fc_relu, ops::FusionFcReluMatcher);
...
@@ -60,9 +60,6 @@ REGISTER_FUSION_MATCHER(fusion_fc_relu, ops::FusionFcReluMatcher);
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
fusion_fc_relu
,
ops
::
FusionFcReluOp
);
REGISTER_OPERATOR_CPU
(
fusion_fc_relu
,
ops
::
FusionFcReluOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
fusion_fc_relu
,
ops
::
FusionFcReluOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_fc_relu
,
ops
::
FusionFcReluOp
);
REGISTER_OPERATOR_FPGA
(
fusion_fc_relu
,
ops
::
FusionFcReluOp
);
#endif
#endif
...
...
src/operators/gru_unit_op.cpp
浏览文件 @
a29db744
...
@@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
gru_unit
,
ops
::
GruUnitOp
);
REGISTER_OPERATOR_CPU
(
gru_unit
,
ops
::
GruUnitOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
src/operators/increment_op.cpp
浏览文件 @
a29db744
...
@@ -37,8 +37,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -37,8 +37,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
increment
,
ops
::
IncrementOp
);
REGISTER_OPERATOR_CPU
(
increment
,
ops
::
IncrementOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
src/operators/is_empty_op.cpp
浏览文件 @
a29db744
...
@@ -34,8 +34,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -34,8 +34,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
is_empty
,
ops
::
IsEmptyOp
);
REGISTER_OPERATOR_CPU
(
is_empty
,
ops
::
IsEmptyOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
ACL_Android
@
591027fc
比较
591027fc
...
591027fc
Subproject commit 591027fcffea084100c756e48356e0f8a48e35e5
src/operators/kernel/mali/acl_operator.cc
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if USE_ACL == 1
#include "acl_operator.h"
unsigned
int
bypass_acl_class_layer
=
(
0
|
FLAGS_ENABLE_ACL_CONCAT
|
/*0xffffffff |*/
/*FLAGS_ENABLE_ACL_FC |*/
/*FLAGS_ENABLE_ACL_LRN
|*/
0
);
int
enable_schedule
=
0
;
#ifdef USE_PROFILING
#include "arm_neon.h"
unsigned
int
acl_log_flags
=
(
0
|
MASK_LOG_APP_TIME
|
/*MASK_LOG_ALLOCATE | */
/*MASK_LOG_ALLOCATE | */
/*MASK_LOG_RUN | */
/*MASK_LOG_CONFIG | */
/*MASK_LOG_COPY | */
MASK_LOG_ABSVAL
|
MASK_LOG_BNLL
|
MASK_LOG_CONV
|
MASK_LOG_FC
|
MASK_LOG_LRN
|
MASK_LOG_POOLING
|
MASK_LOG_RELU
|
MASK_LOG_SIGMOID
|
MASK_LOG_SOFTMAX
|
MASK_LOG_TANH
|
MASK_LOG_LC
|
MASK_LOG_BN
|
MASK_LOG_CONCAT
|
0
);
#include <stdio.h>
/* printf */
#include <stdlib.h>
/* getenv */
#endif // USE_PROFILING
static
bool
force_enable_gpu
=
false
;
bool
AclEnableSchedule
(
int
enable
)
{
enable_schedule
=
enable
;
if
(
enable
)
{
force_enable_gpu
=
true
;
}
return
true
;
}
int
isScheduleEnable
()
{
return
enable_schedule
;
}
namespace
paddle_mobile
{
namespace
operators
{
namespace
acl
{
bool
ACLOperator
::
init_gpu_env
=
true
;
#ifdef USE_OPENCL
bool
ACLOperator
::
support_opencl_
=
false
;
bool
opencl_is_available
()
{
return
arm_compute
::
opencl_is_available
();
}
#elif defined(USE_OPENGLES)
bool
ACLOperator
::
support_opengles_
=
false
;
#endif
ACLOperator
::
ACLOperator
(
bool
is_gpu
)
:
operator_state_
(
operator_not_init
),
force_bypass_acl_path_
(
false
),
target_hint_
(
TargetHint
::
DONT_CARE
),
convolution_method_hint_
(
ConvolutionMethodHint
::
GEMM
),
_group
(
1
),
name_
(
""
),
input_idx_
(
0
),
output_idx_
(
0
),
is_gpu_
(
is_gpu
)
{
const
char
*
pBypassACL
;
if
(
init_gpu_env
)
{
#ifdef USE_OPENCL
try
{
if
(
opencl_is_available
())
{
arm_compute
::
CLScheduler
::
get
().
default_init
();
support_opencl_
=
true
;
}
}
catch
(
std
::
exception
&
e
)
{
support_opencl_
=
false
;
}
#elif defined(USE_OPENGLES)
try
{
arm_compute
::
GCScheduler
::
get
().
default_init
();
support_opengles_
=
true
;
}
catch
(
std
::
exception
&
e
)
{
support_opengles_
=
false
;
}
#endif
init_gpu_env
=
false
;
}
if
(
force_enable_gpu
)
is_gpu_
=
true
;
pBypassACL
=
getenv
(
"BYPASSACL"
);
if
(
pBypassACL
)
{
unsigned
int
bacl
;
sscanf
(
pBypassACL
,
"%i"
,
&
bacl
);
if
(
bacl
!=
bypass_acl_class_layer
)
{
bypass_acl_class_layer
=
bacl
;
printf
(
"BYPASSACL<%s>
\n
"
,
pBypassACL
);
printf
(
"BYPASSACL: %x
\n
"
,
bypass_acl_class_layer
);
}
}
#ifdef USE_PROFILING
const
char
*
pLogACL
;
pLogACL
=
getenv
(
"LOGACL"
);
if
(
pLogACL
)
{
unsigned
int
alf
;
sscanf
(
pLogACL
,
"%i"
,
&
alf
);
if
(
alf
!=
acl_log_flags
)
{
acl_log_flags
=
alf
;
printf
(
"LOGACL<%s>
\n
"
,
pLogACL
);
printf
(
"LOGACL: %x
\n
"
,
acl_log_flags
);
}
}
#endif // USE_PROFILING
const
char
*
pEnableSchedule
;
pEnableSchedule
=
getenv
(
"ENABLESCHEDULE"
);
if
(
pEnableSchedule
)
{
int
bshedule
;
sscanf
(
pEnableSchedule
,
"%i"
,
&
bshedule
);
if
(
bshedule
!=
enable_schedule
)
{
enable_schedule
=
bshedule
;
printf
(
"ENABLESCHEDULE<%s>
\n
"
,
pEnableSchedule
);
printf
(
"ENABLESCHEDULE: %x
\n
"
,
enable_schedule
);
}
if
(
enable_schedule
)
{
AclEnableSchedule
(
1
);
}
}
}
ACLOperator
::~
ACLOperator
()
{}
bool
ACLOperator
::
new_tensor
(
std
::
unique_ptr
<
ACLTensor
>&
tensor
,
arm_compute
::
TensorShape
&
shape
,
void
*
mem
,
bool
commit
)
{
auto
acl_tensor
=
new
ACLTensor
(
arm_compute
::
TensorInfo
(
shape
,
arm_compute
::
Format
::
F32
));
acl_tensor
->
set_target
(
getTargetHint
());
acl_tensor
->
bindmem
(
mem
);
if
(
commit
)
acl_tensor
->
commit
();
tensor
=
(
std
::
unique_ptr
<
ACLTensor
>
)
std
::
move
(
acl_tensor
);
return
true
;
}
bool
ACLOperator
::
new_tensor
(
std
::
unique_ptr
<
ACLSubTensor
>&
tensor
,
std
::
unique_ptr
<
ACLTensor
>&
parent
,
arm_compute
::
TensorShape
&
shape
,
arm_compute
::
Coordinates
&
coord
)
{
auto
acl_tensor
=
new
ACLSubTensor
(
parent
,
shape
,
coord
);
acl_tensor
->
set_target
(
getTargetHint
());
tensor
=
(
std
::
unique_ptr
<
ACLSubTensor
>
)
std
::
move
(
acl_tensor
);
return
true
;
}
void
ACLTensor
::
commit
(
TensorType
type
)
{
settensortype
(
type
);
if
(
mem_
)
{
if
(
!
allocate_
)
{
#ifdef USE_PROFILING
logtime_util
log_time
(
ACL_ALLOCATE_INFO
);
#endif // USE_PROFILING
allocate
();
allocate_
=
true
;
}
if
(
type_
!=
tensor_output
)
{
tensor_copy
(
mem_
);
}
mem_
=
nullptr
;
}
}
int
BaseACLTensor
::
tensor_copy
(
arm_compute
::
ITensor
*
tensor
,
void
*
mem
,
bool
toTensor
)
{
#ifdef USE_PROFILING
logtime_util
log_time
(
ACL_COPY_INFO
);
#endif // USE_PROFILING
arm_compute
::
Window
window
;
// Iterate through the rows (not each element)
window
.
use_tensor_dimensions
(
tensor
->
info
()
->
tensor_shape
(),
/* first_dimension =*/
arm_compute
::
Window
::
DimY
);
int
width
=
tensor
->
info
()
->
tensor_shape
()[
0
];
int
height
=
tensor
->
info
()
->
tensor_shape
()[
1
];
int
deepth
=
tensor
->
info
()
->
tensor_shape
()[
2
];
map
();
// Create an iterator:
arm_compute
::
Iterator
it
(
tensor
,
window
);
// Except it works for an arbitrary number of dimensions
if
(
toTensor
)
{
// mem->tensor
arm_compute
::
execute_window_loop
(
window
,
[
&
](
const
arm_compute
::
Coordinates
&
id
)
{
memcpy
(
it
.
ptr
(),
((
char
*
)
mem
)
+
((
id
[
3
]
*
(
width
*
height
*
deepth
)
+
id
.
z
()
*
(
width
*
height
)
+
id
.
y
()
*
width
+
id
.
x
())
*
tensor
->
info
()
->
element_size
()),
width
*
tensor
->
info
()
->
element_size
());
},
it
);
}
else
{
// tensor-->mem
arm_compute
::
execute_window_loop
(
window
,
[
&
](
const
arm_compute
::
Coordinates
&
id
)
{
memcpy
(((
char
*
)
mem
)
+
((
id
[
3
]
*
(
width
*
height
*
deepth
)
+
id
.
z
()
*
(
width
*
height
)
+
id
.
y
()
*
width
)
*
tensor
->
info
()
->
element_size
()),
it
.
ptr
(),
width
*
tensor
->
info
()
->
element_size
());
},
it
);
}
unmap
();
return
0
;
}
}
// namespace acl
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/acl_operator.h
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef ACL_OPERATOR_H_
#define ACL_OPERATOR_H_
#include <framework/tensor.h>
#include <operators/op_param.h>
#if USE_ACL == 1
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
#include "arm_compute/runtime/Tensor.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
#endif
#ifdef USE_OPENGLES
#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h"
#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h"
#endif
#include "acl_tensor.h"
#define FLAGS_ENABLE_ACL_ABSVAL 0x00000001
#define FLAGS_ENABLE_ACL_BNLL 0x00000002
#define FLAGS_ENABLE_ACL_CONV 0x00000004
#define FLAGS_ENABLE_ACL_FC 0x00000008
#define FLAGS_ENABLE_ACL_LRN 0x00000010
#define FLAGS_ENABLE_ACL_POOLING 0x00000020
#define FLAGS_ENABLE_ACL_RELU 0x00000040
#define FLAGS_ENABLE_ACL_SIGMOID 0x00000080
#define FLAGS_ENABLE_ACL_SOFTMAX 0x00000100
#define FLAGS_ENABLE_ACL_TANH 0x00000200
#define FLAGS_ENABLE_ACL_LC 0x00000400
#define FLAGS_ENABLE_ACL_BN 0x00000800
#define FLAGS_ENABLE_ACL_CONCAT 0x00001000
extern
unsigned
int
bypass_acl_class_layer
;
#ifdef USE_PROFILING
#include <sys/time.h>
#define NANO_SEC_CONV 1000000
#define MASK_LOG_APP_TIME 0x00000001
#define MASK_LOG_ALLOCATE 0x00000002
#define MASK_LOG_RUN 0x00000004
#define MASK_LOG_CONFIG 0x00000008
#define MASK_LOG_COPY 0x00000010
#define MASK_LOG_ABSVAL 0x00000020
#define MASK_LOG_BNLL 0x00000040
#define MASK_LOG_CONV 0x00000080
#define MASK_LOG_FC 0x00000100
#define MASK_LOG_LRN 0x00000200
#define MASK_LOG_POOLING 0x00000400
#define MASK_LOG_RELU 0x00000800
#define MASK_LOG_SIGMOID 0x00001000
#define MASK_LOG_SOFTMAX 0x00002000
#define MASK_LOG_TANH 0x00004000
#define MASK_LOG_LC 0x00008000
#define MASK_LOG_BN 0x00010000
#define MASK_LOG_CONCAT 0x00020000
#define APP_TIME_INFO MASK_LOG_APP_TIME, "time: \t"
#define ACL_ALLOCATE_INFO MASK_LOG_ALLOCATE, "allocate: \t\t"
#define ACL_RUN_INFO MASK_LOG_RUN, "run: \t\t\t"
#define ACL_CONFIG_INFO MASK_LOG_CONFIG, "configure: \t\t\t\t"
#define ACL_COPY_INFO MASK_LOG_COPY, "tensor_copy:\t\t\t\t\t"
#define ACL_ABSVAL_INFO MASK_LOG_ABSVAL, "ACL_ABSVAL :\t\t\t\t\t\t"
#define ACL_BNLL_INFO MASK_LOG_BNLL, "ACL_BNLL :\t\t\t\t\t\t\t"
#define ACL_CONV_INFO MASK_LOG_CONV, "ACL_CONV :\t\t\t\t\t\t\t\t"
#define ACL_FC_INFO MASK_LOG_FC, "ACL_FC :\t\t\t\t\t\t\t\t\t"
#define ACL_LRN_INFO MASK_LOG_LRN, "ACL_LRN :\t\t\t\t\t\t\t\t\t\t"
#define ACL_POOLING_INFO MASK_LOG_POOLING, "ACL_POOLING:\t\t\t\t\t\t\t\t\t\t\t"
#define ACL_RELU_INFO MASK_LOG_RELU, "ACL_RELU :\t\t\t\t\t\t\t\t\t\t\t\t"
#define ACL_SIGMOID_INFO \
MASK_LOG_SIGMOID, "ACL_SIGMOID:\t\t\t\t\t\t\t\t\t\t\t\t\t"
#define ACL_SOFTMAX_INFO \
MASK_LOG_SOFTMAX, "ACL_SOFTMAX:\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
#define ACL_TANH_INFO \
MASK_LOG_TANH, "ACL_TANH :\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
#define ACL_LC_INFO MASK_LOG_LC, "ACL_LC :\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
#define ACL_BN_INFO \
MASK_LOG_BN, "ACL_BN :\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
#define ACL_CONCAT_INFO \
MASK_LOG_CONCAT, "ACL_CONCAT :\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
extern
unsigned
int
acl_log_flags
;
class
logtime_util
{
public:
logtime_util
()
{
mask
=
0
;
}
logtime_util
(
int
mask_
,
const
char
*
information_
)
{
setlogtime_info
(
mask_
,
information_
);
}
void
setlogtime_info
(
int
mask_
,
const
char
*
information_
)
{
mask
=
mask_
;
if
(
acl_log_flags
&
mask
)
{
strncpy
(
information
,
information_
,
255
);
gettimeofday
(
&
tv
[
0
],
NULL
);
}
}
~
logtime_util
()
{
if
(
acl_log_flags
&
mask
)
{
int
time
[
2
];
gettimeofday
(
&
tv
[
1
],
NULL
);
time
[
0
]
=
tv
[
0
].
tv_sec
*
NANO_SEC_CONV
+
tv
[
0
].
tv_usec
;
time
[
1
]
=
tv
[
1
].
tv_sec
*
NANO_SEC_CONV
+
tv
[
1
].
tv_usec
;
printf
(
"%s %.6lf
\n
"
,
information
,
(((
double
)
time
[
1
]
-
time
[
0
])
/
NANO_SEC_CONV
));
}
}
void
log_time
(
bool
start
)
{
if
(
acl_log_flags
&
mask
)
{
if
(
start
)
{
gettimeofday
(
&
tv
[
0
],
NULL
);
}
else
{
int
time
[
2
];
gettimeofday
(
&
tv
[
1
],
NULL
);
time
[
0
]
=
tv
[
0
].
tv_sec
*
NANO_SEC_CONV
+
tv
[
0
].
tv_usec
;
time
[
1
]
=
tv
[
1
].
tv_sec
*
NANO_SEC_CONV
+
tv
[
1
].
tv_usec
;
printf
(
"%s %.6lf
\n
"
,
information
,
(((
double
)
time
[
1
]
-
time
[
0
])
/
NANO_SEC_CONV
));
}
}
}
private:
struct
timeval
tv
[
2
];
int
mask
;
char
information
[
256
];
};
#endif // USE_PROFILING
namespace
paddle_mobile
{
namespace
operators
{
namespace
acl
{
class
AclParameters
{
public:
AclParameters
()
{
dilated
=
false
;
dim
=
2
;
num_group
=
1
;
}
int
batch
;
int
in_depth
;
int
in_rows
;
int
in_cols
;
int
out_depth
;
int
out_rows
;
int
out_cols
;
int
out_num
;
int
filter_rows
;
int
filter_cols
;
int
stride_rows
;
int
stride_cols
;
int
pad_rows
;
int
pad_cols
;
int
dilation_rows
;
int
dilation_cols
;
int
num_group
;
bool
dilated
;
int
dim
;
int
epsilon
;
int
nsize
;
float
alpha
;
float
beta
;
float
knorm
;
void
*
input_data
;
void
*
output_data
;
void
*
weight_data
;
void
*
biases_data
;
void
*
mean_data
;
void
*
var_data
;
std
::
string
pool_type
;
std
::
string
act_type
;
std
::
string
data_layout
;
bool
is_global_pool
;
bool
is_channel_concat
;
bool
is_bypass
;
std
::
vector
<
framework
::
LoDTensor
*>
in_tensor
;
};
enum
TensorType
{
tensor_input
,
tensor_output
,
tensor_weights
,
tensor_biases
,
tensor_mean
,
tensor_var
,
tensor_beta
,
tensor_gamma
,
tensor_concat
,
tensor_data
,
};
enum
OperatorState
{
operator_not_init
,
operator_init_done
,
operator_reinit
,
};
enum
OperateType
{
operate_type_pooling
,
operate_type_activation
,
operate_type_lrn
,
operate_type_conv
,
operate_type_lc
,
operate_type_fc
,
operate_type_bn
,
operate_type_softmax
,
operate_type_concat
,
};
class
BaseACLTensor
{
public:
BaseACLTensor
()
:
type_
(
tensor_input
),
allocate_
(
false
)
{}
virtual
~
BaseACLTensor
()
{}
virtual
void
bindmem
(
void
*
mem
)
{
mem_
=
mem
;
}
virtual
void
settensortype
(
TensorType
type
)
{
type_
=
type
;
}
virtual
void
map
(
bool
blocking
=
true
)
{}
virtual
void
unmap
()
{}
virtual
void
commit
(
TensorType
type
=
tensor_data
)
{}
int
tensor_copy
(
arm_compute
::
ITensor
*
tensor
,
void
*
mem
,
bool
toTensor
=
true
);
protected:
void
*
mem_
;
TensorType
type_
;
bool
allocate_
;
};
class
ACLTensor
:
public
BaseACLTensor
,
public
Tensor
{
public:
explicit
ACLTensor
(
arm_compute
::
TensorInfo
&&
info
)
:
Tensor
(
info
)
{}
virtual
void
map
(
bool
blocking
=
true
)
{
if
(
!
allocate_
)
{
Tensor
::
allocate
();
allocate_
=
true
;
}
Tensor
::
map
(
blocking
);
}
virtual
int
tensor_copy
(
void
*
mem
,
bool
toTensor
=
true
)
{
auto
acl_tensor
=
this
;
arm_compute
::
ITensor
*
tensor
=
acl_tensor
->
tensor
();
BaseACLTensor
::
tensor_copy
(
tensor
,
mem
,
toTensor
);
return
0
;
}
virtual
void
unmap
()
{
Tensor
::
unmap
();
}
virtual
void
commit
(
TensorType
type
=
tensor_data
);
};
class
ACLSubTensor
:
public
BaseACLTensor
,
public
SubTensor
{
public:
ACLSubTensor
(
std
::
unique_ptr
<
ACLTensor
>
&
parent
,
arm_compute
::
TensorShape
&
shape
,
arm_compute
::
Coordinates
&
coord
)
:
SubTensor
(
parent
.
get
(),
shape
,
coord
)
{}
virtual
int
tensor_copy
(
void
*
mem
,
bool
toTensor
=
true
)
{
return
0
;
}
};
template
<
typename
T
>
class
TensorPair
{
public:
TensorPair
()
{}
~
TensorPair
()
{}
TensorType
type
;
std
::
unique_ptr
<
T
>
tensor
;
};
template
<
typename
T
>
std
::
unique_ptr
<
T
>
&
tensor_item
(
std
::
vector
<
std
::
unique_ptr
<
TensorPair
<
T
>>>
&
pool
,
TensorType
type
,
int
idx
)
{
int
count
=
0
;
for
(
auto
&
item
:
pool
)
{
if
(
item
.
get
()
->
type
==
type
)
{
++
count
;
}
if
(
item
.
get
()
->
type
==
type
&&
idx
==
count
-
1
)
{
return
item
.
get
()
->
tensor
;
}
}
pool
.
push_back
((
std
::
unique_ptr
<
TensorPair
<
T
>>
)
std
::
move
(
new
TensorPair
<
T
>
));
auto
item
=
pool
[
pool
.
size
()
-
1
].
get
();
item
->
type
=
type
;
item
->
tensor
=
NULL
;
return
item
->
tensor
;
}
class
ACLOperator
{
public:
virtual
void
commit
()
{
for
(
auto
&
item
:
tensor_pool_
)
{
if
(
item
.
get
()
->
tensor
)
item
.
get
()
->
tensor
->
commit
(
item
.
get
()
->
type
);
}
}
inline
void
run
()
{
commit
();
#ifdef USE_PROFILING
logtime_util
log_time
(
ACL_RUN_INFO
);
#endif // USE_PROFILING
for
(
auto
&
c
:
funcs_
)
{
c
->
run
();
}
}
inline
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
funcs
()
{
return
funcs_
;
}
inline
std
::
unique_ptr
<
ACLSubTensor
>
&
sinput
(
int
idx
=
0
)
{
return
tensor_item
(
subtensor_pool_
,
tensor_input
,
idx
);
}
inline
std
::
unique_ptr
<
ACLSubTensor
>
&
soutput
(
int
idx
=
0
)
{
return
tensor_item
(
subtensor_pool_
,
tensor_output
,
idx
);
}
inline
std
::
unique_ptr
<
ACLSubTensor
>
&
sweights
(
int
idx
=
0
)
{
return
tensor_item
(
subtensor_pool_
,
tensor_weights
,
idx
);
}
inline
std
::
unique_ptr
<
ACLSubTensor
>
&
sbiases
(
int
idx
=
0
)
{
return
tensor_item
(
subtensor_pool_
,
tensor_biases
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
cinput
(
int
idx
=
0
)
{
return
tensor_item
(
tensor_pool_
,
tensor_concat
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
input
(
int
idx
=
0
)
{
return
tensor_item
(
tensor_pool_
,
tensor_input
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
output
(
int
idx
=
0
)
{
return
tensor_item
(
tensor_pool_
,
tensor_output
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
weights
(
int
idx
=
0
)
{
return
tensor_item
(
tensor_pool_
,
tensor_weights
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
biases
(
int
idx
=
0
)
{
return
tensor_item
(
tensor_pool_
,
tensor_biases
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
mean
(
int
idx
=
0
)
{
return
tensor_item
(
tensor_pool_
,
tensor_mean
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
var
(
int
idx
=
0
)
{
return
tensor_item
(
tensor_pool_
,
tensor_var
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
beta
(
int
idx
=
0
)
{
return
tensor_item
(
tensor_pool_
,
tensor_beta
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
gamma
(
int
idx
=
0
)
{
return
tensor_item
(
tensor_pool_
,
tensor_gamma
,
idx
);
}
inline
std
::
unique_ptr
<
ACLTensor
>
&
tensor
(
TensorType
type
)
{
switch
(
type
)
{
case
tensor_biases
:
return
biases
();
break
;
case
tensor_weights
:
return
weights
();
break
;
case
tensor_output
:
return
output
();
break
;
default:
case
tensor_input
:
return
input
();
break
;
}
return
input
();
}
explicit
ACLOperator
(
bool
is_gpu
=
false
);
virtual
~
ACLOperator
();
inline
TargetHint
getTargetHint
()
{
#ifdef USE_OPENCL
if
(
target_hint_
==
TargetHint
::
DONT_CARE
)
{
if
(
is_gpu_
)
{
return
TargetHint
::
OPENCL
;
}
return
TargetHint
::
NEON
;
}
return
target_hint_
;
#elif defined(USE_OPENGLES)
if
(
target_hint_
==
TargetHint
::
DONT_CARE
)
{
if
(
is_gpu_
)
{
return
TargetHint
::
OPENGLES
;
}
return
TargetHint
::
NEON
;
}
return
target_hint_
;
#else
return
TargetHint
::
NEON
;
#endif
}
inline
void
setTargetHint
(
TargetHint
hint
)
{
target_hint_
=
hint
;
}
inline
ConvolutionMethodHint
&
getConvMethod
()
{
return
convolution_method_hint_
;
}
inline
void
setConvMethod
()
{
convolution_method_hint_
=
ConvolutionMethodHint
::
DIRECT
;
}
inline
bool
tensor_mem
(
std
::
unique_ptr
<
ACLTensor
>
&
tensor
,
void
*
mem
)
{
tensor
->
bindmem
(
mem
);
return
true
;
}
inline
bool
tensor_mem
(
void
*
mem
,
std
::
unique_ptr
<
ACLTensor
>
&
tensor
)
{
tensor
->
tensor_copy
(
mem
,
false
);
return
true
;
}
bool
new_tensor
(
std
::
unique_ptr
<
ACLTensor
>
&
tensor
,
arm_compute
::
TensorShape
&
shape
,
void
*
mem
=
nullptr
,
bool
commit
=
false
);
bool
new_tensor
(
std
::
unique_ptr
<
ACLSubTensor
>
&
tensor
,
std
::
unique_ptr
<
ACLTensor
>
&
parent
,
arm_compute
::
TensorShape
&
shape
,
arm_compute
::
Coordinates
&
coord
);
inline
int
&
group
()
{
return
_group
;
}
inline
void
set_operator_property
(
OperateType
type
,
const
char
*
name
)
{
name_
=
name
;
type_
=
type
;
}
inline
void
acl_run
(
void
*
input_data
,
void
*
output_data
)
{
if
(
input_data
)
tensor_mem
(
input
(),
input_data
);
run
();
tensor_mem
(
output_data
,
output
());
}
inline
int
&
input_idx
()
{
return
input_idx_
;
}
inline
int
&
output_idx
()
{
return
output_idx_
;
}
protected:
inline
bool
isGPUMode
()
{
#ifdef USE_OPENCL
if
(
!
support_opencl_
)
return
false
;
return
getTargetHint
()
==
TargetHint
::
OPENCL
;
#elif defined(USE_OPENGLES)
if
(
!
support_opengles_
)
return
false
;
return
getTargetHint
()
==
TargetHint
::
OPENGLES
;
#endif
return
false
;
}
inline
OperatorState
&
opstate
()
{
return
operator_state_
;
}
inline
bool
is_operator_init_done
(
arm_compute
::
TensorShape
shape
,
TensorType
type
=
tensor_input
)
{
checkreshape
(
shape
,
type
);
return
operator_state_
==
operator_init_done
;
}
inline
void
set_operator_init_done
()
{
opstate
()
=
operator_init_done
;
set_bypass_state
(
false
);
}
inline
void
set_bypass_state
(
bool
state
=
false
)
{
force_bypass_acl_path_
=
state
;
}
inline
OperatorState
checkreshape
(
arm_compute
::
TensorShape
shape
,
TensorType
type
=
tensor_input
)
{
opstate
()
=
reshape
(
shape
,
type
);
if
(
opstate
()
==
operator_reinit
)
{
freeres
();
}
return
opstate
();
}
inline
OperatorState
reshape
(
arm_compute
::
TensorShape
&
shape
,
TensorType
type
)
{
arm_compute
::
TensorShape
_shape
;
std
::
unique_ptr
<
ACLTensor
>
&
acl_tensor
=
tensor
(
type
);
if
(
!
acl_tensor
.
get
())
return
operator_not_init
;
_shape
=
acl_tensor
->
info
().
tensor_shape
();
if
(
_shape
.
total_size
()
==
shape
.
total_size
()
&&
_shape
[
0
]
==
shape
[
0
]
&&
_shape
[
1
]
==
shape
[
1
])
{
return
operator_init_done
;
}
return
operator_reinit
;
}
inline
void
freeres
()
{
tensor_pool_
.
clear
();
subtensor_pool_
.
clear
();
funcs_
.
clear
();
}
inline
const
char
*&
name
()
{
return
name_
;
}
inline
void
set_in_out_index
(
int
indata_idx
,
int
outdata_idx
)
{
input_idx
()
=
indata_idx
;
output_idx
()
=
outdata_idx
;
}
protected:
std
::
vector
<
std
::
unique_ptr
<
TensorPair
<
ACLTensor
>>>
tensor_pool_
;
std
::
vector
<
std
::
unique_ptr
<
TensorPair
<
ACLSubTensor
>>>
subtensor_pool_
;
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
funcs_
;
OperatorState
operator_state_
;
bool
force_bypass_acl_path_
;
TargetHint
target_hint_
;
ConvolutionMethodHint
convolution_method_hint_
;
static
bool
support_opengles_
;
static
bool
support_opencl_
;
static
bool
init_gpu_env
;
int
_group
;
const
char
*
name_
;
OperateType
type_
;
int
input_idx_
,
output_idx_
;
bool
is_gpu_
;
};
int
isScheduleEnable
();
template
<
typename
OperatorType
,
typename
TensorType
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_function
(
arm_compute
::
ITensor
*
input
,
arm_compute
::
ITensor
*
output
)
{
auto
op
=
cpp14
::
make_unique
<
OperatorType
>
();
op
->
configure
(
dynamic_cast
<
TensorType
*>
(
input
),
dynamic_cast
<
TensorType
*>
(
output
));
return
std
::
move
(
op
);
}
template
<
typename
OperatorType
,
typename
TensorType
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate
(
arm_compute
::
ITensor
*
input
,
arm_compute
::
ITensor
*
output
)
{
return
instantiate_function
<
OperatorType
,
TensorType
>
(
input
,
output
);
}
template
<
typename
OpType
,
typename
OpTensor
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_op_func
(
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
&
hint
)
{
std
::
unique_ptr
<
arm_compute
::
IFunction
>
func
;
func
=
instantiate
<
OpType
,
OpTensor
>
(
input
->
tensor
(),
output
->
tensor
());
return
func
;
}
template
<
typename
OperatorType
,
typename
TensorType
,
typename
VectorTensor
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_function
(
VectorTensor
inputs
,
arm_compute
::
ITensor
*
output
)
{
auto
op
=
cpp14
::
make_unique
<
OperatorType
>
();
op
->
configure
(
inputs
,
dynamic_cast
<
TensorType
*>
(
output
));
return
std
::
move
(
op
);
}
template
<
typename
OperatorType
,
typename
TensorType
,
typename
VectorTensor
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate
(
VectorTensor
inputs
,
arm_compute
::
ITensor
*
output
)
{
return
instantiate_function
<
OperatorType
,
TensorType
,
VectorTensor
>
(
inputs
,
output
);
}
template
<
typename
OpType
,
typename
OpTensor
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_op_func_lists
(
ACLOperator
*&
acl_op
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
int
num
,
TargetHint
&
hint
)
{
std
::
unique_ptr
<
arm_compute
::
IFunction
>
func
;
static
std
::
vector
<
OpTensor
*>
tensors
;
tensors
.
clear
();
for
(
int
i
=
0
;
i
<
num
;
++
i
)
{
tensors
.
push_back
(
dynamic_cast
<
OpTensor
*>
(
acl_op
->
cinput
(
i
).
get
()
->
tensor
()));
}
func
=
instantiate
<
OpType
,
OpTensor
,
std
::
vector
<
OpTensor
*>>
(
tensors
,
output
->
tensor
());
return
func
;
}
template
<
typename
OperatorType
,
typename
TensorType
,
typename
OperatorInfo
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_function
(
arm_compute
::
ITensor
*
input
,
arm_compute
::
ITensor
*
output
,
const
OperatorInfo
&
info
)
{
auto
op
=
cpp14
::
make_unique
<
OperatorType
>
();
op
->
configure
(
dynamic_cast
<
TensorType
*>
(
input
),
dynamic_cast
<
TensorType
*>
(
output
),
info
);
return
std
::
move
(
op
);
}
template
<
typename
OperatorType
,
typename
TensorType
,
typename
OperatorInfo
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate
(
arm_compute
::
ITensor
*
input
,
arm_compute
::
ITensor
*
output
,
const
OperatorInfo
&
info
)
{
return
instantiate_function
<
OperatorType
,
TensorType
,
OperatorInfo
>
(
input
,
output
,
info
);
}
template
<
typename
OpType
,
typename
OpTensor
,
typename
OperatorInfo
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_op_func
(
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
const
OperatorInfo
&
info
,
TargetHint
&
hint
)
{
std
::
unique_ptr
<
arm_compute
::
IFunction
>
func
;
func
=
instantiate
<
OpType
,
OpTensor
,
OperatorInfo
>
(
input
->
tensor
(),
output
->
tensor
(),
info
);
return
func
;
}
template
<
typename
OperatorType
,
typename
TensorType
,
typename
OperatorInfo
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_function
(
arm_compute
::
ITensor
*
input
,
arm_compute
::
ITensor
*
weights
,
arm_compute
::
ITensor
*
biases
,
arm_compute
::
ITensor
*
output
,
const
OperatorInfo
&
info
)
{
auto
op
=
cpp14
::
make_unique
<
OperatorType
>
();
op
->
configure
(
dynamic_cast
<
TensorType
*>
(
input
),
dynamic_cast
<
TensorType
*>
(
weights
),
dynamic_cast
<
TensorType
*>
(
biases
),
dynamic_cast
<
TensorType
*>
(
output
),
info
);
return
std
::
move
(
op
);
}
template
<
typename
OperatorType
,
typename
TensorType
,
typename
OperatorInfo
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate
(
arm_compute
::
ITensor
*
input
,
arm_compute
::
ITensor
*
weights
,
arm_compute
::
ITensor
*
biases
,
arm_compute
::
ITensor
*
output
,
const
OperatorInfo
&
info
)
{
return
instantiate_function
<
OperatorType
,
TensorType
,
OperatorInfo
>
(
input
,
weights
,
biases
,
output
,
info
);
}
template
<
typename
OpType
,
typename
OpTensor
,
typename
OperatorInfo
,
typename
ACLTensor
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_op_func
(
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
weights
,
std
::
unique_ptr
<
ACLTensor
>
&
biases
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
const
OperatorInfo
&
info
,
TargetHint
&
hint
)
{
std
::
unique_ptr
<
arm_compute
::
IFunction
>
func
;
arm_compute
::
ITensor
*
biases_tensor
=
NULL
;
if
(
biases
.
get
())
{
biases_tensor
=
biases
->
tensor
();
}
func
=
instantiate
<
OpType
,
OpTensor
,
OperatorInfo
>
(
input
->
tensor
(),
weights
->
tensor
(),
biases_tensor
,
output
->
tensor
(),
info
);
return
func
;
}
template
<
typename
Dtype
,
typename
OperatorType
,
typename
TensorType
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_function
(
arm_compute
::
ITensor
*
input
,
arm_compute
::
ITensor
*
output
,
arm_compute
::
ITensor
*
mean
,
arm_compute
::
ITensor
*
var
,
arm_compute
::
ITensor
*
beta
,
arm_compute
::
ITensor
*
gamma
,
Dtype
&
eps
)
{
auto
op
=
cpp14
::
make_unique
<
OperatorType
>
();
op
->
configure
(
dynamic_cast
<
TensorType
*>
(
input
),
dynamic_cast
<
TensorType
*>
(
output
),
dynamic_cast
<
TensorType
*>
(
mean
),
dynamic_cast
<
TensorType
*>
(
var
),
dynamic_cast
<
TensorType
*>
(
beta
),
dynamic_cast
<
TensorType
*>
(
gamma
),
eps
);
return
std
::
move
(
op
);
}
template
<
typename
Dtype
,
typename
OperatorType
,
typename
TensorType
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate
(
arm_compute
::
ITensor
*
input
,
arm_compute
::
ITensor
*
output
,
arm_compute
::
ITensor
*
mean
,
arm_compute
::
ITensor
*
var
,
arm_compute
::
ITensor
*
beta
,
arm_compute
::
ITensor
*
gamma
,
Dtype
eps
)
{
return
instantiate_function
<
Dtype
,
OperatorType
,
TensorType
>
(
input
,
output
,
mean
,
var
,
beta
,
gamma
,
eps
);
}
template
<
typename
Dtype
,
typename
OpType
,
typename
OpTensor
>
std
::
unique_ptr
<
arm_compute
::
IFunction
>
instantiate_op_func
(
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
std
::
unique_ptr
<
ACLTensor
>
&
mean
,
std
::
unique_ptr
<
ACLTensor
>
&
var
,
std
::
unique_ptr
<
ACLTensor
>
&
beta
,
std
::
unique_ptr
<
ACLTensor
>
&
gamma
,
Dtype
eps
,
TargetHint
hint
)
{
std
::
unique_ptr
<
arm_compute
::
IFunction
>
func
;
func
=
instantiate
<
Dtype
,
OpType
,
OpTensor
>
(
input
->
tensor
(),
output
->
tensor
(),
mean
->
tensor
(),
var
->
tensor
(),
beta
->
tensor
(),
gamma
->
tensor
(),
eps
);
return
func
;
}
template
<
typename
OperatorInfo
>
bool
instantiate_op_pooling
(
ACLOperator
*
acl_op
,
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
func
,
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
hint
,
const
OperatorInfo
&
info
)
{
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLPoolingLayer
,
arm_compute
::
ICLTensor
,
arm_compute
::
PoolingLayerInfo
>
(
input
,
output
,
info
,
hint
));
return
true
;
}
#elif defined(USE_OPENGLES)
if
(
hint
==
TargetHint
::
OPENGLES
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
GCPoolingLayer
,
arm_compute
::
IGCTensor
,
arm_compute
::
PoolingLayerInfo
>
(
input
,
output
,
info
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NEPoolingLayer
,
arm_compute
::
ITensor
,
arm_compute
::
PoolingLayerInfo
>
(
input
,
output
,
info
,
hint
));
}
return
true
;
}
template
<
typename
OperatorInfo
>
bool
instantiate_op_activation
(
ACLOperator
*
acl_op
,
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
func
,
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
hint
,
const
OperatorInfo
&
info
)
{
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLActivationLayer
,
arm_compute
::
ICLTensor
,
arm_compute
::
ActivationLayerInfo
>
(
input
,
output
,
info
,
hint
));
return
true
;
}
#elif defined(USE_OPENGLES)
if
(
hint
==
TargetHint
::
OPENGLES
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
GCActivationLayer
,
arm_compute
::
IGCTensor
,
arm_compute
::
ActivationLayerInfo
>
(
input
,
output
,
info
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NEActivationLayer
,
arm_compute
::
ITensor
,
arm_compute
::
ActivationLayerInfo
>
(
input
,
output
,
info
,
hint
));
}
return
true
;
}
template
<
typename
OperatorInfo
>
bool
instantiate_op_lrn
(
ACLOperator
*
acl_op
,
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
func
,
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
hint
,
const
OperatorInfo
&
info
)
{
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLNormalizationLayer
,
arm_compute
::
ICLTensor
,
arm_compute
::
NormalizationLayerInfo
>
(
input
,
output
,
info
,
hint
));
return
true
;
}
#elif defined(USE_OPENGLES)
if
(
hint
==
TargetHint
::
OPENGLES
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
GCNormalizationLayer
,
arm_compute
::
IGCTensor
,
arm_compute
::
NormalizationLayerInfo
>
(
input
,
output
,
info
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NENormalizationLayer
,
arm_compute
::
ITensor
,
arm_compute
::
NormalizationLayerInfo
>
(
input
,
output
,
info
,
hint
));
}
return
true
;
}
template
<
typename
OperatorInfo
>
bool
instantiate_op_conv
(
ACLOperator
*
acl_op
,
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
func
,
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
hint
,
const
OperatorInfo
&
info
)
{
std
::
unique_ptr
<
ACLTensor
>
&
weights
=
acl_op
->
weights
();
std
::
unique_ptr
<
ACLTensor
>
&
biases
=
acl_op
->
biases
();
ConvolutionMethodHint
&
conv_method
=
acl_op
->
getConvMethod
();
bool
has_biases
=
biases
.
get
()
?
true
:
false
;
int
&
groups
=
acl_op
->
group
();
arm_compute
::
TensorShape
input_shape
=
input
->
info
().
tensor_shape
();
arm_compute
::
TensorShape
weights_shape
=
weights
->
info
().
tensor_shape
();
arm_compute
::
TensorShape
biases_shape
;
if
(
has_biases
)
{
biases_shape
=
biases
->
info
().
tensor_shape
();
}
arm_compute
::
TensorShape
output_shape
=
output
->
info
().
tensor_shape
();
if
(
groups
==
1
)
{
if
(
conv_method
==
ConvolutionMethodHint
::
GEMM
)
{
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLConvolutionLayer
,
arm_compute
::
ICLTensor
,
arm_compute
::
PadStrideInfo
>
(
acl_op
->
input
(),
acl_op
->
weights
(),
acl_op
->
biases
(),
acl_op
->
output
(),
info
,
hint
));
return
true
;
}
#elif defined(USE_OPENGLES)
if
(
hint
==
TargetHint
::
OPENGLES
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
GCConvolutionLayer
,
arm_compute
::
IGCTensor
,
arm_compute
::
PadStrideInfo
>
(
acl_op
->
input
(),
acl_op
->
weights
(),
acl_op
->
biases
(),
acl_op
->
output
(),
info
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NEConvolutionLayer
,
arm_compute
::
ITensor
,
arm_compute
::
PadStrideInfo
>
(
acl_op
->
input
(),
acl_op
->
weights
(),
acl_op
->
biases
(),
acl_op
->
output
(),
info
,
hint
));
}
}
else
{
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLDirectConvolutionLayer
,
arm_compute
::
ICLTensor
,
arm_compute
::
PadStrideInfo
>
(
acl_op
->
input
(),
acl_op
->
weights
(),
acl_op
->
biases
(),
acl_op
->
output
(),
info
,
hint
));
return
true
;
}
#elif defined(USE_OPENGLES)
if
(
hint
==
TargetHint
::
OPENGLES
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
GCDirectConvolutionLayer
,
arm_compute
::
IGCTensor
,
arm_compute
::
PadStrideInfo
>
(
acl_op
->
input
(),
acl_op
->
weights
(),
acl_op
->
biases
(),
acl_op
->
output
(),
info
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NEDirectConvolutionLayer
,
arm_compute
::
ITensor
,
arm_compute
::
PadStrideInfo
>
(
acl_op
->
input
(),
acl_op
->
weights
(),
acl_op
->
biases
(),
acl_op
->
output
(),
info
,
hint
));
}
}
return
true
;
}
// Calculate sub-tensor splits
const
int
input_split
=
input_shape
.
z
()
/
groups
;
const
int
output_split
=
output_shape
.
z
()
/
groups
;
const
int
weights_split
=
weights_shape
[
3
]
/
groups
;
const
int
biases_split
=
biases_shape
.
x
()
/
groups
;
// Calculate sub-tensor shapes
input_shape
.
set
(
2
,
input_split
);
output_shape
.
set
(
2
,
output_split
);
weights_shape
.
set
(
3
,
weights_split
);
biases_shape
.
set
(
0
,
biases_split
);
for
(
auto
i
=
0
;
i
<
groups
;
++
i
)
{
// Calculate sub-tensors starting coordinates
arm_compute
::
Coordinates
input_coord
(
0
,
0
,
input_split
*
i
);
arm_compute
::
Coordinates
output_coord
(
0
,
0
,
output_split
*
i
);
arm_compute
::
Coordinates
weights_coord
(
0
,
0
,
0
,
weights_split
*
i
);
arm_compute
::
Coordinates
biases_coord
(
biases_split
*
i
);
// Create sub-tensors for input, output, weights and bias
acl_op
->
new_tensor
(
acl_op
->
sinput
(
i
),
acl_op
->
input
(),
input_shape
,
input_coord
);
acl_op
->
new_tensor
(
acl_op
->
soutput
(
i
),
acl_op
->
output
(),
output_shape
,
output_coord
);
acl_op
->
new_tensor
(
acl_op
->
sweights
(
i
),
acl_op
->
weights
(),
weights_shape
,
weights_coord
);
if
(
has_biases
)
{
acl_op
->
new_tensor
(
acl_op
->
sbiases
(
i
),
acl_op
->
biases
(),
biases_shape
,
biases_coord
);
}
bool
use_opencl
=
false
;
if
(
conv_method
==
ConvolutionMethodHint
::
GEMM
)
{
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
use_opencl
=
true
;
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLConvolutionLayer
,
arm_compute
::
ICLTensor
,
arm_compute
::
PadStrideInfo
,
ACLSubTensor
>
(
acl_op
->
sinput
(
i
),
acl_op
->
sweights
(
i
),
acl_op
->
sbiases
(
i
),
acl_op
->
soutput
(
i
),
info
,
hint
));
}
#endif
if
(
!
use_opencl
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NEConvolutionLayer
,
arm_compute
::
ITensor
,
arm_compute
::
PadStrideInfo
,
ACLSubTensor
>
(
acl_op
->
sinput
(
i
),
acl_op
->
sweights
(
i
),
acl_op
->
sbiases
(
i
),
acl_op
->
soutput
(
i
),
info
,
hint
));
}
}
else
{
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
use_opencl
=
true
;
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLDirectConvolutionLayer
,
arm_compute
::
ICLTensor
,
arm_compute
::
PadStrideInfo
,
ACLSubTensor
>
(
acl_op
->
sinput
(
i
),
acl_op
->
sweights
(
i
),
acl_op
->
sbiases
(
i
),
acl_op
->
soutput
(
i
),
info
,
hint
));
}
#endif
if
(
!
use_opencl
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NEDirectConvolutionLayer
,
arm_compute
::
ITensor
,
arm_compute
::
PadStrideInfo
,
ACLSubTensor
>
(
acl_op
->
sinput
(
i
),
acl_op
->
sweights
(
i
),
acl_op
->
sbiases
(
i
),
acl_op
->
soutput
(
i
),
info
,
hint
));
}
}
}
return
true
;
}
template
<
typename
OperatorInfo
>
bool
instantiate_op_lc
(
ACLOperator
*
acl_op
,
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
func
,
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
hint
,
const
OperatorInfo
&
info
)
{
std
::
unique_ptr
<
ACLTensor
>
&
weights
=
acl_op
->
weights
();
std
::
unique_ptr
<
ACLTensor
>
&
biases
=
acl_op
->
biases
();
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLLocallyConnectedLayer
,
arm_compute
::
ICLTensor
,
arm_compute
::
PadStrideInfo
>
(
input
,
weights
,
biases
,
output
,
info
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NELocallyConnectedLayer
,
arm_compute
::
ITensor
,
arm_compute
::
PadStrideInfo
>
(
input
,
weights
,
biases
,
output
,
info
,
hint
));
}
return
true
;
}
template
<
typename
OperatorInfo
>
bool
instantiate_op_fc
(
ACLOperator
*
acl_op
,
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
func
,
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
hint
,
const
OperatorInfo
&
info
)
{
std
::
unique_ptr
<
ACLTensor
>
&
weights
=
acl_op
->
weights
();
std
::
unique_ptr
<
ACLTensor
>
&
biases
=
acl_op
->
biases
();
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLFullyConnectedLayer
,
arm_compute
::
ICLTensor
,
bool
>
(
input
,
weights
,
biases
,
output
,
info
,
hint
));
return
true
;
}
#elif defined(USE_OPENGLES)
if
(
hint
==
TargetHint
::
OPENGLES
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
GCFullyConnectedLayer
,
arm_compute
::
IGCTensor
,
bool
>
(
input
,
weights
,
biases
,
output
,
info
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NEFullyConnectedLayer
,
arm_compute
::
ITensor
,
bool
>
(
input
,
weights
,
biases
,
output
,
info
,
hint
));
}
return
true
;
}
template
<
typename
Dtype
>
bool
instantiate_op_bn
(
ACLOperator
*
acl_op
,
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
func
,
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
hint
,
Dtype
eps
)
{
std
::
unique_ptr
<
ACLTensor
>
&
mean
=
acl_op
->
mean
();
std
::
unique_ptr
<
ACLTensor
>
&
var
=
acl_op
->
var
();
std
::
unique_ptr
<
ACLTensor
>
&
beta
=
acl_op
->
beta
();
std
::
unique_ptr
<
ACLTensor
>
&
gamma
=
acl_op
->
gamma
();
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func
<
Dtype
,
arm_compute
::
CLBatchNormalizationLayer
,
arm_compute
::
ICLTensor
>
(
input
,
output
,
mean
,
var
,
beta
,
gamma
,
eps
,
hint
));
return
true
;
}
#elif defined(USE_OPENGLES)
if
(
hint
==
TargetHint
::
OPENGLES
)
{
func
.
push_back
(
instantiate_op_func
<
Dtype
,
arm_compute
::
GCBatchNormalizationLayer
,
arm_compute
::
IGCTensor
>
(
input
,
output
,
mean
,
var
,
beta
,
gamma
,
eps
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func
<
Dtype
,
arm_compute
::
NEBatchNormalizationLayer
,
arm_compute
::
ITensor
>
(
input
,
output
,
mean
,
var
,
beta
,
gamma
,
eps
,
hint
));
}
return
true
;
}
inline
bool
instantiate_op_softmax
(
ACLOperator
*
acl_op
,
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
func
,
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
hint
,
void
*
data
)
{
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
CLSoftmaxLayer
,
arm_compute
::
ICLTensor
>
(
input
,
output
,
hint
));
return
true
;
}
#elif defined(USE_OPENGLES)
if
(
hint
==
TargetHint
::
OPENGLES
)
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
GCSoftmaxLayer
,
arm_compute
::
IGCTensor
>
(
input
,
output
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func
<
arm_compute
::
NESoftmaxLayer
,
arm_compute
::
ITensor
>
(
input
,
output
,
hint
));
}
return
true
;
}
inline
bool
instantiate_op_concat
(
ACLOperator
*
acl_op
,
std
::
vector
<
std
::
unique_ptr
<
arm_compute
::
IFunction
>>
&
func
,
std
::
unique_ptr
<
ACLTensor
>
&
input
,
std
::
unique_ptr
<
ACLTensor
>
&
output
,
TargetHint
hint
,
int
num
)
{
#ifdef USE_OPENCL
if
(
hint
==
TargetHint
::
OPENCL
)
{
func
.
push_back
(
instantiate_op_func_lists
<
arm_compute
::
CLDepthConcatenateLayer
,
arm_compute
::
ICLTensor
>
(
acl_op
,
output
,
num
,
hint
));
return
true
;
}
#elif defined(USE_OPENGLES)
if
(
hint
==
TargetHint
::
OPENGLES
)
{
func
.
push_back
(
instantiate_op_func_lists
<
arm_compute
::
GCDepthConcatenateLayer
,
arm_compute
::
IGCTensor
>
(
acl_op
,
output
,
num
,
hint
));
return
true
;
}
#endif
{
func
.
push_back
(
instantiate_op_func_lists
<
arm_compute
::
NEDepthConcatenateLayer
,
arm_compute
::
ITensor
>
(
acl_op
,
output
,
num
,
hint
));
}
return
true
;
}
template
<
typename
Dtype
>
void
*
InputdataPtr
(
ACLOperator
*
op
,
const
std
::
vector
<
framework
::
LoDTensor
*>
&
input_data
,
Dtype
type
,
int
index
=
-
1
)
{
if
(
index
==
-
1
)
index
=
0
;
return
(
void
*
)(
input_data
[
index
]
->
mutable_data
<
Dtype
>
());
}
template
<
typename
Dtype
>
void
acl_run
(
ACLOperator
*
op
,
const
std
::
vector
<
framework
::
LoDTensor
*>
&
in_data
,
void
*
out_data
,
Dtype
type
,
bool
multi_input_run
=
true
)
{
for
(
int
i
=
0
;
i
<
in_data
.
size
();
++
i
)
{
op
->
tensor_mem
(
op
->
cinput
(
i
),
InputdataPtr
(
op
,
in_data
,
type
,
i
));
}
op
->
acl_run
(
NULL
,
out_data
);
}
}
// namespace acl
}
// namespace operators
}
// namespace paddle_mobile
#ifdef USE_PROFILING
#define acl_configure(opname, acl_op, args...) \
{ \
set_operator_property(acl::operate_type_##opname, #opname); \
logtime_util log_time(ACL_CONFIG_INFO); \
instantiate_op_##opname(acl_op, acl_op->funcs(), acl_op->input(), \
acl_op->output(), acl_op->getTargetHint(), args); \
}
#else
#define acl_configure(opname, acl_op, args...) \
{ \
set_operator_property(acl::operate_type_##opname, #opname); \
instantiate_op_##opname(acl_op, acl_op->funcs(), acl_op->input(), \
acl_op->output(), acl_op->getTargetHint(), args); \
}
#endif
#define ACLOp_Ptr(a) dynamic_cast<ACLOperator *>(a)
#endif // USE_ACL
#endif // ACL_OPERATOR_H_
src/operators/kernel/mali/acl_tensor.cc
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "acl_tensor.h"
namespace
paddle_mobile
{
namespace
operators
{
namespace
acl
{
#ifdef USE_ACL
template
<
typename
TensorType
>
std
::
unique_ptr
<
arm_compute
::
ITensor
>
initialise_tensor
(
arm_compute
::
TensorInfo
&
info
)
{
auto
tensor
=
cpp14
::
make_unique
<
TensorType
>
();
tensor
->
allocator
()
->
init
(
info
);
return
std
::
move
(
tensor
);
}
template
<
typename
TensorType
>
void
tensor_allocate
(
arm_compute
::
ITensor
&
tensor
)
{
auto
itensor
=
dynamic_cast
<
TensorType
*>
(
&
tensor
);
itensor
->
allocator
()
->
allocate
();
}
Tensor
::
Tensor
(
arm_compute
::
TensorInfo
&
info
)
noexcept
:
_target
(
TargetHint
::
DONT_CARE
),
_info
(
info
),
_tensor
(
nullptr
)
{}
Tensor
::
Tensor
(
Tensor
&&
src
)
noexcept
:
_target
(
src
.
_target
),
_info
(
std
::
move
(
src
.
_info
)),
_tensor
(
std
::
move
(
src
.
_tensor
))
{}
arm_compute
::
ITensor
*
Tensor
::
set_target
(
TargetHint
target
)
{
switch
(
target
)
{
#ifdef USE_OPENCL
case
TargetHint
::
OPENCL
:
_tensor
=
initialise_tensor
<
arm_compute
::
CLTensor
>
(
_info
);
break
;
#elif defined(USE_OPENGLES)
case
TargetHint
::
OPENGLES
:
_tensor
=
initialise_tensor
<
arm_compute
::
GCTensor
>
(
_info
);
break
;
#endif
case
TargetHint
::
NEON
:
_tensor
=
initialise_tensor
<
arm_compute
::
Tensor
>
(
_info
);
break
;
default:
break
;
}
_target
=
target
;
return
_tensor
.
get
();
}
void
Tensor
::
allocate
()
{
switch
(
_target
)
{
#ifdef USE_OPENCL
case
TargetHint
::
OPENCL
:
tensor_allocate
<
arm_compute
::
CLTensor
>
(
*
_tensor
);
break
;
#elif defined(USE_OPENGLES)
case
TargetHint
::
OPENGLES
:
tensor_allocate
<
arm_compute
::
GCTensor
>
(
*
_tensor
);
break
;
#endif
case
TargetHint
::
NEON
:
tensor_allocate
<
arm_compute
::
Tensor
>
(
*
_tensor
);
break
;
default:
break
;
}
}
void
Tensor
::
map
(
bool
blocking
)
{
#ifdef USE_OPENCL
if
(
_target
==
TargetHint
::
OPENCL
)
dynamic_cast
<
arm_compute
::
CLTensor
*>
(
tensor
())
->
map
(
blocking
);
#elif defined(USE_OPENGLES)
if
(
_target
==
TargetHint
::
OPENGLES
)
dynamic_cast
<
arm_compute
::
GCTensor
*>
(
tensor
())
->
map
(
blocking
);
#endif
}
void
Tensor
::
unmap
()
{
#ifdef USE_OPENCL
if
(
_target
==
TargetHint
::
OPENCL
)
dynamic_cast
<
arm_compute
::
CLTensor
*>
(
tensor
())
->
unmap
();
#elif defined(USE_OPENGLES)
if
(
_target
==
TargetHint
::
OPENGLES
)
dynamic_cast
<
arm_compute
::
GCTensor
*>
(
tensor
())
->
unmap
();
#endif
}
template
<
typename
SubTensorType
,
typename
ParentTensorType
>
std
::
unique_ptr
<
arm_compute
::
ITensor
>
initialise_subtensor
(
arm_compute
::
ITensor
*
parent
,
arm_compute
::
TensorShape
shape
,
arm_compute
::
Coordinates
coords
)
{
auto
ptensor
=
dynamic_cast
<
ParentTensorType
*>
(
parent
);
auto
subtensor
=
cpp14
::
make_unique
<
SubTensorType
>
(
ptensor
,
shape
,
coords
);
return
std
::
move
(
subtensor
);
}
SubTensor
::
SubTensor
(
Tensor
*
parent
,
arm_compute
::
TensorShape
&
tensor_shape
,
arm_compute
::
Coordinates
&
coords
)
noexcept
:
_target
(
TargetHint
::
DONT_CARE
),
_tensor_shape
(
tensor_shape
),
_coords
(
coords
),
_parent
(
nullptr
),
_subtensor
(
nullptr
)
{
_parent
=
parent
->
tensor
();
_target
=
parent
->
target
();
instantiate_subtensor
();
}
arm_compute
::
ITensor
*
SubTensor
::
set_target
(
TargetHint
target
)
{
return
(
target
==
_target
)
?
_subtensor
.
get
()
:
nullptr
;
}
arm_compute
::
ITensor
*
SubTensor
::
tensor
()
{
return
_subtensor
.
get
();
}
const
arm_compute
::
ITensor
*
SubTensor
::
tensor
()
const
{
return
_subtensor
.
get
();
}
TargetHint
SubTensor
::
target
()
const
{
return
_target
;
}
void
SubTensor
::
allocate
()
{
// NOP for sub-tensors
}
void
SubTensor
::
instantiate_subtensor
()
{
switch
(
_target
)
{
#ifdef USE_OPENCL
case
TargetHint
::
OPENCL
:
_subtensor
=
initialise_subtensor
<
arm_compute
::
CLSubTensor
,
arm_compute
::
ICLTensor
>
(
_parent
,
_tensor_shape
,
_coords
);
break
;
#endif
default:
case
TargetHint
::
NEON
:
_subtensor
=
initialise_subtensor
<
arm_compute
::
SubTensor
,
arm_compute
::
ITensor
>
(
_parent
,
_tensor_shape
,
_coords
);
break
;
}
}
#endif
}
// namespace acl
}
// namespace operators
}
// namespace paddle_mobile
src/operators/kernel/mali/acl_tensor.h
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef ACL_TENSOR_H_
#define ACL_TENSOR_H_
#ifdef USE_ACL
#ifdef USE_OPENCL
#include "arm_compute/runtime/CL/CLSubTensor.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#elif defined(USE_OPENGLES)
#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
#endif
#include "arm_compute/runtime/SubTensor.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace
paddle_mobile
{
namespace
operators
{
namespace
acl
{
enum
class
TargetHint
{
DONT_CARE
,
OPENCL
,
OPENGLES
,
NEON
,
};
enum
class
ConvolutionMethodHint
{
GEMM
,
DIRECT
,
};
namespace
cpp14
{
template
<
class
T
>
struct
_Unique_if
{
typedef
std
::
unique_ptr
<
T
>
_Single_object
;
};
template
<
class
T
>
struct
_Unique_if
<
T
[]
>
{
typedef
std
::
unique_ptr
<
T
[]
>
_Unknown_bound
;
};
template
<
class
T
,
size_t
N
>
struct
_Unique_if
<
T
[
N
]
>
{
typedef
void
_Known_bound
;
};
template
<
class
T
,
class
...
Args
>
typename
_Unique_if
<
T
>::
_Single_object
make_unique
(
Args
&&
...
args
)
{
return
std
::
unique_ptr
<
T
>
(
new
T
(
std
::
forward
<
Args
>
(
args
)...));
}
template
<
class
T
>
typename
_Unique_if
<
T
>::
_Unknown_bound
make_unique
(
size_t
n
)
{
typedef
typename
std
::
remove_extent
<
T
>::
type
U
;
return
std
::
unique_ptr
<
T
>
(
new
U
[
n
]());
}
template
<
class
T
,
class
...
Args
>
typename
_Unique_if
<
T
>::
_Known_bound
make_unique
(
Args
&&
...);
}
// namespace cpp14
class
Tensor
{
public:
explicit
Tensor
(
arm_compute
::
TensorInfo
&
info
)
noexcept
;
virtual
~
Tensor
()
{}
Tensor
(
Tensor
&&
src
)
noexcept
;
void
set_info
(
arm_compute
::
TensorInfo
&&
info
)
{
_info
=
info
;
}
arm_compute
::
ITensor
*
set_target
(
TargetHint
target
);
const
arm_compute
::
TensorInfo
&
info
()
const
{
return
_info
;
}
arm_compute
::
ITensor
*
tensor
()
{
return
_tensor
.
get
();
}
void
allocate
();
void
init
()
{}
TargetHint
target
()
const
{
return
_target
;
}
virtual
void
map
(
bool
blocking
=
true
);
virtual
void
unmap
();
private:
TargetHint
_target
;
arm_compute
::
TensorInfo
_info
;
std
::
unique_ptr
<
arm_compute
::
ITensor
>
_tensor
;
};
class
SubTensor
{
public:
SubTensor
(
Tensor
*
parent
,
arm_compute
::
TensorShape
&
tensor_shape
,
arm_compute
::
Coordinates
&
coords
)
noexcept
;
~
SubTensor
()
{}
arm_compute
::
ITensor
*
tensor
();
const
arm_compute
::
ITensor
*
tensor
()
const
;
TargetHint
target
()
const
;
void
allocate
();
arm_compute
::
ITensor
*
set_target
(
TargetHint
target
);
private:
/** Instantiates a sub-tensor */
void
instantiate_subtensor
();
private:
/**< Target that this tensor is pinned on */
TargetHint
_target
;
/**< SubTensor shape */
arm_compute
::
TensorShape
_tensor_shape
;
/**< SubTensor Coordinates */
arm_compute
::
Coordinates
_coords
;
/**< Parent tensor */
arm_compute
::
ITensor
*
_parent
;
/**< SubTensor */
std
::
unique_ptr
<
arm_compute
::
ITensor
>
_subtensor
;
};
}
// namespace acl
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif // ACL_TENSOR_H_
src/operators/kernel/mali/batchnorm_kernel.cpp
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BATCHNORM_OP
#include "operators/kernel/batchnorm_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
DeviceType
,
typename
T
>
class
AclBatchNormOp
:
public
acl
::
ACLOperator
{
public:
AclBatchNormOp
()
{
this
->
force_bypass_acl_path_
=
bypass_acl_class_layer
&
FLAGS_ENABLE_ACL_BN
;
}
~
AclBatchNormOp
()
=
default
;
AclBatchNormOp
(
const
AclBatchNormOp
&
)
=
delete
;
AclBatchNormOp
&
operator
=
(
const
AclBatchNormOp
&
)
=
delete
;
AclBatchNormOp
(
AclBatchNormOp
&&
)
=
delete
;
AclBatchNormOp
&
operator
=
(
AclBatchNormOp
&&
)
=
delete
;
acl
::
AclParameters
&
getargs
()
{
return
args
;
}
void
InitAclLayer
(
const
BatchNormParam
<
DeviceType
>&
param
)
{
setTargetHint
(
acl
::
TargetHint
::
OPENCL
);
arm_compute
::
TensorShape
input_shape
(
args
.
in_cols
,
args
.
in_rows
,
args
.
in_depth
,
args
.
batch
);
arm_compute
::
TensorShape
output_shape
(
args
.
out_cols
,
args
.
out_rows
,
args
.
out_depth
,
args
.
out_num
);
if
(
is_operator_init_done
(
input_shape
))
return
;
set_operator_init_done
();
this
->
force_bypass_acl_path_
=
false
;
arm_compute
::
TensorShape
mean_shape
(
args
.
in_depth
);
arm_compute
::
TensorShape
var_shape
=
mean_shape
;
arm_compute
::
TensorShape
beta_shape
=
mean_shape
;
arm_compute
::
TensorShape
gamma_shape
=
mean_shape
;
//[width, height, IFM]
new_tensor
(
input
(),
input_shape
,
args
.
input_data
);
//[width, height, OFM]
new_tensor
(
output
(),
output_shape
,
args
.
output_data
);
new_tensor
(
mean
(),
mean_shape
,
args
.
mean_data
);
new_tensor
(
var
(),
var_shape
,
args
.
var_data
);
new_tensor
(
beta
(),
beta_shape
,
args
.
biases_data
);
new_tensor
(
gamma
(),
gamma_shape
,
args
.
weight_data
);
acl_configure
(
bn
,
this
,
args
.
epsilon
);
}
void
RunAcl
(
void
*
input
,
void
*
output
)
{
acl
::
ACLOperator
::
acl_run
(
input
,
output
);
}
bool
Bypass_acl
(
const
BatchNormParam
<
DeviceType
>&
param
)
{
bool
bypass_acl
=
false
;
AclParametersByContext
(
param
);
InitAclLayer
(
param
);
// for performance, more groups impact GPU performance
if
(
this
->
force_bypass_acl_path_
)
{
bypass_acl
=
true
;
}
return
bypass_acl
;
}
private:
void
AclParametersByContext
(
const
BatchNormParam
<
DeviceType
>&
param
)
{
const
Tensor
*
in_x
=
param
.
InputX
();
Tensor
*
out
=
param
.
OutputY
();
const
Tensor
*
scale
=
param
.
InputScale
();
const
Tensor
*
bias
=
param
.
InputBias
();
const
Tensor
*
saved_mean
=
param
.
InputMean
();
const
Tensor
*
saved_variance
=
param
.
InputVariance
();
const
T
*
input_data
=
in_x
->
data
<
T
>
();
T
*
output_data
=
out
->
mutable_data
<
T
>
();
const
T
*
weight_data
=
scale
->
data
<
T
>
();
const
T
*
bias_data
=
bias
->
data
<
T
>
();
const
T
*
mean_data
=
saved_mean
->
data
<
T
>
();
const
T
*
var_data
=
saved_variance
->
data
<
T
>
();
float
epsilon
=
param
.
Epsilon
();
args
.
input_data
=
(
void
*
)
input_data
;
args
.
output_data
=
(
void
*
)
output_data
;
// args.weight_data = (void*)weight_data;
// args.biases_data = (void*)bias_data;
args
.
mean_data
=
(
void
*
)
mean_data
;
args
.
var_data
=
(
void
*
)
var_data
;
args
.
epsilon
=
epsilon
;
args
.
dim
=
in_x
->
dims
().
size
();
args
.
batch
=
in_x
->
dims
()[
0
];
args
.
in_depth
=
in_x
->
dims
()[
1
];
args
.
in_rows
=
in_x
->
dims
()[
2
];
args
.
in_cols
=
in_x
->
dims
()[
3
];
args
.
out_num
=
out
->
dims
()[
0
];
args
.
out_depth
=
out
->
dims
()[
1
];
args
.
out_rows
=
out
->
dims
()[
2
];
args
.
out_cols
=
out
->
dims
()[
3
];
args
.
weight_data
=
(
void
*
)
weight_data
;
args
.
biases_data
=
(
void
*
)
bias_data
;
// std::cout
// << "Out C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
}
acl
::
AclParameters
args
;
};
template
<
>
bool
BatchNormKernel
<
GPU_MALI
,
float
>::
Init
(
BatchNormParam
<
GPU_MALI
>*
param
)
{
AclBatchNormOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclBatchNormOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
acl_op
=
new
AclBatchNormOp
<
GPU_MALI
,
float
>
();
this
->
SetAclOp
((
void
*
)
acl_op
,
(
void
*
)
this
);
}
if
(
acl_op
->
Bypass_acl
(
*
param
))
{
std
::
cout
<<
"init acl failed"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
template
<
>
void
BatchNormKernel
<
GPU_MALI
,
float
>::
Compute
(
const
BatchNormParam
<
GPU_MALI
>&
param
)
{
std
::
cout
<<
"init acl"
<<
std
::
endl
;
AclBatchNormOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclBatchNormOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
return
;
}
acl
::
AclParameters
&
args
=
acl_op
->
getargs
();
acl_op
->
RunAcl
(
args
.
input_data
,
args
.
output_data
);
}
template
class
BatchNormKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/concat_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONCAT_OP
#include "operators/kernel/concat_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
DeviceType
,
typename
T
>
class
AclConcatOp
:
public
acl
::
ACLOperator
{
public:
AclConcatOp
()
{
this
->
force_bypass_acl_path_
=
bypass_acl_class_layer
&
FLAGS_ENABLE_ACL_CONCAT
;
}
~
AclConcatOp
()
=
default
;
AclConcatOp
(
const
AclConcatOp
&
)
=
delete
;
AclConcatOp
&
operator
=
(
const
AclConcatOp
&
)
=
delete
;
AclConcatOp
(
AclConcatOp
&&
)
=
delete
;
AclConcatOp
&
operator
=
(
AclConcatOp
&&
)
=
delete
;
acl
::
AclParameters
&
getargs
()
{
return
args
;
}
void
InitAclLayer
(
const
ConcatParam
<
DeviceType
>&
param
)
{
setTargetHint
(
acl
::
TargetHint
::
OPENCL
);
const
std
::
vector
<
framework
::
LoDTensor
*>*
input_data
=
&
args
.
in_tensor
;
arm_compute
::
TensorShape
output_shape
(
args
.
out_cols
,
args
.
out_rows
,
args
.
out_depth
,
args
.
batch
);
if
(
is_operator_init_done
(
output_shape
))
return
;
set_operator_init_done
();
this
->
force_bypass_acl_path_
=
false
;
T
type
;
for
(
int
i
=
0
;
i
<
input_data
->
size
();
i
++
)
{
int
in_batch
=
(
*
input_data
)[
i
]
->
dims
()[
0
];
int
in_channels
=
(
*
input_data
)[
i
]
->
dims
()[
1
];
int
in_width
=
(
*
input_data
)[
i
]
->
dims
()[
2
];
int
in_height
=
(
*
input_data
)[
i
]
->
dims
()[
3
];
arm_compute
::
TensorShape
in_shape
(
in_width
,
in_height
,
in_channels
);
new_tensor
(
cinput
(
i
),
in_shape
,
acl
::
InputdataPtr
(
this
,
args
.
in_tensor
,
type
,
i
));
}
//[width, height, OFM]
new_tensor
(
output
(),
output_shape
,
args
.
output_data
);
acl_configure
(
concat
,
this
,
input_data
->
size
());
}
void
RunAcl
(
const
std
::
vector
<
framework
::
LoDTensor
*>&
input
,
void
*
output
)
{
T
type
;
acl
::
acl_run
(
this
,
input
,
output
,
type
);
}
bool
Bypass_acl
(
const
ConcatParam
<
DeviceType
>&
param
)
{
bool
bypass_acl
=
false
;
AclParametersByContext
(
param
);
InitAclLayer
(
param
);
// for performance, more groups impact GPU performance
if
(
this
->
force_bypass_acl_path_
||
!
args
.
is_channel_concat
)
{
bypass_acl
=
true
;
}
return
bypass_acl
;
}
private:
void
AclParametersByContext
(
const
ConcatParam
<
DeviceType
>&
param
)
{
auto
inputs
=
param
.
Inputs
();
auto
*
output
=
param
.
Out
();
int64_t
axis
=
param
.
Axis
();
T
*
output_data
=
output
->
mutable_data
<
T
>
();
args
.
is_channel_concat
=
(
axis
==
1
);
args
.
in_tensor
=
inputs
;
args
.
output_data
=
(
void
*
)
output_data
;
args
.
batch
=
output
->
dims
()[
0
];
args
.
out_depth
=
output
->
dims
()[
1
];
args
.
out_rows
=
output
->
dims
()[
2
];
args
.
out_cols
=
output
->
dims
()[
3
];
}
acl
::
AclParameters
args
;
};
template
<
>
bool
ConcatKernel
<
GPU_MALI
,
float
>::
Init
(
ConcatParam
<
GPU_MALI
>*
param
)
{
AclConcatOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclConcatOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
acl_op
=
new
AclConcatOp
<
GPU_MALI
,
float
>
();
this
->
SetAclOp
((
void
*
)
acl_op
,
(
void
*
)
this
);
}
if
(
acl_op
->
Bypass_acl
(
*
param
))
{
std
::
cout
<<
"init acl failed"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
template
<
>
void
ConcatKernel
<
GPU_MALI
,
float
>::
Compute
(
const
ConcatParam
<
GPU_MALI
>&
param
)
{
std
::
cout
<<
"init acl"
<<
std
::
endl
;
AclConcatOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclConcatOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
return
;
}
acl
::
AclParameters
&
args
=
acl_op
->
getargs
();
acl_op
->
RunAcl
(
args
.
in_tensor
,
args
.
output_data
);
}
template
class
ConcatKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/conv_add_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
DeviceType
,
typename
T
>
class
AclConvAddOp
:
public
acl
::
ACLOperator
{
public:
AclConvAddOp
()
{
this
->
force_bypass_acl_path_
=
bypass_acl_class_layer
&
FLAGS_ENABLE_ACL_CONV
;
}
~
AclConvAddOp
()
=
default
;
AclConvAddOp
(
const
AclConvAddOp
&
)
=
delete
;
AclConvAddOp
&
operator
=
(
const
AclConvAddOp
&
)
=
delete
;
AclConvAddOp
(
AclConvAddOp
&&
)
=
delete
;
AclConvAddOp
&
operator
=
(
AclConvAddOp
&&
)
=
delete
;
acl
::
AclParameters
&
getargs
()
{
return
args
;
}
void
InitAclLayer
(
const
FusionConvAddParam
<
DeviceType
>&
param
)
{
setTargetHint
(
acl
::
TargetHint
::
OPENCL
);
arm_compute
::
TensorShape
input_shape
(
args
.
in_cols
,
args
.
in_rows
,
args
.
in_depth
,
args
.
batch
);
arm_compute
::
TensorShape
output_shape
(
args
.
out_cols
,
args
.
out_rows
,
args
.
out_depth
,
args
.
out_num
);
arm_compute
::
TensorShape
weights_shape
(
args
.
filter_cols
,
args
.
filter_rows
,
args
.
in_depth
/
args
.
num_group
,
args
.
out_depth
);
arm_compute
::
TensorShape
biases_shape
(
args
.
out_depth
);
arm_compute
::
PadStrideInfo
conv_info
(
args
.
stride_cols
,
args
.
stride_rows
,
args
.
pad_cols
,
args
.
pad_rows
,
arm_compute
::
DimensionRoundingType
::
FLOOR
);
if
(
is_operator_init_done
(
input_shape
))
return
;
set_operator_init_done
();
this
->
force_bypass_acl_path_
=
false
;
// check_direct_conv();
group
()
=
args
.
num_group
;
//[kernel_x, kernel_y, IFM, OFM]
new_tensor
(
weights
(),
weights_shape
,
args
.
weight_data
);
//[OFM]
if
(
args
.
biases_data
)
{
new_tensor
(
biases
(),
biases_shape
,
args
.
biases_data
);
}
//[width, height, IFM]
new_tensor
(
input
(),
input_shape
,
args
.
input_data
);
//[width, height, OFM]
new_tensor
(
output
(),
output_shape
,
args
.
output_data
);
acl_configure
(
conv
,
this
,
conv_info
);
}
void
RunAcl
(
void
*
input
,
void
*
output
)
{
acl
::
ACLOperator
::
acl_run
(
input
,
output
);
}
bool
Bypass_acl
(
const
FusionConvAddParam
<
DeviceType
>&
param
)
{
bool
bypass_acl
=
false
;
AclParametersByContext
(
param
);
InitAclLayer
(
param
);
// for performance, more groups impact GPU performance
if
(
this
->
force_bypass_acl_path_
||
args
.
num_group
>=
5
)
{
bypass_acl
=
true
;
}
if
(
args
.
dim
>
2
)
{
bypass_acl
=
true
;
}
if
(
args
.
dilated
)
{
bypass_acl
=
true
;
}
return
bypass_acl
;
}
private:
void
check_direct_conv
()
{
bool
use_direct_conv
=
false
;
const
char
*
pDirectConv
;
pDirectConv
=
getenv
(
"DIRECTCONV"
);
if
(
pDirectConv
)
{
unsigned
int
bdirectconv
;
sscanf
(
pDirectConv
,
"%i"
,
&
bdirectconv
);
if
(
bdirectconv
!=
use_direct_conv
)
{
use_direct_conv
=
bdirectconv
;
printf
(
"DIRECTCONV<%s>
\n
"
,
pDirectConv
);
printf
(
"DIRECTCONV: %x
\n
"
,
use_direct_conv
);
}
}
int
pad_data
[
2
],
kernel
[
2
];
pad_data
[
1
]
=
args
.
pad_rows
;
pad_data
[
0
]
=
args
.
pad_cols
;
kernel
[
1
]
=
args
.
filter_rows
;
kernel
[
0
]
=
args
.
filter_cols
;
if
(
use_direct_conv
&&
((
kernel
[
0
]
==
1
&&
kernel
[
1
]
==
1
&&
pad_data
[
0
]
==
0
&&
pad_data
[
1
]
==
0
)
||
(
kernel
[
0
]
==
3
&&
kernel
[
1
]
==
3
&&
pad_data
[
0
]
<=
1
&&
pad_data
[
1
]
<=
1
)))
{
setConvMethod
();
// NEDirectConvolutionLayer only for 1x1 and 3x3
}
}
void
AclParametersByContext
(
const
FusionConvAddParam
<
DeviceType
>&
param
)
{
const
Tensor
*
input
=
param
.
Input
();
Tensor
filter
=
*
param
.
Filter
();
Tensor
*
output
=
param
.
Output
();
Tensor
*
bias
;
int
groups
=
param
.
Groups
();
std
::
vector
<
int
>
strides
=
param
.
Strides
();
std
::
vector
<
int
>
paddings
=
param
.
Paddings
();
std
::
vector
<
int
>
dilations
=
param
.
Dilations
();
const
T
*
input_data
=
input
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
();
const
T
*
weight_data
=
filter
.
data
<
T
>
();
args
.
input_data
=
(
void
*
)
input_data
;
args
.
output_data
=
(
void
*
)
output_data
;
args
.
weight_data
=
(
void
*
)
weight_data
;
args
.
biases_data
=
nullptr
;
try
{
bias
=
param
.
Bias
();
}
catch
(
const
std
::
exception
&
e
)
{
}
if
(
bias
)
{
const
T
*
biases_data
=
bias
->
data
<
T
>
();
args
.
biases_data
=
(
void
*
)
biases_data
;
}
args
.
num_group
=
groups
;
args
.
dilation_rows
=
dilations
[
0
];
args
.
dilation_cols
=
dilations
[
1
];
if
(
dilations
[
0
]
!=
1
||
dilations
[
1
]
!=
1
)
{
args
.
dilated
=
true
;
}
// NCHW
// std::cout << "In dims: " << (input->dims()).size() << std::endl;
args
.
batch
=
input
->
dims
()[
0
];
args
.
in_depth
=
input
->
dims
()[
1
];
args
.
in_rows
=
input
->
dims
()[
2
];
args
.
in_cols
=
input
->
dims
()[
3
];
// std::cout <<"In N: " << args.batch << " C: " << args.in_depth
// << " H: " << args.in_rows << " W: " << args.in_cols << "\n";
// NCHW
// std::cout << "Out dims: " << (output->dims()).size() << std::endl;
args
.
out_num
=
output
->
dims
()[
0
];
args
.
out_depth
=
output
->
dims
()[
1
];
args
.
out_rows
=
output
->
dims
()[
2
];
args
.
out_cols
=
output
->
dims
()[
3
];
// std::cout <<"Out N: " << static_cast<int>(output->dims()[0])
// << " C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
// MCHW = OIHW
args
.
filter_rows
=
filter
.
dims
()[
2
];
args
.
filter_cols
=
filter
.
dims
()[
3
];
// std::cout <<"Filter O: " << static_cast<int>(filter.dims()[0])
// << " I: " << static_cast<int>(filter.dims()[1])
// << " H: " << args.filter_rows << " W: " << args.filter_cols << "\n";
// strides(h_stride, w_stride)
args
.
stride_rows
=
strides
[
0
];
args
.
stride_cols
=
strides
[
1
];
// std::cout <<"Stride H: " << args.stride_rows << " W: " <<
// args.stride_cols << "\n";
// paddings(h_pad, w_pad)
args
.
pad_rows
=
paddings
[
0
];
args
.
pad_cols
=
paddings
[
1
];
// std::cout <<"Pad H: " << args.pad_rows << " W: " << args.pad_cols <<
// "\n";
}
acl
::
AclParameters
args
;
};
template
<
>
bool
ConvAddKernel
<
GPU_MALI
,
float
>::
Init
(
FusionConvAddParam
<
GPU_MALI
>*
param
)
{
AclConvAddOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclConvAddOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
acl_op
=
new
AclConvAddOp
<
GPU_MALI
,
float
>
();
this
->
SetAclOp
((
void
*
)
acl_op
,
(
void
*
)
this
);
}
if
(
acl_op
->
Bypass_acl
(
*
param
))
{
std
::
cout
<<
"init acl failed"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
template
<
>
void
ConvAddKernel
<
GPU_MALI
,
float
>::
Compute
(
const
FusionConvAddParam
<
GPU_MALI
>&
param
)
{
std
::
cout
<<
"init acl"
<<
std
::
endl
;
AclConvAddOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclConvAddOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
return
;
}
acl
::
AclParameters
&
args
=
acl_op
->
getargs
();
acl_op
->
RunAcl
(
args
.
input_data
,
args
.
output_data
);
}
template
class
ConvAddKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/conv_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
DeviceType
,
typename
T
>
class
AclConvOp
:
public
acl
::
ACLOperator
{
public:
AclConvOp
()
{
this
->
force_bypass_acl_path_
=
bypass_acl_class_layer
&
FLAGS_ENABLE_ACL_CONV
;
}
~
AclConvOp
()
=
default
;
AclConvOp
(
const
AclConvOp
&
)
=
delete
;
AclConvOp
&
operator
=
(
const
AclConvOp
&
)
=
delete
;
AclConvOp
(
AclConvOp
&&
)
=
delete
;
AclConvOp
&
operator
=
(
AclConvOp
&&
)
=
delete
;
acl
::
AclParameters
&
getargs
()
{
return
args
;
}
void
InitAclLayer
(
const
ConvParam
<
DeviceType
>&
param
)
{
setTargetHint
(
acl
::
TargetHint
::
OPENCL
);
arm_compute
::
TensorShape
input_shape
(
args
.
in_cols
,
args
.
in_rows
,
args
.
in_depth
,
args
.
batch
);
arm_compute
::
TensorShape
output_shape
(
args
.
out_cols
,
args
.
out_rows
,
args
.
out_depth
,
args
.
out_num
);
arm_compute
::
TensorShape
weights_shape
(
args
.
filter_cols
,
args
.
filter_rows
,
args
.
in_depth
/
args
.
num_group
,
args
.
out_depth
);
// arm_compute::TensorShape biases_shape(args.out_depth);
arm_compute
::
PadStrideInfo
conv_info
(
args
.
stride_cols
,
args
.
stride_rows
,
args
.
pad_cols
,
args
.
pad_rows
,
arm_compute
::
DimensionRoundingType
::
FLOOR
);
if
(
is_operator_init_done
(
input_shape
))
return
;
set_operator_init_done
();
this
->
force_bypass_acl_path_
=
false
;
check_direct_conv
();
//[kernel_x, kernel_y, IFM, OFM]
new_tensor
(
weights
(),
weights_shape
,
args
.
weight_data
);
//[OFM]
// if (args.biases_data) {
// new_tensor(biases(),biases_shape,args.biases_data);
//}
group
()
=
args
.
num_group
;
//[width, height, IFM]
new_tensor
(
input
(),
input_shape
,
args
.
input_data
);
//[width, height, OFM]
new_tensor
(
output
(),
output_shape
,
args
.
output_data
);
acl_configure
(
conv
,
this
,
conv_info
);
}
void
RunAcl
(
void
*
input
,
void
*
output
)
{
acl
::
ACLOperator
::
acl_run
(
input
,
output
);
}
bool
Bypass_acl
(
const
ConvParam
<
DeviceType
>&
param
)
{
bool
bypass_acl
=
false
;
AclParametersByContext
(
param
);
InitAclLayer
(
param
);
// for performance, more groups impact GPU performance
if
(
this
->
force_bypass_acl_path_
||
args
.
num_group
>=
5
)
{
bypass_acl
=
true
;
}
if
(
args
.
dim
>
2
)
{
bypass_acl
=
true
;
}
if
(
args
.
dilated
)
{
bypass_acl
=
true
;
}
return
bypass_acl
;
}
private:
void
check_direct_conv
()
{
bool
use_direct_conv
=
false
;
const
char
*
pDirectConv
;
pDirectConv
=
getenv
(
"DIRECTCONV"
);
if
(
pDirectConv
)
{
unsigned
int
bdirectconv
;
sscanf
(
pDirectConv
,
"%i"
,
&
bdirectconv
);
if
(
bdirectconv
!=
use_direct_conv
)
{
use_direct_conv
=
bdirectconv
;
printf
(
"DIRECTCONV<%s>
\n
"
,
pDirectConv
);
printf
(
"DIRECTCONV: %x
\n
"
,
use_direct_conv
);
}
}
int
pad_data
[
2
],
kernel
[
2
];
pad_data
[
1
]
=
args
.
pad_rows
;
pad_data
[
0
]
=
args
.
pad_cols
;
kernel
[
1
]
=
args
.
filter_rows
;
kernel
[
0
]
=
args
.
filter_cols
;
if
(
use_direct_conv
&&
((
kernel
[
0
]
==
1
&&
kernel
[
1
]
==
1
&&
pad_data
[
0
]
==
0
&&
pad_data
[
1
]
==
0
)
||
(
kernel
[
0
]
==
3
&&
kernel
[
1
]
==
3
&&
pad_data
[
0
]
<=
1
&&
pad_data
[
1
]
<=
1
)))
{
setConvMethod
();
// NEDirectConvolutionLayer only for 1x1 and 3x3
}
}
void
AclParametersByContext
(
const
ConvParam
<
DeviceType
>&
param
)
{
const
Tensor
*
input
=
param
.
Input
();
Tensor
filter
=
*
param
.
Filter
();
Tensor
*
output
=
param
.
Output
();
int
groups
=
param
.
Groups
();
std
::
vector
<
int
>
strides
=
param
.
Strides
();
std
::
vector
<
int
>
paddings
=
param
.
Paddings
();
std
::
vector
<
int
>
dilations
=
param
.
Dilations
();
const
T
*
input_data
=
input
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
();
const
T
*
weight_data
=
filter
.
data
<
T
>
();
args
.
input_data
=
(
void
*
)
input_data
;
args
.
output_data
=
(
void
*
)
output_data
;
args
.
weight_data
=
(
void
*
)
weight_data
;
args
.
biases_data
=
nullptr
;
// try {
// bias = context.Input<framework::Tensor>("Bias");
// } catch (const std::exception& e) {
// }
// if (bias) {
// const T* biases_data = bias->data<T>();
// args.biases_data = (void*)biases_data;
// }
args
.
num_group
=
groups
;
args
.
dilation_rows
=
dilations
[
0
];
args
.
dilation_cols
=
dilations
[
1
];
if
(
dilations
[
0
]
!=
1
||
dilations
[
1
]
!=
1
)
{
args
.
dilated
=
true
;
}
// NCHW
// std::cout << "In dims: " << (input->dims()).size() << std::endl;
args
.
batch
=
input
->
dims
()[
0
];
args
.
in_depth
=
input
->
dims
()[
1
];
args
.
in_rows
=
input
->
dims
()[
2
];
args
.
in_cols
=
input
->
dims
()[
3
];
std
::
cout
<<
"In N: "
<<
args
.
batch
<<
" C: "
<<
args
.
in_depth
<<
" H: "
<<
args
.
in_rows
<<
" W: "
<<
args
.
in_cols
<<
"
\n
"
;
// NCHW
// std::cout << "Out dims: " << (output->dims()).size() << std::endl;
args
.
out_num
=
output
->
dims
()[
0
];
args
.
out_depth
=
output
->
dims
()[
1
];
args
.
out_rows
=
output
->
dims
()[
2
];
args
.
out_cols
=
output
->
dims
()[
3
];
// std::cout <<"Out N: " << static_cast<int>(output->dims()[0])
// << " C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
// MCHW = OIHW
args
.
filter_rows
=
filter
.
dims
()[
2
];
args
.
filter_cols
=
filter
.
dims
()[
3
];
// std::cout <<"Filter O: " << static_cast<int>(filter.dims()[0])
// << " I: " << static_cast<int>(filter.dims()[1])
// << " H: " << args.filter_rows << " W: " << args.filter_cols << "\n";
// strides(h_stride, w_stride)
args
.
stride_rows
=
strides
[
0
];
args
.
stride_cols
=
strides
[
1
];
// std::cout <<"Stride H: " << args.stride_rows << " W: " <<
// args.stride_cols << "\n";
// paddings(h_pad, w_pad)
args
.
pad_rows
=
paddings
[
0
];
args
.
pad_cols
=
paddings
[
1
];
// std::cout <<"Pad H: " << args.pad_rows << " W: " << args.pad_cols <<
// "\n";
}
acl
::
AclParameters
args
;
};
template
<
>
bool
ConvKernel
<
GPU_MALI
,
float
>::
Init
(
ConvParam
<
GPU_MALI
>*
param
)
{
AclConvOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclConvOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
acl_op
=
new
AclConvOp
<
GPU_MALI
,
float
>
();
this
->
SetAclOp
((
void
*
)
acl_op
,
(
void
*
)
this
);
}
if
(
acl_op
->
Bypass_acl
(
*
param
))
{
std
::
cout
<<
"init acl failed"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
template
<
>
void
ConvKernel
<
GPU_MALI
,
float
>::
Compute
(
const
ConvParam
<
GPU_MALI
>&
param
)
{
std
::
cout
<<
"init acl"
<<
std
::
endl
;
AclConvOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclConvOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
return
;
}
acl
::
AclParameters
&
args
=
acl_op
->
getargs
();
acl_op
->
RunAcl
(
args
.
input_data
,
args
.
output_data
);
}
template
class
ConvKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/elementwise_add_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#pragma once
#include "operators/kernel/elementwise_add_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
T
>
struct
AddFunctor
{
inline
T
operator
()(
T
a
,
T
b
)
const
{
return
a
+
b
;
}
};
template
<
>
bool
ElementwiseAddKernel
<
GPU_MALI
,
float
>::
Init
(
ElementwiseAddParam
<
GPU_MALI
>
*
param
)
{
return
true
;
}
template
<
>
void
ElementwiseAddKernel
<
GPU_MALI
,
float
>::
Compute
(
const
ElementwiseAddParam
<
GPU_MALI
>
&
param
)
{
const
Tensor
*
input_x
=
param
.
InputX
();
const
Tensor
*
input_y
=
param
.
InputY
();
Tensor
*
Out
=
param
.
Out
();
Out
->
mutable_data
<
float
>
();
int
axis
=
param
.
Axis
();
ElementwiseComputeEx
<
AddFunctor
<
float
>
,
float
>
(
input_x
,
input_y
,
axis
,
AddFunctor
<
float
>
(),
Out
);
}
template
class
ElementwiseAddKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/feed_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
>
bool
FeedKernel
<
GPU_MALI
,
float
>::
Init
(
FeedParam
<
GPU_MALI
>
*
param
)
{
return
true
;
}
template
<
>
void
FeedKernel
<
GPU_MALI
,
float
>::
Compute
(
const
FeedParam
<
GPU_MALI
>
&
param
)
{
param
.
Out
()
->
ShareDataWith
(
*
(
param
.
InputX
()));
param
.
Out
()
->
set_lod
(
param
.
InputX
()
->
lod
());
}
template
class
FeedKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/fetch_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/fetch_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
>
bool
FetchKernel
<
GPU_MALI
,
float
>::
Init
(
FetchParam
<
GPU_MALI
>
*
param
)
{
return
true
;
}
template
<
>
void
FetchKernel
<
GPU_MALI
,
float
>::
Compute
(
const
FetchParam
<
GPU_MALI
>
&
param
)
{
param
.
Out
()
->
ShareDataWith
(
*
(
param
.
InputX
()));
}
template
class
FetchKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/fushion_fc_kernel.cpp
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_FC_OP
#include "operators/kernel/fusion_fc_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
>
bool
FusionFcKernel
<
GPU_MALI
,
float
>::
Init
(
FusionFcParam
<
GPU_MALI
>
*
param
)
{
return
true
;
}
template
<
>
void
FusionFcKernel
<
GPU_MALI
,
float
>::
Compute
(
const
FusionFcParam
<
GPU_MALI
>
&
param
)
{
const
Tensor
*
input_x
=
param
.
InputX
();
const
Tensor
*
input_y
=
param
.
InputY
();
const
Tensor
*
input_z
=
param
.
InputZ
();
auto
*
input_z_data
=
input_z
->
data
<
float
>
();
int
axis
=
param
.
Axis
();
Tensor
*
out
=
param
.
Out
();
auto
*
out_data
=
out
->
mutable_data
<
float
>
();
const
Tensor
x_matrix
=
input_x
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
input_x
,
param
.
XNumColDims
())
:
*
input_x
;
const
Tensor
y_matrix
=
input_y
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
input_y
,
param
.
YNumColDims
())
:
*
input_y
;
auto
out_dim
=
out
->
dims
();
if
(
out_dim
.
size
()
!=
2
)
{
out
->
Resize
({
x_matrix
.
dims
()[
0
],
y_matrix
.
dims
()[
1
]});
}
PADDLE_MOBILE_ENFORCE
(
out_dim
.
size
()
==
2
,
" out_dim.size must be 2."
);
PADDLE_MOBILE_ENFORCE
(
input_z
->
dims
().
size
()
==
1
,
"inpu_z size must be 1"
);
PADDLE_MOBILE_ENFORCE
(
out_dim
[
1
]
==
input_z
->
dims
()[
0
],
" out_dim.size must be 2."
);
axis
=
(
axis
==
-
1
?
out_dim
.
size
()
-
input_z
->
dims
().
size
()
:
axis
);
PADDLE_MOBILE_ENFORCE
(
axis
==
1
,
" to fit broadcast, axis = 1. "
)
int64_t
classes
=
input_z
->
numel
();
for
(
int
i
=
0
;
i
<
out_dim
[
0
];
i
++
)
{
memory
::
Copy
(
out_data
+
i
*
classes
,
input_z_data
,
sizeof
(
float
)
*
classes
);
}
for
(
int
i
=
0
;
i
<
out
->
numel
();
i
++
)
{
DLOG
<<
out_data
[
i
];
}
math
::
MatMul
<
float
>
(
x_matrix
,
false
,
y_matrix
,
false
,
static_cast
<
float
>
(
1
),
out
,
static_cast
<
float
>
(
1
));
PADDLE_MOBILE_ENFORCE
(
out_dim
.
size
()
==
2
,
" out_dim.size must be 2."
);
// if (out_dim.size() != 2) {
// out->Resize(out_dim);
// }
}
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/lrn_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef LRN_OP
#pragma once
#include "operators/kernel/lrn_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/kernel/central-arm-func/lrn_arm_func.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
DeviceType
,
typename
T
>
class
AclLrnOp
:
public
acl
::
ACLOperator
{
public:
AclLrnOp
()
{
this
->
force_bypass_acl_path_
=
bypass_acl_class_layer
&
FLAGS_ENABLE_ACL_LRN
;
}
~
AclLrnOp
()
=
default
;
AclLrnOp
(
const
AclLrnOp
&
)
=
delete
;
AclLrnOp
&
operator
=
(
const
AclLrnOp
&
)
=
delete
;
AclLrnOp
(
AclLrnOp
&&
)
=
delete
;
AclLrnOp
&
operator
=
(
AclLrnOp
&&
)
=
delete
;
acl
::
AclParameters
&
getargs
()
{
return
args
;
}
void
InitAclLayer
(
const
LrnParam
<
DeviceType
>&
param
)
{
setTargetHint
(
acl
::
TargetHint
::
OPENCL
);
arm_compute
::
TensorShape
shape
(
args
.
in_cols
,
args
.
in_rows
,
args
.
in_depth
);
if
(
is_operator_init_done
(
shape
))
return
;
set_operator_init_done
();
this
->
force_bypass_acl_path_
=
false
;
arm_compute
::
NormalizationLayerInfo
norm_info
(
arm_compute
::
NormType
::
CROSS_MAP
,
args
.
nsize
,
args
.
alpha
,
args
.
beta
,
args
.
knorm
);
//[width, height, IFM]
new_tensor
(
input
(),
shape
,
args
.
input_data
);
//[width, height, OFM]
new_tensor
(
output
(),
shape
,
args
.
output_data
);
acl_configure
(
lrn
,
this
,
norm_info
);
}
void
Set_bypass
(
bool
bypass
)
{
args
.
is_bypass
=
bypass
;
}
void
RunAcl
(
void
*
input
,
void
*
output
)
{
acl
::
ACLOperator
::
acl_run
(
input
,
output
);
}
bool
Bypass_acl
(
const
LrnParam
<
DeviceType
>&
param
)
{
bool
bypass_acl
=
false
;
AclParametersByContext
(
param
);
InitAclLayer
(
param
);
// for performance, more groups impact GPU performance
if
(
this
->
force_bypass_acl_path_
)
{
bypass_acl
=
true
;
}
return
bypass_acl
;
}
private:
void
AclParametersByContext
(
const
LrnParam
<
DeviceType
>&
param
)
{
const
Tensor
*
in_x
=
param
.
InputX
();
Tensor
*
out
=
param
.
Out
();
int
n
=
param
.
N
();
T
alpha
=
param
.
Alpha
();
T
beta
=
param
.
Beta
();
T
k
=
param
.
K
();
const
T
*
input_data
=
in_x
->
data
<
T
>
();
T
*
output_data
=
out
->
mutable_data
<
T
>
();
args
.
input_data
=
(
void
*
)
input_data
;
args
.
output_data
=
(
void
*
)
output_data
;
args
.
nsize
=
n
;
args
.
alpha
=
alpha
;
args
.
beta
=
beta
;
args
.
knorm
=
k
;
// NCHW
args
.
batch
=
in_x
->
dims
()[
0
];
args
.
in_depth
=
in_x
->
dims
()[
1
];
args
.
in_rows
=
in_x
->
dims
()[
2
];
args
.
in_cols
=
in_x
->
dims
()[
3
];
// std::cout
// << "Out C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
}
acl
::
AclParameters
args
;
};
template
<
>
bool
LrnKernel
<
GPU_MALI
,
float
>::
Init
(
LrnParam
<
GPU_MALI
>*
param
)
{
AclLrnOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclLrnOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
acl_op
=
new
AclLrnOp
<
GPU_MALI
,
float
>
();
this
->
SetAclOp
((
void
*
)
acl_op
,
(
void
*
)
this
);
}
if
(
acl_op
->
Bypass_acl
(
*
param
))
{
acl_op
->
Set_bypass
(
true
);
std
::
cout
<<
"init acl failed"
<<
std
::
endl
;
return
true
;
}
return
true
;
}
template
<
>
void
LrnKernel
<
GPU_MALI
,
float
>::
Compute
(
const
LrnParam
<
GPU_MALI
>&
param
)
{
std
::
cout
<<
"init acl"
<<
std
::
endl
;
AclLrnOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclLrnOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
return
;
}
acl
::
AclParameters
&
args
=
acl_op
->
getargs
();
if
(
args
.
is_bypass
)
{
std
::
cout
<<
"bypass op"
<<
std
::
endl
;
LrnCompute
<
float
>
(
param
);
return
;
}
const
float
*
input_data
=
(
const
float
*
)
args
.
input_data
;
const
float
*
output_data
=
(
const
float
*
)
args
.
output_data
;
for
(
int
n
=
0
;
n
<
args
.
batch
;
++
n
)
{
acl_op
->
RunAcl
((
void
*
)
input_data
,
(
void
*
)
output_data
);
input_data
+=
args
.
in_depth
*
args
.
in_cols
*
args
.
in_rows
;
output_data
+=
args
.
in_depth
*
args
.
in_cols
*
args
.
in_rows
;
}
}
template
class
LrnKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/mul_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MUL_OP
#pragma once
#include "operators/kernel/mul_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
>
bool
MulKernel
<
GPU_MALI
,
float
>::
Init
(
MulParam
<
GPU_MALI
>
*
param
)
{
return
true
;
}
template
<
>
void
MulKernel
<
GPU_MALI
,
float
>::
Compute
(
const
MulParam
<
GPU_MALI
>
&
param
)
{
const
Tensor
*
input_x
=
param
.
InputX
();
const
Tensor
*
input_y
=
param
.
InputY
();
Tensor
*
out
=
param
.
Out
();
out
->
mutable_data
<
float
>
();
const
Tensor
x_matrix
=
input_x
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
input_x
,
param
.
XNumColDims
())
:
*
input_x
;
const
Tensor
y_matrix
=
input_y
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
input_y
,
param
.
YNumColDims
())
:
*
input_y
;
auto
out_dim
=
out
->
dims
();
if
(
out_dim
.
size
()
!=
2
)
{
out
->
Resize
({
x_matrix
.
dims
()[
0
],
y_matrix
.
dims
()[
1
]});
}
math
::
MatMul
<
float
>
(
x_matrix
,
false
,
y_matrix
,
false
,
static_cast
<
float
>
(
1
),
out
,
static_cast
<
float
>
(
0
));
if
(
out_dim
.
size
()
!=
2
)
{
out
->
Resize
(
out_dim
);
}
}
template
class
MulKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/pool_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#pragma once
#include "operators/kernel/pool_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
DeviceType
,
typename
T
>
class
AclPoolOp
:
public
acl
::
ACLOperator
{
public:
AclPoolOp
()
{
this
->
force_bypass_acl_path_
=
bypass_acl_class_layer
&
FLAGS_ENABLE_ACL_POOLING
;
}
~
AclPoolOp
()
=
default
;
AclPoolOp
(
const
AclPoolOp
&
)
=
delete
;
AclPoolOp
&
operator
=
(
const
AclPoolOp
&
)
=
delete
;
AclPoolOp
(
AclPoolOp
&&
)
=
delete
;
AclPoolOp
&
operator
=
(
AclPoolOp
&&
)
=
delete
;
acl
::
AclParameters
&
getargs
()
{
return
args
;
}
void
InitAclLayer
(
const
PoolParam
<
DeviceType
>&
param
)
{
setTargetHint
(
acl
::
TargetHint
::
OPENCL
);
arm_compute
::
TensorShape
input_shape
(
args
.
in_cols
,
args
.
in_rows
,
args
.
in_depth
);
arm_compute
::
TensorShape
output_shape
(
args
.
out_cols
,
args
.
out_rows
,
args
.
out_depth
);
// arm_compute::TensorShape weights_shape(
// args.filter_cols, args.filter_rows, args.in_depth, args.out_depth);
// arm_compute::TensorShape biases_shape(args.out_depth);
arm_compute
::
PoolingLayerInfo
pool_info
;
if
(
args
.
pool_type
==
"max"
)
{
pool_info
=
arm_compute
::
PoolingLayerInfo
(
arm_compute
::
PoolingType
::
MAX
,
args
.
filter_rows
,
arm_compute
::
PadStrideInfo
(
args
.
stride_cols
,
args
.
stride_rows
,
args
.
pad_cols
,
args
.
pad_rows
,
arm_compute
::
DimensionRoundingType
::
CEIL
));
}
else
{
pool_info
=
arm_compute
::
PoolingLayerInfo
(
arm_compute
::
PoolingType
::
AVG
,
args
.
filter_rows
,
arm_compute
::
PadStrideInfo
(
args
.
stride_cols
,
args
.
stride_rows
,
args
.
pad_cols
,
args
.
pad_rows
,
arm_compute
::
DimensionRoundingType
::
CEIL
));
}
if
(
is_operator_init_done
(
input_shape
))
return
;
set_operator_init_done
();
this
->
force_bypass_acl_path_
=
false
;
//[width, height, IFM]
new_tensor
(
input
(),
input_shape
,
args
.
input_data
);
//[width, height, OFM]
new_tensor
(
output
(),
output_shape
,
args
.
output_data
);
acl_configure
(
pooling
,
this
,
pool_info
);
}
void
RunAcl
(
void
*
input
,
void
*
output
)
{
acl
::
ACLOperator
::
acl_run
(
input
,
output
);
}
bool
Bypass_acl
(
const
PoolParam
<
DeviceType
>&
param
)
{
bool
bypass_acl
=
false
;
AclParametersByContext
(
param
);
InitAclLayer
(
param
);
// for performance, more groups impact GPU performance
if
(
this
->
force_bypass_acl_path_
)
{
bypass_acl
=
true
;
}
if
(
args
.
pool_type
!=
"max"
&&
args
.
pool_type
!=
"avg"
)
{
bypass_acl
=
true
;
}
if
(
args
.
filter_rows
!=
args
.
filter_cols
)
{
bypass_acl
=
true
;
}
// if (args.filter_rows!=2 && args.filter_rows!=3) {
// bypass_acl = true;
// }
return
bypass_acl
;
}
private:
void
AclParametersByContext
(
const
PoolParam
<
DeviceType
>&
param
)
{
const
Tensor
*
in_x
=
param
.
Input
();
Tensor
*
out
=
param
.
Output
();
std
::
string
pooling_type
=
param
.
PoolingType
();
std
::
vector
<
int
>
ksize
=
param
.
Ksize
();
std
::
vector
<
int
>
strides
=
param
.
Strides
();
std
::
vector
<
int
>
paddings
=
param
.
Paddings
();
bool
is_global_pooling
=
param
.
isGlobalPooling
();
const
T
*
input_data
=
in_x
->
data
<
T
>
();
T
*
output_data
=
out
->
mutable_data
<
T
>
();
args
.
input_data
=
(
void
*
)
input_data
;
args
.
output_data
=
(
void
*
)
output_data
;
args
.
is_global_pool
=
is_global_pooling
;
args
.
pool_type
=
pooling_type
;
args
.
filter_rows
=
ksize
[
0
];
args
.
filter_cols
=
ksize
[
1
];
args
.
dim
=
ksize
.
size
();
// NCHW
args
.
batch
=
in_x
->
dims
()[
0
];
args
.
in_depth
=
in_x
->
dims
()[
1
];
args
.
in_rows
=
in_x
->
dims
()[
2
];
args
.
in_cols
=
in_x
->
dims
()[
3
];
// std::cout <<"In N: " << args.batch << " C: " << args.in_depth
// << " H: " << args.in_rows << " W: " << args.in_cols << "\n";
// NCHW
// std::cout <<"Out N: " << static_cast<int>(output->dims()[0])
// << " C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
// MCHW = OIHW
// std::cout <<"Filter O: " << static_cast<int>(filter->dims()[0])
// << " I: " << static_cast<int>(filter->dims()[1])
// << " H: " << args.filter_rows << " W: " << args.filter_cols << "\n";
// strides(h_stride, w_stride)
args
.
stride_rows
=
strides
[
0
];
args
.
stride_cols
=
strides
[
1
];
// std::cout <<"PoolingType: " << args.pool_type << "\n";
// std::cout <<"Stride H: " << args.stride_rows << " W: " <<
// args.stride_cols << "\n";
// paddings(h_pad, w_pad)
args
.
pad_rows
=
paddings
[
0
];
args
.
pad_cols
=
paddings
[
1
];
// std::cout <<"Pad H: " << args.pad_rows << " W: " << args.pad_cols <<
// "\n";
args
.
out_depth
=
args
.
in_depth
;
// args.out_rows = out->dims()[2];
// args.out_cols = out->dims()[3];
args
.
out_rows
=
static_cast
<
int
>
(
ceil
(
static_cast
<
float
>
(
args
.
in_rows
+
2
*
args
.
pad_rows
-
args
.
filter_rows
)
/
args
.
stride_rows
))
+
1
;
args
.
out_cols
=
static_cast
<
int
>
(
ceil
(
static_cast
<
float
>
(
args
.
in_cols
+
2
*
args
.
pad_cols
-
args
.
filter_cols
)
/
args
.
stride_cols
))
+
1
;
if
(
is_global_pooling
)
{
args
.
filter_rows
=
args
.
in_rows
;
args
.
filter_cols
=
args
.
in_cols
;
args
.
pad_rows
=
0
;
args
.
pad_cols
=
0
;
}
}
acl
::
AclParameters
args
;
};
template
<
>
bool
PoolKernel
<
GPU_MALI
,
float
>::
Init
(
PoolParam
<
GPU_MALI
>*
param
)
{
AclPoolOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclPoolOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
acl_op
=
new
AclPoolOp
<
GPU_MALI
,
float
>
();
this
->
SetAclOp
((
void
*
)
acl_op
,
(
void
*
)
this
);
}
if
(
acl_op
->
Bypass_acl
(
*
param
))
{
std
::
cout
<<
"init acl failed"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
template
<
>
void
PoolKernel
<
GPU_MALI
,
float
>::
Compute
(
const
PoolParam
<
GPU_MALI
>&
param
)
{
std
::
cout
<<
"init acl"
<<
std
::
endl
;
AclPoolOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclPoolOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
return
;
}
acl
::
AclParameters
&
args
=
acl_op
->
getargs
();
const
float
*
input_data
=
(
const
float
*
)
args
.
input_data
;
const
float
*
output_data
=
(
const
float
*
)
args
.
output_data
;
for
(
int
n
=
0
;
n
<
args
.
batch
;
++
n
)
{
acl_op
->
RunAcl
((
void
*
)
input_data
,
(
void
*
)
output_data
);
input_data
+=
args
.
in_depth
*
args
.
in_cols
*
args
.
in_rows
;
output_data
+=
args
.
in_depth
*
args
.
out_cols
*
args
.
out_rows
;
}
}
template
class
PoolKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/relu_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#pragma once
#include "operators/kernel/relu_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
DeviceType
,
typename
T
>
class
AclReluOp
:
public
acl
::
ACLOperator
{
public:
AclReluOp
()
{
this
->
force_bypass_acl_path_
=
bypass_acl_class_layer
&
FLAGS_ENABLE_ACL_RELU
;
}
~
AclReluOp
()
=
default
;
AclReluOp
(
const
AclReluOp
&
)
=
delete
;
AclReluOp
&
operator
=
(
const
AclReluOp
&
)
=
delete
;
AclReluOp
(
AclReluOp
&&
)
=
delete
;
AclReluOp
&
operator
=
(
AclReluOp
&&
)
=
delete
;
acl
::
AclParameters
&
getargs
()
{
return
args
;
}
void
InitAclLayer
(
const
ReluParam
<
DeviceType
>&
param
)
{
setTargetHint
(
acl
::
TargetHint
::
OPENCL
);
arm_compute
::
TensorShape
input_shape
(
args
.
in_cols
,
args
.
in_rows
,
args
.
in_depth
,
args
.
batch
);
arm_compute
::
TensorShape
output_shape
(
args
.
in_cols
,
args
.
in_rows
,
args
.
in_depth
,
args
.
out_num
);
// arm_compute::TensorShape weights_shape(
// args.filter_cols, args.filter_rows, args.in_depth, args.out_depth);
// arm_compute::TensorShape biases_shape(args.out_depth);
arm_compute
::
ActivationLayerInfo
::
ActivationFunction
type
;
type
=
arm_compute
::
ActivationLayerInfo
::
ActivationFunction
::
RELU
;
arm_compute
::
ActivationLayerInfo
act_info
(
type
);
if
(
is_operator_init_done
(
input_shape
))
return
;
set_operator_init_done
();
this
->
force_bypass_acl_path_
=
false
;
//[width, height, IFM]
new_tensor
(
input
(),
input_shape
,
args
.
input_data
);
//[width, height, OFM]
new_tensor
(
output
(),
output_shape
,
args
.
output_data
);
acl_configure
(
activation
,
this
,
act_info
);
}
void
RunAcl
(
void
*
input
,
void
*
output
)
{
acl
::
ACLOperator
::
acl_run
(
input
,
output
);
}
bool
Bypass_acl
(
const
ReluParam
<
DeviceType
>&
param
)
{
bool
bypass_acl
=
false
;
AclParametersByContext
(
param
);
InitAclLayer
(
param
);
// for performance, more groups impact GPU performance
if
(
this
->
force_bypass_acl_path_
)
{
bypass_acl
=
true
;
}
return
bypass_acl
;
}
private:
void
AclParametersByContext
(
const
ReluParam
<
DeviceType
>&
param
)
{
const
auto
*
input_x
=
param
.
InputX
();
auto
*
out
=
param
.
Out
();
const
T
*
input_data
=
input_x
->
data
<
T
>
();
T
*
output_data
=
out
->
mutable_data
<
T
>
();
args
.
input_data
=
(
void
*
)
input_data
;
args
.
output_data
=
(
void
*
)
output_data
;
args
.
batch
=
input_x
->
dims
()[
0
];
args
.
in_depth
=
input_x
->
dims
()[
1
];
args
.
in_rows
=
input_x
->
dims
()[
2
];
args
.
in_cols
=
input_x
->
dims
()[
3
];
args
.
out_num
=
out
->
dims
()[
0
];
}
acl
::
AclParameters
args
;
};
template
<
>
bool
ReluKernel
<
GPU_MALI
,
float
>::
Init
(
ReluParam
<
GPU_MALI
>*
param
)
{
AclReluOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclReluOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
acl_op
=
new
AclReluOp
<
GPU_MALI
,
float
>
();
this
->
SetAclOp
((
void
*
)
acl_op
,
(
void
*
)
this
);
}
if
(
acl_op
->
Bypass_acl
(
*
param
))
{
std
::
cout
<<
"init acl failed"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
template
<
>
void
ReluKernel
<
GPU_MALI
,
float
>::
Compute
(
const
ReluParam
<
GPU_MALI
>&
param
)
{
std
::
cout
<<
"init acl"
<<
std
::
endl
;
AclReluOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclReluOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
return
;
}
acl
::
AclParameters
&
args
=
acl_op
->
getargs
();
acl_op
->
RunAcl
(
args
.
input_data
,
args
.
output_data
);
}
template
class
ReluKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/reshape_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE_OP
#pragma once
#include "operators/kernel/reshape_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
>
bool
ReshapeKernel
<
GPU_MALI
,
float
>::
Init
(
ReshapeParam
<
GPU_MALI
>
*
param
)
{
return
true
;
}
template
<
>
void
ReshapeKernel
<
GPU_MALI
,
float
>::
Compute
(
const
ReshapeParam
<
GPU_MALI
>
&
param
)
{
const
auto
*
input_x
=
param
.
InputX
();
const
auto
&
input_x_dims
=
input_x
->
dims
();
auto
*
out
=
param
.
Out
();
framework
::
DDim
out_dims
=
out
->
dims
();
const
auto
*
input_shape
=
param
.
InputShape
();
if
(
input_shape
)
{
auto
*
shape_data
=
input_shape
->
data
<
int
>
();
framework
::
Tensor
cpu_shape_tensor
;
auto
shape
=
std
::
vector
<
int
>
(
shape_data
,
shape_data
+
input_shape
->
numel
());
out_dims
=
ValidateShape
(
shape
,
input_x
->
dims
());
}
bool
inplace
=
param
.
Inplace
();
out
->
Resize
(
out_dims
);
if
(
!
inplace
)
{
out
->
mutable_data
<
float
>
();
framework
::
TensorCopy
(
*
input_x
,
out
);
out
->
Resize
(
out_dims
);
}
else
{
out
->
ShareDataWith
(
*
input_x
);
out
->
Resize
(
out_dims
);
}
}
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/softmax_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#pragma once
#include "operators/kernel/softmax_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
DeviceType
,
typename
T
>
class
AclSoftmaxOp
:
public
acl
::
ACLOperator
{
public:
AclSoftmaxOp
()
{
this
->
force_bypass_acl_path_
=
bypass_acl_class_layer
&
FLAGS_ENABLE_ACL_SOFTMAX
;
}
~
AclSoftmaxOp
()
=
default
;
AclSoftmaxOp
(
const
AclSoftmaxOp
&
)
=
delete
;
AclSoftmaxOp
&
operator
=
(
const
AclSoftmaxOp
&
)
=
delete
;
AclSoftmaxOp
(
AclSoftmaxOp
&&
)
=
delete
;
AclSoftmaxOp
&
operator
=
(
AclSoftmaxOp
&&
)
=
delete
;
acl
::
AclParameters
&
getargs
()
{
return
args
;
}
void
InitAclLayer
(
const
SoftmaxParam
<
DeviceType
>&
param
)
{
setTargetHint
(
acl
::
TargetHint
::
OPENCL
);
arm_compute
::
TensorShape
shape
(
args
.
in_depth
,
args
.
batch
);
if
(
is_operator_init_done
(
shape
))
return
;
set_operator_init_done
();
this
->
force_bypass_acl_path_
=
false
;
//[width, height, IFM]
new_tensor
(
input
(),
shape
,
args
.
input_data
);
//[width, height, OFM]
new_tensor
(
output
(),
shape
,
args
.
output_data
);
acl_configure
(
softmax
,
this
,
NULL
);
}
void
RunAcl
(
void
*
input
,
void
*
output
)
{
acl
::
ACLOperator
::
acl_run
(
input
,
output
);
}
bool
Bypass_acl
(
const
SoftmaxParam
<
DeviceType
>&
param
)
{
bool
bypass_acl
=
false
;
AclParametersByContext
(
param
);
InitAclLayer
(
param
);
// for performance, more groups impact GPU performance
if
(
this
->
force_bypass_acl_path_
)
{
bypass_acl
=
true
;
}
return
bypass_acl
;
}
private:
void
AclParametersByContext
(
const
SoftmaxParam
<
DeviceType
>&
param
)
{
const
framework
::
Tensor
*
in_x
=
param
.
InputX
();
framework
::
Tensor
*
out
=
param
.
Out
();
auto
x_dims
=
in_x
->
dims
();
out
->
Resize
(
x_dims
);
const
T
*
input_data
=
in_x
->
data
<
T
>
();
T
*
output_data
=
out
->
data
<
T
>
();
args
.
input_data
=
(
void
*
)
input_data
;
args
.
output_data
=
(
void
*
)
output_data
;
// NCHW
args
.
batch
=
in_x
->
dims
()[
0
];
args
.
in_depth
=
in_x
->
dims
()[
1
];
args
.
out_num
=
out
->
dims
()[
0
];
// std::cout
// << "Out C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
}
acl
::
AclParameters
args
;
};
template
<
>
bool
SoftmaxKernel
<
GPU_MALI
,
float
>::
Init
(
SoftmaxParam
<
GPU_MALI
>*
param
)
{
AclSoftmaxOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclSoftmaxOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
acl_op
=
new
AclSoftmaxOp
<
GPU_MALI
,
float
>
();
this
->
SetAclOp
((
void
*
)
acl_op
,
(
void
*
)
this
);
}
if
(
acl_op
->
Bypass_acl
(
*
param
))
{
std
::
cout
<<
"init acl failed"
<<
std
::
endl
;
return
false
;
}
return
true
;
}
template
<
>
void
SoftmaxKernel
<
GPU_MALI
,
float
>::
Compute
(
const
SoftmaxParam
<
GPU_MALI
>&
param
)
{
std
::
cout
<<
"init acl"
<<
std
::
endl
;
AclSoftmaxOp
<
GPU_MALI
,
float
>*
acl_op
=
reinterpret_cast
<
AclSoftmaxOp
<
GPU_MALI
,
float
>*>
(
this
->
GetAclOp
());
if
(
acl_op
==
nullptr
)
{
return
;
}
acl
::
AclParameters
&
args
=
acl_op
->
getargs
();
const
float
*
input_data
=
(
const
float
*
)
args
.
input_data
;
const
float
*
output_data
=
(
const
float
*
)
args
.
output_data
;
for
(
int
n
=
0
;
n
<
args
.
out_num
;
++
n
)
{
acl_op
->
RunAcl
((
void
*
)
input_data
,
(
void
*
)
output_data
);
input_data
+=
args
.
in_depth
;
output_data
+=
args
.
in_depth
;
}
}
template
class
SoftmaxKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/lookup_op.cpp
浏览文件 @
a29db744
...
@@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
lookup_table
,
ops
::
LookupOp
);
REGISTER_OPERATOR_CPU
(
lookup_table
,
ops
::
LookupOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
src/operators/lrn_op.cpp
浏览文件 @
a29db744
...
@@ -35,8 +35,5 @@ REGISTER_OPERATOR_CPU(lrn, ops::LrnOp);
...
@@ -35,8 +35,5 @@ REGISTER_OPERATOR_CPU(lrn, ops::LrnOp);
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
lrn
,
ops
::
LrnOp
);
REGISTER_OPERATOR_CL
(
lrn
,
ops
::
LrnOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
lrn
,
ops
::
LrnOp
);
#endif
#endif
#endif
src/operators/mul_op.cpp
浏览文件 @
a29db744
...
@@ -61,9 +61,6 @@ REGISTER_OPERATOR_CPU(mul, ops::MulOp);
...
@@ -61,9 +61,6 @@ REGISTER_OPERATOR_CPU(mul, ops::MulOp);
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
mul
,
ops
::
MulOp
);
REGISTER_OPERATOR_CL
(
mul
,
ops
::
MulOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
mul
,
ops
::
MulOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
mul
,
ops
::
MulOp
);
REGISTER_OPERATOR_FPGA
(
mul
,
ops
::
MulOp
);
#endif
#endif
...
...
src/operators/norm_op.cpp
浏览文件 @
a29db744
...
@@ -41,8 +41,7 @@ namespace ops = paddle_mobile::operators;
...
@@ -41,8 +41,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
norm
,
ops
::
NormOp
);
REGISTER_OPERATOR_CPU
(
norm
,
ops
::
NormOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
src/operators/pool_op.cpp
浏览文件 @
a29db744
...
@@ -63,9 +63,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -63,9 +63,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
pool2d
,
ops
::
PoolOp
);
REGISTER_OPERATOR_CPU
(
pool2d
,
ops
::
PoolOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
pool2d
,
ops
::
PoolOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
pool2d
,
ops
::
PoolOp
);
REGISTER_OPERATOR_FPGA
(
pool2d
,
ops
::
PoolOp
);
#endif
#endif
...
...
src/operators/prelu_op.cpp
浏览文件 @
a29db744
...
@@ -36,8 +36,5 @@ namespace ops = paddle_mobile::operators;
...
@@ -36,8 +36,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
prelu
,
ops
::
PReluOp
);
REGISTER_OPERATOR_CPU
(
prelu
,
ops
::
PReluOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
prelu
,
ops
::
PReluOp
);
#endif
#endif
#endif
src/operators/prior_box_op.cpp
浏览文件 @
a29db744
...
@@ -52,8 +52,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -52,8 +52,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
prior_box
,
ops
::
PriorBoxOp
);
REGISTER_OPERATOR_CPU
(
prior_box
,
ops
::
PriorBoxOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
prior_box
,
ops
::
PriorBoxOp
);
REGISTER_OPERATOR_CL
(
prior_box
,
ops
::
PriorBoxOp
);
#endif
#endif
...
...
src/operators/reshape2_op.cpp
浏览文件 @
a29db744
...
@@ -40,9 +40,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -40,9 +40,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
reshape2
,
ops
::
Reshape2Op
);
REGISTER_OPERATOR_CPU
(
reshape2
,
ops
::
Reshape2Op
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
reshape2
,
ops
::
Reshape2Op
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
reshape2
,
ops
::
Reshape2Op
);
REGISTER_OPERATOR_FPGA
(
reshape2
,
ops
::
Reshape2Op
);
#endif
#endif
...
...
src/operators/reshape_op.cpp
浏览文件 @
a29db744
...
@@ -35,9 +35,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -35,9 +35,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
reshape
,
ops
::
ReshapeOp
);
REGISTER_OPERATOR_CPU
(
reshape
,
ops
::
ReshapeOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
reshape
,
ops
::
ReshapeOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
reshape
,
ops
::
ReshapeOp
);
REGISTER_OPERATOR_FPGA
(
reshape
,
ops
::
ReshapeOp
);
#endif
#endif
...
...
src/operators/resize_op.cpp
浏览文件 @
a29db744
...
@@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators;
...
@@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
resize
,
ops
::
ResizeOp
);
REGISTER_OPERATOR_CPU
(
resize
,
ops
::
ResizeOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
resize
,
ops
::
ResizeOp
);
#endif
#endif
#endif
src/operators/scale_op.cpp
浏览文件 @
a29db744
...
@@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators;
...
@@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
scale
,
ops
::
ScaleOp
);
REGISTER_OPERATOR_CPU
(
scale
,
ops
::
ScaleOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
scale
,
ops
::
ScaleOp
);
#endif
#endif
#endif
src/operators/shape_op.cpp
浏览文件 @
a29db744
...
@@ -34,7 +34,5 @@ namespace ops = paddle_mobile::operators;
...
@@ -34,7 +34,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
shape
,
ops
::
ShapeOp
);
REGISTER_OPERATOR_CPU
(
shape
,
ops
::
ShapeOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#endif
#endif
src/operators/slice_op.cpp
浏览文件 @
a29db744
...
@@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
slice
,
ops
::
SliceOp
);
REGISTER_OPERATOR_CPU
(
slice
,
ops
::
SliceOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
slice
,
ops
::
SliceOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
slice
,
ops
::
SliceOp
);
REGISTER_OPERATOR_FPGA
(
slice
,
ops
::
SliceOp
);
#endif
#endif
...
...
src/operators/softmax_op.cpp
浏览文件 @
a29db744
...
@@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
softmax
,
ops
::
SoftmaxOp
);
REGISTER_OPERATOR_CPU
(
softmax
,
ops
::
SoftmaxOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
softmax
,
ops
::
SoftmaxOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
softmax
,
ops
::
SoftmaxOp
);
REGISTER_OPERATOR_FPGA
(
softmax
,
ops
::
SoftmaxOp
);
#endif
#endif
...
...
src/operators/sum_op.cpp
浏览文件 @
a29db744
...
@@ -61,9 +61,6 @@ namespace ops = paddle_mobile::operators;
...
@@ -61,9 +61,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
sum
,
ops
::
SumOp
);
REGISTER_OPERATOR_CPU
(
sum
,
ops
::
SumOp
);
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
sum
,
ops
::
ConcatOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录