Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
a29db744
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a29db744
编写于
3月 14, 2019
作者:
xiebaiyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove mali
上级
ad3844d6
变更
66
展开全部
隐藏空白更改
内联
并排
Showing
66 changed file
with
38 addition
and
3569 deletion
+38
-3569
CMakeLists.txt
CMakeLists.txt
+0
-26
doc/build.md
doc/build.md
+0
-1
doc/design_doc.md
doc/design_doc.md
+0
-5
src/framework/load_ops.h
src/framework/load_ops.h
+21
-31
src/framework/op_registry.h
src/framework/op_registry.h
+0
-3
src/framework/operator.h
src/framework/operator.h
+0
-10
src/operators/activation_op.cpp
src/operators/activation_op.cpp
+0
-3
src/operators/batchnorm_op.cpp
src/operators/batchnorm_op.cpp
+0
-3
src/operators/bilinear_interp_op.cpp
src/operators/bilinear_interp_op.cpp
+1
-2
src/operators/box_coder_op.cpp
src/operators/box_coder_op.cpp
+0
-2
src/operators/concat_op.cpp
src/operators/concat_op.cpp
+1
-3
src/operators/conv_op.cpp
src/operators/conv_op.cpp
+1
-3
src/operators/conv_transpose_op.cpp
src/operators/conv_transpose_op.cpp
+1
-2
src/operators/crf_op.cpp
src/operators/crf_op.cpp
+1
-2
src/operators/elementwise_add_op.cpp
src/operators/elementwise_add_op.cpp
+0
-3
src/operators/elementwise_mul_op.cpp
src/operators/elementwise_mul_op.cpp
+0
-3
src/operators/elementwise_sub_op.cpp
src/operators/elementwise_sub_op.cpp
+0
-3
src/operators/feed_op.cpp
src/operators/feed_op.cpp
+0
-3
src/operators/fetch_op.cpp
src/operators/fetch_op.cpp
+1
-3
src/operators/fusion_conv_add_op.cpp
src/operators/fusion_conv_add_op.cpp
+0
-3
src/operators/fusion_deconv_add_bn_op.cpp
src/operators/fusion_deconv_add_bn_op.cpp
+1
-2
src/operators/fusion_deconv_add_bn_relu_op.cpp
src/operators/fusion_deconv_add_bn_relu_op.cpp
+1
-2
src/operators/fusion_deconv_add_op.cpp
src/operators/fusion_deconv_add_op.cpp
+1
-2
src/operators/fusion_deconv_add_relu_op.cpp
src/operators/fusion_deconv_add_relu_op.cpp
+1
-2
src/operators/fusion_deconv_bn_relu_op.cpp
src/operators/fusion_deconv_bn_relu_op.cpp
+1
-2
src/operators/fusion_deconv_relu_op.cpp
src/operators/fusion_deconv_relu_op.cpp
+1
-2
src/operators/fusion_elementwise_add_relu_op.cpp
src/operators/fusion_elementwise_add_relu_op.cpp
+0
-4
src/operators/fusion_fc_op.cpp
src/operators/fusion_fc_op.cpp
+0
-3
src/operators/fusion_fc_relu_op.cpp
src/operators/fusion_fc_relu_op.cpp
+0
-3
src/operators/gru_unit_op.cpp
src/operators/gru_unit_op.cpp
+1
-2
src/operators/increment_op.cpp
src/operators/increment_op.cpp
+1
-2
src/operators/is_empty_op.cpp
src/operators/is_empty_op.cpp
+1
-2
src/operators/kernel/mali/ACL_Android
src/operators/kernel/mali/ACL_Android
+0
-1
src/operators/kernel/mali/acl_operator.cc
src/operators/kernel/mali/acl_operator.cc
+0
-220
src/operators/kernel/mali/acl_operator.h
src/operators/kernel/mali/acl_operator.h
+0
-1145
src/operators/kernel/mali/acl_tensor.cc
src/operators/kernel/mali/acl_tensor.cc
+0
-160
src/operators/kernel/mali/acl_tensor.h
src/operators/kernel/mali/acl_tensor.h
+0
-128
src/operators/kernel/mali/batchnorm_kernel.cpp
src/operators/kernel/mali/batchnorm_kernel.cpp
+0
-164
src/operators/kernel/mali/concat_kernel.cpp
src/operators/kernel/mali/concat_kernel.cpp
+0
-137
src/operators/kernel/mali/conv_add_kernel.cpp
src/operators/kernel/mali/conv_add_kernel.cpp
+0
-232
src/operators/kernel/mali/conv_kernel.cpp
src/operators/kernel/mali/conv_kernel.cpp
+0
-230
src/operators/kernel/mali/elementwise_add_kernel.cpp
src/operators/kernel/mali/elementwise_add_kernel.cpp
+0
-52
src/operators/kernel/mali/feed_kernel.cpp
src/operators/kernel/mali/feed_kernel.cpp
+0
-36
src/operators/kernel/mali/fetch_kernel.cpp
src/operators/kernel/mali/fetch_kernel.cpp
+0
-36
src/operators/kernel/mali/fushion_fc_kernel.cpp
src/operators/kernel/mali/fushion_fc_kernel.cpp
+0
-75
src/operators/kernel/mali/lrn_kernel.cpp
src/operators/kernel/mali/lrn_kernel.cpp
+0
-157
src/operators/kernel/mali/mul_kernel.cpp
src/operators/kernel/mali/mul_kernel.cpp
+0
-59
src/operators/kernel/mali/pool_kernel.cpp
src/operators/kernel/mali/pool_kernel.cpp
+0
-220
src/operators/kernel/mali/relu_kernel.cpp
src/operators/kernel/mali/relu_kernel.cpp
+0
-134
src/operators/kernel/mali/reshape_kernel.cpp
src/operators/kernel/mali/reshape_kernel.cpp
+0
-61
src/operators/kernel/mali/softmax_kernel.cpp
src/operators/kernel/mali/softmax_kernel.cpp
+0
-139
src/operators/lookup_op.cpp
src/operators/lookup_op.cpp
+1
-2
src/operators/lrn_op.cpp
src/operators/lrn_op.cpp
+0
-3
src/operators/mul_op.cpp
src/operators/mul_op.cpp
+0
-3
src/operators/norm_op.cpp
src/operators/norm_op.cpp
+1
-2
src/operators/pool_op.cpp
src/operators/pool_op.cpp
+0
-3
src/operators/prelu_op.cpp
src/operators/prelu_op.cpp
+0
-3
src/operators/prior_box_op.cpp
src/operators/prior_box_op.cpp
+0
-2
src/operators/reshape2_op.cpp
src/operators/reshape2_op.cpp
+0
-3
src/operators/reshape_op.cpp
src/operators/reshape_op.cpp
+0
-3
src/operators/resize_op.cpp
src/operators/resize_op.cpp
+0
-3
src/operators/scale_op.cpp
src/operators/scale_op.cpp
+0
-3
src/operators/shape_op.cpp
src/operators/shape_op.cpp
+0
-2
src/operators/slice_op.cpp
src/operators/slice_op.cpp
+0
-3
src/operators/softmax_op.cpp
src/operators/softmax_op.cpp
+0
-3
src/operators/sum_op.cpp
src/operators/sum_op.cpp
+0
-3
未找到文件。
CMakeLists.txt
浏览文件 @
a29db744
...
...
@@ -9,7 +9,6 @@ option(WITH_TEST "build with unit tests" ON)
# select the platform to build
option
(
CPU
"build with arm CPU support"
ON
)
option
(
GPU_MALI
"build with arm mali GPU support"
OFF
)
option
(
GPU_CL
"build with OpenCL support"
OFF
)
option
(
FPGA
"build with FPGA support"
OFF
)
if
(
FPGA
)
...
...
@@ -97,31 +96,6 @@ else()
endforeach
()
endif
()
if
(
GPU_MALI
)
add_definitions
(
-DPADDLE_MOBILE_MALI_GPU
)
add_definitions
(
-DUSE_ACL=1
)
add_definitions
(
-DUSE_OPENCL
)
set
(
ACL_ROOT
${
CMAKE_CURRENT_SOURCE_DIR
}
/src/operators/kernel/mali/ACL_Android
)
include_directories
(
${
ACL_ROOT
}
${
ACL_ROOT
}
/include
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-L
${
ACL_ROOT
}
/build"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-larm_compute"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-larm_compute_core"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-larm_compute_graph"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-L
${
ACL_ROOT
}
/build/opencl-1.2-stubs"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-lOpenCL"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-DUSE_ACL=1"
)
else
()
file
(
GLOB_RECURSE _tmp_list src/operators/kernel/mali/*.cpp src/operators/kernel/mali/*.cc
)
foreach
(
f
${
_tmp_list
}
)
list
(
REMOVE_ITEM PADDLE_MOBILE_CC
${
f
}
)
endforeach
()
file
(
GLOB_RECURSE _tmp_list_h src/operators/kernel/mali/*.h
)
foreach
(
f
${
_tmp_list_h
}
)
list
(
REMOVE_ITEM PADDLE_MOBILE_H
${
f
}
)
endforeach
()
endif
()
if
(
FPGA
)
add_definitions
(
-DPADDLE_MOBILE_FPGA
)
file
(
GLOB_RECURSE _tmp_list src/operators/math/*.cpp src/operators/kernel/fpga/*.cc
)
...
...
doc/build.md
浏览文件 @
a29db744
...
...
@@ -46,7 +46,6 @@ root@5affd29d4fc5:/ # ccmake .
DEBUGING ON
FPGA OFF
LOG_PROFILE ON
MALI_GPU OFF
NET googlenet
USE_EXCEPTION ON
USE_OPENMP OFF
...
...
doc/design_doc.md
浏览文件 @
a29db744
...
...
@@ -109,11 +109,6 @@ USE_OP_CPU(conv2d);
REGISTER_OPERATOR_CPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
USE_OP_MALI_GPU
(
conv2d
);
REGISTER_OPERATOR_MALI_GPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA
(
conv2d
);
REGISTER_OPERATOR_FPGA
(
conv2d
,
ops
::
ConvOp
);
...
...
src/framework/load_ops.h
浏览文件 @
a29db744
...
...
@@ -23,15 +23,6 @@ limitations under the License. */
#define LOAD_CPU_OP(op_type)
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#define LOAD_MALI_GPU_OP(op_type) \
extern int TouchOpRegistrar_##op_type##_##mali_gpu(); \
static int use_op_itself_##op_type##_##mali_gpu __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_##mali_gpu()
#else
#define LOAD_MALI_GPU_OP(op_type)
#endif
#ifdef PADDLE_MOBILE_FPGA
#define LOAD_FPGA_OP(op_type) \
extern int TouchOpRegistrar_##op_type##_##fpga(); \
...
...
@@ -46,9 +37,8 @@ limitations under the License. */
static int use_fusion_matcher_itself_##op_type __attribute__((unused)) = \
TouchFusionMatcherRegistrar_##op_type();
#define LOAD_OP(op_type) \
LOAD_CPU_OP(op_type); \
LOAD_MALI_GPU_OP(op_type); \
#define LOAD_OP(op_type) \
LOAD_CPU_OP(op_type); \
LOAD_FPGA_OP(op_type);
#define LOAD_OP1(op_type, device_type) LOAD_##device_type##_OP(op_type);
...
...
@@ -68,7 +58,7 @@ LOAD_OP(fetch)
LOAD_OP
(
fill_constant
)
#endif
#ifdef BATCHNORM_OP
LOAD_OP
2
(
batch_norm
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
batch_norm
,
C
PU
);
#endif
#ifdef BILINEAR_INTERP_OP
LOAD_OP1
(
bilinear_interp
,
CPU
);
...
...
@@ -77,40 +67,40 @@ LOAD_OP1(bilinear_interp, CPU);
LOAD_OP1
(
box_coder
,
CPU
);
#endif
#ifdef CONCAT_OP
LOAD_OP
3
(
concat
,
CPU
,
MALI_G
PU
,
FPGA
);
LOAD_OP
2
(
concat
,
C
PU
,
FPGA
);
#endif
#ifdef CONV_OP
LOAD_OP
3
(
conv2d
,
CPU
,
MALI_G
PU
,
FPGA
);
LOAD_OP
2
(
conv2d
,
C
PU
,
FPGA
);
#endif
#ifdef LRN_OP
LOAD_OP
2
(
lrn
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
lrn
,
C
PU
);
#endif
#ifdef SIGMOID_OP
LOAD_OP1
(
sigmoid
,
CPU
);
#endif
#ifdef FUSION_FC_RELU_OP
LOAD_OP3
(
fusion_fc_relu
,
CPU
,
MALI_GPU
,
FPGA
);
LOAD_OP2
(
fusion_fc_relu
,
CPU
,
FPGA
);
LOAD_FUSION_MATCHER
(
fusion_fc_relu
);
#endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP
LOAD_OP3
(
fusion_elementwise_add_relu
,
CPU
,
MALI_GPU
,
FPGA
);
LOAD_OP2
(
fusion_elementwise_add_relu
,
CPU
,
FPGA
);
LOAD_FUSION_MATCHER
(
fusion_elementwise_add_relu
);
#endif
#ifdef SPLIT_OP
LOAD_OP1
(
split
,
CPU
);
#endif
#ifdef RESIZE_OP
LOAD_OP
2
(
resize
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
resize
,
C
PU
);
#endif
#ifdef FUSION_CONVADDBNRELU_OP
LOAD_OP2
(
fusion_conv_add_bn_relu
,
CPU
,
FPGA
);
LOAD_FUSION_MATCHER
(
fusion_conv_add_bn_relu
);
#endif
#ifdef RESHAPE_OP
LOAD_OP
2
(
reshape
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
reshape
,
C
PU
);
#endif
#ifdef RESHAPE2_OP
LOAD_OP
2
(
reshape2
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
reshape2
,
C
PU
);
#endif
#ifdef TRANSPOSE_OP
LOAD_OP1
(
transpose
,
CPU
);
...
...
@@ -126,11 +116,11 @@ LOAD_OP2(fusion_conv_add_relu, CPU, FPGA);
LOAD_FUSION_MATCHER
(
fusion_conv_add_relu
);
#endif
#ifdef FUSION_CONVADD_OP
LOAD_OP
2
(
fusion_conv_add
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
fusion_conv_add
,
C
PU
);
LOAD_FUSION_MATCHER
(
fusion_conv_add
);
#endif
#ifdef SOFTMAX_OP
LOAD_OP
2
(
softmax
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
softmax
,
C
PU
);
#endif
#ifdef SHAPE_OP
LOAD_OP1
(
shape
,
CPU
);
...
...
@@ -142,13 +132,13 @@ LOAD_OP1(depthwise_conv2d, CPU);
LOAD_OP1
(
conv2d_transpose
,
CPU
);
#endif
#ifdef SCALE_OP
LOAD_OP
2
(
scale
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
scale
,
C
PU
);
#endif
#ifdef ELEMENTWISEADD_OP
LOAD_OP
2
(
elementwise_add
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
elementwise_add
,
C
PU
);
#endif
#ifdef PRELU_OP
LOAD_OP
2
(
prelu
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
prelu
,
C
PU
);
#endif
#ifdef FLATTEN_OP
LOAD_OP1
(
flatten
,
CPU
);
...
...
@@ -182,13 +172,13 @@ LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
LOAD_OP1
(
crf_decoding
,
CPU
);
#endif
#ifdef MUL_OP
LOAD_OP
2
(
mul
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
mul
,
C
PU
);
#endif
#ifdef NORM_OP
LOAD_OP1
(
norm
,
CPU
);
#endif
#ifdef RELU_OP
LOAD_OP
2
(
relu
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
relu
,
C
PU
);
LOAD_OP1
(
relu6
,
CPU
);
#endif
#ifdef IM2SEQUENCE_OP
...
...
@@ -198,11 +188,11 @@ LOAD_OP1(im2sequence, CPU);
LOAD_OP1
(
lookup_table
,
CPU
);
#endif
#ifdef FUSION_FC_OP
LOAD_OP
3
(
fusion_fc
,
CPU
,
MALI_G
PU
,
FPGA
);
LOAD_OP
2
(
fusion_fc
,
C
PU
,
FPGA
);
LOAD_FUSION_MATCHER
(
fusion_fc
);
#endif
#ifdef POOL_OP
LOAD_OP
3
(
pool2d
,
CPU
,
MALI_G
PU
,
FPGA
);
LOAD_OP
2
(
pool2d
,
C
PU
,
FPGA
);
#endif
#ifdef MULTICLASSNMS_OP
LOAD_OP1
(
multiclass_nms
,
CPU
);
...
...
@@ -217,7 +207,7 @@ LOAD_OP1(sum, CPU);
LOAD_OP1
(
elementwise_mul
,
CPU
);
#endif
#ifdef SLICE_OP
LOAD_OP
2
(
slice
,
CPU
,
MALI_G
PU
);
LOAD_OP
1
(
slice
,
C
PU
);
#endif
#ifdef FUSION_CONVBN_OP
LOAD_OP2
(
fusion_conv_bn
,
CPU
,
FPGA
);
...
...
src/framework/op_registry.h
浏览文件 @
a29db744
...
...
@@ -115,9 +115,6 @@ class OpRegistry {
#define REGISTER_OPERATOR_CPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU);
#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI);
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA);
...
...
src/framework/operator.h
浏览文件 @
a29db744
...
...
@@ -130,13 +130,6 @@ class OpKernelBase {
}
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
OpKernelBase
()
{
acl_op_
=
nullptr
;
}
void
*
GetAclOp
()
const
{
return
acl_op_
;
}
void
SetAclOp
(
void
*
op
,
void
*
ob
)
const
{
reinterpret_cast
<
OpKernelBase
<
Dtype
,
P
>
*>
(
ob
)
->
acl_op_
=
op
;
}
#endif
virtual
void
Compute
(
const
P
&
para
)
=
0
;
virtual
bool
Init
(
P
*
para
)
{
return
true
;
}
virtual
~
OpKernelBase
()
=
default
;
...
...
@@ -147,9 +140,6 @@ class OpKernelBase {
#endif
private:
#ifdef PADDLE_MOBILE_MALI_GPU
void
*
acl_op_
;
#endif
};
class
FusionOpMatcher
{
...
...
src/operators/activation_op.cpp
浏览文件 @
a29db744
...
...
@@ -55,9 +55,6 @@ namespace ops = paddle_mobile::operators;
REGISTER_OPERATOR_CPU
(
relu
,
ops
::
ReluOp
);
REGISTER_OPERATOR_CPU
(
relu6
,
ops
::
Relu6Op
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
relu
,
ops
::
ReluOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
relu
,
ops
::
ReluOp
);
#endif
...
...
src/operators/batchnorm_op.cpp
浏览文件 @
a29db744
...
...
@@ -34,9 +34,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
batch_norm
,
ops
::
BatchNormOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
batch_norm
,
ops
::
BatchNormOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
src/operators/bilinear_interp_op.cpp
浏览文件 @
a29db744
...
...
@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
bilinear_interp
,
ops
::
BilinearOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
src/operators/box_coder_op.cpp
浏览文件 @
a29db744
...
...
@@ -58,8 +58,6 @@ REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
box_coder
,
ops
::
BoxCoderOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
src/operators/concat_op.cpp
浏览文件 @
a29db744
...
...
@@ -69,9 +69,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
concat
,
ops
::
ConcatOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
concat
,
ops
::
ConcatOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
concat
,
ops
::
ConcatOp
);
#endif
...
...
src/operators/conv_op.cpp
浏览文件 @
a29db744
...
...
@@ -55,9 +55,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
conv2d
,
ops
::
ConvOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
conv2d
,
ops
::
ConvOp
);
#endif
...
...
src/operators/conv_transpose_op.cpp
浏览文件 @
a29db744
...
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
conv2d_transpose
,
ops
::
ConvOpTranspose
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
conv2d_transpose
,
ops
::
ConvOpTranspose
);
#endif
...
...
src/operators/crf_op.cpp
浏览文件 @
a29db744
...
...
@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
crf_decoding
,
ops
::
CrfOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
src/operators/elementwise_add_op.cpp
浏览文件 @
a29db744
...
...
@@ -33,9 +33,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
elementwise_add
,
ops
::
ElementwiseAddOp
);
...
...
src/operators/elementwise_mul_op.cpp
浏览文件 @
a29db744
...
...
@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
elementwise_mul
,
ops
::
ElementwiseMulOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
elementwise_mul
,
ops
::
ElementwiseMulOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
elementwise_mul
,
ops
::
ElementwiseMulOp
);
#endif
...
...
src/operators/elementwise_sub_op.cpp
浏览文件 @
a29db744
...
...
@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
elementwise_sub
,
ops
::
ElementwiseSubOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
elementwise_sub
,
ops
::
ElementwiseSubOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
src/operators/feed_op.cpp
浏览文件 @
a29db744
...
...
@@ -38,9 +38,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
feed
,
ops
::
FeedOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
feed
,
ops
::
FeedOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
feed
,
ops
::
FeedOp
);
#endif
...
...
src/operators/fetch_op.cpp
浏览文件 @
a29db744
...
...
@@ -30,9 +30,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
fetch
,
ops
::
FetchOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
fetch
,
ops
::
FetchOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fetch
,
ops
::
FetchOp
);
#endif
...
...
src/operators/fusion_conv_add_op.cpp
浏览文件 @
a29db744
...
...
@@ -54,9 +54,6 @@ REGISTER_FUSION_MATCHER(fusion_conv_add, ops::FusionConvAddMatcher);
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
fusion_conv_add
,
ops
::
FusionConvAddOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
fusion_conv_add
,
ops
::
FusionConvAddOp
);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
fusion_conv_add
,
ops
::
FusionConvAddOp
);
...
...
src/operators/fusion_deconv_add_bn_op.cpp
浏览文件 @
a29db744
...
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER
(
fusion_deconv_add_bn
,
ops
::
FusionDeconvAddBNMatcher
);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add_bn
,
ops
::
FusionDeconvAddBNOp
);
#endif
...
...
src/operators/fusion_deconv_add_bn_relu_op.cpp
浏览文件 @
a29db744
...
...
@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu,
ops
::
FusionDeconvAddBNReluMatcher
);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add_bn_relu
,
ops
::
FusionDeconvAddBNReluOp
);
#endif
...
...
src/operators/fusion_deconv_add_op.cpp
浏览文件 @
a29db744
...
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER
(
fusion_deconv_add
,
ops
::
FusionDeconvAddMatcher
);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add
,
ops
::
FusionDeconvAddOp
);
#endif
...
...
src/operators/fusion_deconv_add_relu_op.cpp
浏览文件 @
a29db744
...
...
@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_relu,
ops
::
FusionDeconvAddReluMatcher
);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_add_relu
,
ops
::
FusionDeconvAddReluOp
);
#endif
...
...
src/operators/fusion_deconv_bn_relu_op.cpp
浏览文件 @
a29db744
...
...
@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER
(
fusion_deconv_bn_relu
,
ops
::
FusionDeconvBNReluMatcher
);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_bn_relu
,
ops
::
FusionDeconvBNReluOp
);
#endif
...
...
src/operators/fusion_deconv_relu_op.cpp
浏览文件 @
a29db744
...
...
@@ -23,8 +23,7 @@ namespace operators {}
namespace
ops
=
paddle_mobile
::
operators
;
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_deconv_relu
,
ops
::
FusionDeconvReluOp
);
#endif
...
...
src/operators/fusion_elementwise_add_relu_op.cpp
浏览文件 @
a29db744
...
...
@@ -36,10 +36,6 @@ REGISTER_FUSION_MATCHER(fusion_elementwise_add_relu,
// REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
// ops::FusionElementwiseAddReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
// REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu,
// ops::FusionElementwiseAddReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_elementwise_add_relu
,
ops
::
FusionElementwiseAddReluOp
);
...
...
src/operators/fusion_fc_op.cpp
浏览文件 @
a29db744
...
...
@@ -63,9 +63,6 @@ REGISTER_OPERATOR_CPU(fusion_fc, ops::FusionFcOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
fusion_fc
,
ops
::
FusionFcOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
fusion_fc
,
ops
::
FusionFcOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_fc
,
ops
::
FusionFcOp
);
#endif
...
...
src/operators/fusion_fc_relu_op.cpp
浏览文件 @
a29db744
...
...
@@ -60,9 +60,6 @@ REGISTER_FUSION_MATCHER(fusion_fc_relu, ops::FusionFcReluMatcher);
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
fusion_fc_relu
,
ops
::
FusionFcReluOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
fusion_fc_relu
,
ops
::
FusionFcReluOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
fusion_fc_relu
,
ops
::
FusionFcReluOp
);
#endif
...
...
src/operators/gru_unit_op.cpp
浏览文件 @
a29db744
...
...
@@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
gru_unit
,
ops
::
GruUnitOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
src/operators/increment_op.cpp
浏览文件 @
a29db744
...
...
@@ -37,8 +37,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
increment
,
ops
::
IncrementOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
src/operators/is_empty_op.cpp
浏览文件 @
a29db744
...
...
@@ -34,8 +34,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
is_empty
,
ops
::
IsEmptyOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
ACL_Android
@
591027fc
比较
591027fc
...
591027fc
Subproject commit 591027fcffea084100c756e48356e0f8a48e35e5
src/operators/kernel/mali/acl_operator.cc
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if USE_ACL == 1
#include "acl_operator.h"
// Bitmask built from FLAGS_ENABLE_ACL_* values. Only the CONCAT flag is set
// by default; the ACLOperator constructor replaces the whole mask with the
// value of the BYPASSACL environment variable when that variable is set.
unsigned int bypass_acl_class_layer =
    (0 | FLAGS_ENABLE_ACL_CONCAT |
     /*0xffffffff |*/ /*FLAGS_ENABLE_ACL_FC |*/ /*FLAGS_ENABLE_ACL_LRN |*/
     0);

// Scheduling switch; written by AclEnableSchedule() and by the
// ENABLESCHEDULE environment variable, read back via isScheduleEnable().
int enable_schedule = 0;
#ifdef USE_PROFILING
#include "arm_neon.h"
// Bitmask of MASK_LOG_* values used when USE_PROFILING is defined. The
// ACLOperator constructor overwrites it with the LOGACL environment variable
// when that variable is set.
unsigned int acl_log_flags =
    (0 | MASK_LOG_APP_TIME |
     /*MASK_LOG_ALLOCATE | */
     /*MASK_LOG_ALLOCATE | */
     /*MASK_LOG_RUN | */
     /*MASK_LOG_CONFIG | */
     /*MASK_LOG_COPY | */
     MASK_LOG_ABSVAL | MASK_LOG_BNLL | MASK_LOG_CONV | MASK_LOG_FC |
     MASK_LOG_LRN | MASK_LOG_POOLING | MASK_LOG_RELU | MASK_LOG_SIGMOID |
     MASK_LOG_SOFTMAX | MASK_LOG_TANH | MASK_LOG_LC | MASK_LOG_BN |
     MASK_LOG_CONCAT | 0);
#include <stdio.h>
/* printf */
#include <stdlib.h>
/* getenv */
#endif // USE_PROFILING
// When true, every ACLOperator constructed afterwards sets is_gpu_ to true
// regardless of the constructor argument.
static bool force_enable_gpu = false;

// Records the requested scheduling mode in enable_schedule. A non-zero
// `enable` additionally latches force_enable_gpu (it is never reset here).
// Always returns true.
bool AclEnableSchedule(int enable) {
  enable_schedule = enable;
  if (enable != 0) {
    force_enable_gpu = true;
  }
  return true;
}
// Returns the current value of the global enable_schedule switch.
int isScheduleEnable() {
  const int current = enable_schedule;
  return current;
}
namespace
paddle_mobile
{
namespace
operators
{
namespace
acl
{
// True until the first ACLOperator constructor has probed the GPU backend.
bool ACLOperator::init_gpu_env = true;

#ifdef USE_OPENCL
// Set by the ACLOperator constructor once the OpenCL scheduler has been
// initialised successfully.
bool ACLOperator::support_opencl_ = false;

// Thin forwarder to arm_compute::opencl_is_available().
bool opencl_is_available() {
  const bool available = arm_compute::opencl_is_available();
  return available;
}
#elif defined(USE_OPENGLES)
// Set by the ACLOperator constructor once the GLES scheduler has been
// initialised successfully.
bool ACLOperator::support_opengles_ = false;
#endif
// Constructs an ACL operator wrapper.
//
// is_gpu: requested GPU mode; overridden to true when force_enable_gpu has
//         been latched by AclEnableSchedule().
//
// Side effects on process-wide state:
//  * On the first construction (init_gpu_env == true) the OpenCL or GLES
//    scheduler is initialised and support_opencl_/support_opengles_ is
//    recorded; init_gpu_env is then cleared.
//  * BYPASSACL, LOGACL (USE_PROFILING only) and ENABLESCHEDULE environment
//    variables, when set and parsable, overwrite bypass_acl_class_layer,
//    acl_log_flags and enable_schedule respectively.
//
// Fix over the previous version: the sscanf() results are now checked and the
// scratch variables are initialised, so an unparsable environment variable no
// longer causes an indeterminate value to be compared and stored.
ACLOperator::ACLOperator(bool is_gpu)
    : operator_state_(operator_not_init),
      force_bypass_acl_path_(false),
      target_hint_(TargetHint::DONT_CARE),
      convolution_method_hint_(ConvolutionMethodHint::GEMM),
      _group(1),
      name_(""),
      input_idx_(0),
      output_idx_(0),
      is_gpu_(is_gpu) {
  if (init_gpu_env) {
#ifdef USE_OPENCL
    try {
      if (opencl_is_available()) {
        arm_compute::CLScheduler::get().default_init();
        support_opencl_ = true;
      }
    } catch (const std::exception &) {
      // Scheduler initialisation failed: treat OpenCL as unavailable.
      support_opencl_ = false;
    }
#elif defined(USE_OPENGLES)
    try {
      arm_compute::GCScheduler::get().default_init();
      support_opengles_ = true;
    } catch (const std::exception &) {
      // Scheduler initialisation failed: treat GLES as unavailable.
      support_opengles_ = false;
    }
#endif
    init_gpu_env = false;
  }
  if (force_enable_gpu) is_gpu_ = true;

  const char *pBypassACL = getenv("BYPASSACL");
  if (pBypassACL) {
    // Start from the current mask so a failed parse is a no-op.
    unsigned int bacl = bypass_acl_class_layer;
    if (sscanf(pBypassACL, "%i", &bacl) == 1 &&
        bacl != bypass_acl_class_layer) {
      bypass_acl_class_layer = bacl;
      printf("BYPASSACL<%s>\n", pBypassACL);
      printf("BYPASSACL: %x\n", bypass_acl_class_layer);
    }
  }

#ifdef USE_PROFILING
  const char *pLogACL = getenv("LOGACL");
  if (pLogACL) {
    unsigned int alf = acl_log_flags;
    if (sscanf(pLogACL, "%i", &alf) == 1 && alf != acl_log_flags) {
      acl_log_flags = alf;
      printf("LOGACL<%s>\n", pLogACL);
      printf("LOGACL: %x\n", acl_log_flags);
    }
  }
#endif  // USE_PROFILING

  const char *pEnableSchedule = getenv("ENABLESCHEDULE");
  if (pEnableSchedule) {
    int bshedule = enable_schedule;
    if (sscanf(pEnableSchedule, "%i", &bshedule) == 1 &&
        bshedule != enable_schedule) {
      enable_schedule = bshedule;
      printf("ENABLESCHEDULE<%s>\n", pEnableSchedule);
      printf("ENABLESCHEDULE: %x\n", enable_schedule);
    }
    // Evaluated whenever the variable is set, even if the value did not
    // change: a non-zero switch normalises to 1 and latches force_enable_gpu.
    if (enable_schedule) {
      AclEnableSchedule(1);
    }
  }
}
// Nothing to release explicitly; members clean up after themselves.
ACLOperator::~ACLOperator() {
}
bool
ACLOperator
::
new_tensor
(
std
::
unique_ptr
<
ACLTensor
>&
tensor
,
arm_compute
::
TensorShape
&
shape
,
void
*
mem
,
bool
commit
)
{
auto
acl_tensor
=
new
ACLTensor
(
arm_compute
::
TensorInfo
(
shape
,
arm_compute
::
Format
::
F32
));
acl_tensor
->
set_target
(
getTargetHint
());
acl_tensor
->
bindmem
(
mem
);
if
(
commit
)
acl_tensor
->
commit
();
tensor
=
(
std
::
unique_ptr
<
ACLTensor
>
)
std
::
move
(
acl_tensor
);
return
true
;
}
bool
ACLOperator
::
new_tensor
(
std
::
unique_ptr
<
ACLSubTensor
>&
tensor
,
std
::
unique_ptr
<
ACLTensor
>&
parent
,
arm_compute
::
TensorShape
&
shape
,
arm_compute
::
Coordinates
&
coord
)
{
auto
acl_tensor
=
new
ACLSubTensor
(
parent
,
shape
,
coord
);
acl_tensor
->
set_target
(
getTargetHint
());
tensor
=
(
std
::
unique_ptr
<
ACLSubTensor
>
)
std
::
move
(
acl_tensor
);
return
true
;
}
// Records the tensor type and, if a host buffer is still bound (mem_ is
// non-null), allocates the backing tensor once and copies the buffer into it
// unless this is an output tensor. The binding is dropped afterwards, so a
// second commit without a new bindmem() only updates the type.
void ACLTensor::commit(TensorType type) {
  settensortype(type);
  if (!mem_) {
    return;
  }

  if (!allocate_) {
#ifdef USE_PROFILING
    logtime_util log_time(ACL_ALLOCATE_INFO);
#endif  // USE_PROFILING
    allocate();
    allocate_ = true;
  }

  const bool needs_upload = (type_ != tensor_output);
  if (needs_upload) {
    tensor_copy(mem_);
  }
  mem_ = nullptr;
}
// Copies data row by row between `tensor` and the flat buffer `mem`.
//
// tensor:   ACL tensor to read from or write to.
// mem:      flat host buffer; rows are addressed as
//           ((n * depth + z) * height + y) * width elements from its start.
// toTensor: true copies mem -> tensor, false copies tensor -> mem.
// Returns 0.
//
// The tensor is map()ped for the duration of the copy and unmap()ped after.
int BaseACLTensor::tensor_copy(arm_compute::ITensor *tensor, void *mem,
                               bool toTensor) {
#ifdef USE_PROFILING
  logtime_util log_time(ACL_COPY_INFO);
#endif  // USE_PROFILING
  // Iterate through the rows (not each element)
  arm_compute::Window window;
  window.use_tensor_dimensions(
      tensor->info()->tensor_shape(),
      /* first_dimension =*/arm_compute::Window::DimY);

  int width = tensor->info()->tensor_shape()[0];
  int height = tensor->info()->tensor_shape()[1];
  int deepth = tensor->info()->tensor_shape()[2];
  map();

  // Create an iterator; it works for an arbitrary number of dimensions.
  arm_compute::Iterator it(tensor, window);
  char *const host = static_cast<char *>(mem);

  if (toTensor) {
    // mem->tensor
    arm_compute::execute_window_loop(
        window,
        [&](const arm_compute::Coordinates &id) {
          const auto byte_offset =
              (id[3] * (width * height * deepth) +
               id.z() * (width * height) + id.y() * width + id.x()) *
              tensor->info()->element_size();
          memcpy(it.ptr(), host + byte_offset,
                 width * tensor->info()->element_size());
        },
        it);
  } else {
    // tensor-->mem
    // NOTE(review): unlike the branch above there is no id.x() term here;
    // presumably x is always 0 because the window starts at DimY - confirm.
    arm_compute::execute_window_loop(
        window,
        [&](const arm_compute::Coordinates &id) {
          const auto byte_offset =
              (id[3] * (width * height * deepth) +
               id.z() * (width * height) + id.y() * width) *
              tensor->info()->element_size();
          memcpy(host + byte_offset, it.ptr(),
                 width * tensor->info()->element_size());
        },
        it);
  }

  unmap();
  return 0;
}
}
// namespace acl
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/acl_operator.h
已删除
100755 → 0
浏览文件 @
ad3844d6
此差异已折叠。
点击以展开。
src/operators/kernel/mali/acl_tensor.cc
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "acl_tensor.h"
namespace
paddle_mobile
{
namespace
operators
{
namespace
acl
{
#ifdef USE_ACL
// Creates a TensorType, initialises its allocator with `info` (no memory is
// allocated here) and returns it as an owning ITensor pointer.
template <typename TensorType>
std::unique_ptr<arm_compute::ITensor> initialise_tensor(
    arm_compute::TensorInfo &info) {
  auto created = cpp14::make_unique<TensorType>();
  created->allocator()->init(info);
  // Explicit move: converts unique_ptr<TensorType> to unique_ptr<ITensor>.
  return std::move(created);
}
// Downcasts `tensor` to TensorType and asks its allocator to allocate.
// The cast result is not checked; callers must pass a tensor whose dynamic
// type really is TensorType.
template <typename TensorType>
void tensor_allocate(arm_compute::ITensor &tensor) {
  TensorType *concrete = dynamic_cast<TensorType *>(&tensor);
  concrete->allocator()->allocate();
}
// Builds an unbound tensor description: copies `info`, leaves the target as
// DONT_CARE and creates no backing tensor until set_target() is called.
Tensor::Tensor(arm_compute::TensorInfo &info) noexcept
    : _target(TargetHint::DONT_CARE),
      _info(info),
      _tensor(nullptr) {
}
// Move constructor: takes over the source's target, tensor info and backing
// tensor.
Tensor::Tensor(Tensor &&src) noexcept
    : _target(src._target),
      _info(std::move(src._info)),
      _tensor(std::move(src._tensor)) {
}
// Creates the backing tensor matching `target` from _info and records the
// target. For a target with no compiled-in case the backing tensor is left
// as it was, but _target is still updated.
// Returns the (possibly null) backing tensor pointer.
arm_compute::ITensor *Tensor::set_target(TargetHint target) {
  switch (target) {
#ifdef USE_OPENCL
    case TargetHint::OPENCL: {
      _tensor = initialise_tensor<arm_compute::CLTensor>(_info);
      break;
    }
#elif defined(USE_OPENGLES)
    case TargetHint::OPENGLES: {
      _tensor = initialise_tensor<arm_compute::GCTensor>(_info);
      break;
    }
#endif
    case TargetHint::NEON: {
      _tensor = initialise_tensor<arm_compute::Tensor>(_info);
      break;
    }
    default: {
      break;
    }
  }
  _target = target;

  arm_compute::ITensor *const backing = _tensor.get();
  return backing;
}
// Allocates memory for the backing tensor using the allocator of the
// concrete type selected by _target. Does nothing for targets without a
// compiled-in case.
void Tensor::allocate() {
  switch (_target) {
#ifdef USE_OPENCL
    case TargetHint::OPENCL: {
      tensor_allocate<arm_compute::CLTensor>(*_tensor);
      break;
    }
#elif defined(USE_OPENGLES)
    case TargetHint::OPENGLES: {
      tensor_allocate<arm_compute::GCTensor>(*_tensor);
      break;
    }
#endif
    case TargetHint::NEON: {
      tensor_allocate<arm_compute::Tensor>(*_tensor);
      break;
    }
    default: {
      break;
    }
  }
}
/**
 * Maps the backend tensor when it lives on the compiled-in GPU target
 * (OpenCL or OpenGL ES); a no-op for every other target.
 *
 * @param blocking Forwarded to the backend tensor's map().
 */
void Tensor::map(bool blocking) {
#ifdef USE_OPENCL
  if (_target != TargetHint::OPENCL) {
    return;
  }
  auto *cl_tensor = dynamic_cast<arm_compute::CLTensor *>(tensor());
  cl_tensor->map(blocking);
#elif defined(USE_OPENGLES)
  if (_target != TargetHint::OPENGLES) {
    return;
  }
  auto *gc_tensor = dynamic_cast<arm_compute::GCTensor *>(tensor());
  gc_tensor->map(blocking);
#endif
}
/**
 * Unmaps the backend tensor when it lives on the compiled-in GPU target
 * (OpenCL or OpenGL ES); a no-op for every other target.
 */
void Tensor::unmap() {
#ifdef USE_OPENCL
  if (_target != TargetHint::OPENCL) {
    return;
  }
  auto *cl_tensor = dynamic_cast<arm_compute::CLTensor *>(tensor());
  cl_tensor->unmap();
#elif defined(USE_OPENGLES)
  if (_target != TargetHint::OPENGLES) {
    return;
  }
  auto *gc_tensor = dynamic_cast<arm_compute::GCTensor *>(tensor());
  gc_tensor->unmap();
#endif
}
/**
 * Creates a SubTensorType view over `parent`.
 *
 * @tparam SubTensorType    Concrete sub-tensor class to construct.
 * @tparam ParentTensorType Type `parent` is down-cast to before being passed
 *                          to the SubTensorType constructor.
 * @param parent Parent tensor.
 * @param shape  Shape passed to the sub-tensor constructor.
 * @param coords Coordinates passed to the sub-tensor constructor.
 * @return The new sub-tensor, owned through the ITensor interface.
 */
template <typename SubTensorType, typename ParentTensorType>
std::unique_ptr<arm_compute::ITensor> initialise_subtensor(
    arm_compute::ITensor *parent, arm_compute::TensorShape shape,
    arm_compute::Coordinates coords) {
  ParentTensorType *typed_parent = dynamic_cast<ParentTensorType *>(parent);
  std::unique_ptr<SubTensorType> view =
      cpp14::make_unique<SubTensorType>(typed_parent, shape, coords);
  // Explicit move: the return type differs from the local's type.
  return std::unique_ptr<arm_compute::ITensor>(std::move(view));
}
/**
 * Creates a sub-tensor of `parent`, inheriting the parent's target and
 * backend tensor, and instantiates the backend sub-tensor immediately.
 *
 * @param parent       Parent wrapper; dereferenced unchecked.
 * @param tensor_shape Shape of the sub-tensor (copied).
 * @param coords       Coordinates of the sub-tensor (copied).
 */
SubTensor::SubTensor(Tensor *parent, arm_compute::TensorShape &tensor_shape,
                     arm_compute::Coordinates &coords) noexcept
    : _target(parent->target()),
      _tensor_shape(tensor_shape),
      _coords(coords),
      _parent(parent->tensor()),
      _subtensor(nullptr) {
  instantiate_subtensor();
}
/**
 * A sub-tensor cannot be re-targeted: returns the backend sub-tensor only
 * when `target` matches the target inherited from the parent, otherwise
 * nullptr.
 */
arm_compute::ITensor *SubTensor::set_target(TargetHint target) {
  if (target != _target) {
    return nullptr;
  }
  return _subtensor.get();
}
/** Returns the backend sub-tensor (mutable access). */
arm_compute::ITensor *SubTensor::tensor() {
  arm_compute::ITensor *held = _subtensor.get();
  return held;
}
/** Returns the backend sub-tensor (read-only access). */
const arm_compute::ITensor *SubTensor::tensor() const {
  const arm_compute::ITensor *held = _subtensor.get();
  return held;
}
/** Returns the target this sub-tensor inherited from its parent. */
TargetHint SubTensor::target() const {
  const TargetHint pinned = _target;
  return pinned;
}
/** Intentionally empty: sub-tensors have nothing to allocate here. */
void SubTensor::allocate() {
}
/**
 * Creates the backend sub-tensor for the current target.
 *
 * OpenCL (when compiled in) gets a CLSubTensor; every other target,
 * including NEON, gets the generic arm_compute::SubTensor.
 */
void SubTensor::instantiate_subtensor() {
#ifdef USE_OPENCL
  if (_target == TargetHint::OPENCL) {
    _subtensor =
        initialise_subtensor<arm_compute::CLSubTensor, arm_compute::ICLTensor>(
            _parent, _tensor_shape, _coords);
    return;
  }
#endif
  _subtensor =
      initialise_subtensor<arm_compute::SubTensor, arm_compute::ITensor>(
          _parent, _tensor_shape, _coords);
}
#endif
}
// namespace acl
}
// namespace operators
}
// namespace paddle_mobile
src/operators/kernel/mali/acl_tensor.h
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef ACL_TENSOR_H_
#define ACL_TENSOR_H_
#ifdef USE_ACL
#ifdef USE_OPENCL
#include "arm_compute/runtime/CL/CLSubTensor.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#elif defined(USE_OPENGLES)
#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
#endif
#include "arm_compute/runtime/SubTensor.h"
#include "arm_compute/runtime/Tensor.h"
#include <cstddef>
#include <memory>
#include <type_traits>
#include <utility>
namespace
paddle_mobile
{
namespace
operators
{
namespace
acl
{
/** Execution target a tensor or operator can be pinned to. */
enum class TargetHint {
  DONT_CARE = 0,  // no target chosen
  OPENCL = 1,
  OPENGLES = 2,
  NEON = 3,
};
/** Convolution implementation to use. */
enum class ConvolutionMethodHint {
  GEMM = 0,
  DIRECT = 1,
};
// C++11 back-port of std::make_unique.
namespace cpp14 {

// Selects the return type of make_unique for each kind of T, so that
// exactly one overload below is viable for a given T.
template <class T>
struct _Unique_if {
  using _Single_object = std::unique_ptr<T>;  // T is a non-array type
};

template <class T>
struct _Unique_if<T[]> {
  using _Unknown_bound = std::unique_ptr<T[]>;  // T is U[]
};

template <class T, size_t N>
struct _Unique_if<T[N]> {
  using _Known_bound = void;  // T is U[N]: not supported
};

/** Creates a single object of type T, forwarding `args` to its constructor. */
template <class T, class... Args>
typename _Unique_if<T>::_Single_object make_unique(Args &&... args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

/** Creates a value-initialised array of `n` elements (T is U[]). */
template <class T>
typename _Unique_if<T>::_Unknown_bound make_unique(size_t n) {
  using U = typename std::remove_extent<T>::type;
  return std::unique_ptr<T>(new U[n]());
}

// Arrays of known bound are rejected. Deleting the overload turns misuse
// into a compile-time error instead of an unresolved symbol at link time.
template <class T, class... Args>
typename _Unique_if<T>::_Known_bound make_unique(Args &&...) = delete;

}  // namespace cpp14
/**
 * Wrapper that owns an arm_compute tensor together with the TensorInfo it
 * was (or will be) created from and the target it is pinned to.
 *
 * The backend tensor is only created by set_target(); until then tensor()
 * returns nullptr.
 */
class Tensor {
 public:
  /** Stores a copy of `info`; no backend tensor is created. */
  explicit Tensor(arm_compute::TensorInfo &info) noexcept;
  virtual ~Tensor() {}
  /** Takes over target, info and backend tensor from `src`. */
  Tensor(Tensor &&src) noexcept;

  /** Replaces the stored tensor info, moving from the rvalue argument. */
  void set_info(arm_compute::TensorInfo &&info) {
    // `info` is a named rvalue reference and therefore an lvalue here;
    // without std::move this would copy-assign.
    _info = std::move(info);
  }

  /** Creates the backend tensor for `target`; see the definition. */
  arm_compute::ITensor *set_target(TargetHint target);

  /** Returns the stored tensor info. */
  const arm_compute::TensorInfo &info() const { return _info; }

  /** Returns the backend tensor, or nullptr if none has been created. */
  arm_compute::ITensor *tensor() { return _tensor.get(); }

  /** Allocates backing memory for the backend tensor. */
  void allocate();

  /** Intentionally empty. */
  void init() {}

  /** Returns the target recorded by the last set_target() call. */
  TargetHint target() const { return _target; }

  /** Maps the backend tensor on GPU targets. */
  virtual void map(bool blocking = true);

  /** Unmaps the backend tensor on GPU targets. */
  virtual void unmap();

 private:
  TargetHint _target;                             // target the tensor is pinned on
  arm_compute::TensorInfo _info;                  // tensor meta-data
  std::unique_ptr<arm_compute::ITensor> _tensor;  // owned backend tensor
};
/**
 * View onto a region of a parent Tensor. The target is inherited from the
 * parent; the parent's backend tensor is referenced, not owned.
 */
class SubTensor {
 public:
  /** Creates the view and instantiates the backend sub-tensor. */
  SubTensor(Tensor *parent, arm_compute::TensorShape &tensor_shape,
            arm_compute::Coordinates &coords) noexcept;
  ~SubTensor() = default;

  /** Backend sub-tensor (mutable access). */
  arm_compute::ITensor *tensor();
  /** Backend sub-tensor (read-only access). */
  const arm_compute::ITensor *tensor() const;
  /** Target this sub-tensor is pinned on. */
  TargetHint target() const;
  /** No-op for sub-tensors. */
  void allocate();
  /** Returns the backend sub-tensor if `target` matches, else nullptr. */
  arm_compute::ITensor *set_target(TargetHint target);

 private:
  /** Instantiates a sub-tensor */
  void instantiate_subtensor();

  TargetHint _target;                      /**< Target that this tensor is pinned on */
  arm_compute::TensorShape _tensor_shape;  /**< SubTensor shape */
  arm_compute::Coordinates _coords;        /**< SubTensor Coordinates */
  arm_compute::ITensor *_parent;           /**< Parent tensor */
  std::unique_ptr<arm_compute::ITensor> _subtensor; /**< SubTensor */
};
}
// namespace acl
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif // ACL_TENSOR_H_
src/operators/kernel/mali/batchnorm_kernel.cpp
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BATCHNORM_OP
#include "operators/kernel/batchnorm_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
/**
 * ACL-backed batch-normalisation operator.
 *
 * Bypass_acl() is the entry point: it captures the operator parameters in
 * `args`, configures the ACL layer, and reports whether the ACL path has to
 * be bypassed.
 */
template <typename DeviceType, typename T>
class AclBatchNormOp : public acl::ACLOperator {
 public:
  AclBatchNormOp() {
    this->force_bypass_acl_path_ = bypass_acl_class_layer & FLAGS_ENABLE_ACL_BN;
  }
  ~AclBatchNormOp() = default;

  // Neither copyable nor movable.
  AclBatchNormOp(const AclBatchNormOp&) = delete;
  AclBatchNormOp& operator=(const AclBatchNormOp&) = delete;
  AclBatchNormOp(AclBatchNormOp&&) = delete;
  AclBatchNormOp& operator=(AclBatchNormOp&&) = delete;

  /** Parameters captured by the last Bypass_acl() call. */
  acl::AclParameters& getargs() { return args; }

  /**
   * Creates the ACL tensors from `args` and configures the batch-norm
   * layer. Returns early when is_operator_init_done() reports the operator
   * is already initialised for the current input shape.
   */
  void InitAclLayer(const BatchNormParam<DeviceType>& param) {
    setTargetHint(acl::TargetHint::OPENCL);

    // Shapes are ordered (cols, rows, depth, batch).
    arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
                                         args.in_depth, args.batch);
    arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
                                          args.out_depth, args.out_num);

    if (is_operator_init_done(input_shape)) {
      return;
    }
    set_operator_init_done();
    this->force_bypass_acl_path_ = false;

    // mean / variance / beta / gamma all have one entry per input channel.
    arm_compute::TensorShape mean_shape(args.in_depth);
    arm_compute::TensorShape var_shape(mean_shape);
    arm_compute::TensorShape beta_shape(mean_shape);
    arm_compute::TensorShape gamma_shape(mean_shape);

    new_tensor(input(), input_shape, args.input_data);     // [width, height, IFM]
    new_tensor(output(), output_shape, args.output_data);  // [width, height, OFM]
    new_tensor(mean(), mean_shape, args.mean_data);
    new_tensor(var(), var_shape, args.var_data);
    new_tensor(beta(), beta_shape, args.biases_data);
    new_tensor(gamma(), gamma_shape, args.weight_data);

    acl_configure(bn, this, args.epsilon);
  }

  /** Runs the configured layer on the given buffers. */
  void RunAcl(void* input, void* output) {
    acl::ACLOperator::acl_run(input, output);
  }

  /**
   * Captures `param`, initialises the ACL layer and returns true when the
   * ACL path must be bypassed.
   */
  bool Bypass_acl(const BatchNormParam<DeviceType>& param) {
    AclParametersByContext(param);
    InitAclLayer(param);
    if (this->force_bypass_acl_path_) {
      return true;
    }
    return false;
  }

 private:
  /** Copies data pointers, epsilon and dimensions from `param` into `args`. */
  void AclParametersByContext(const BatchNormParam<DeviceType>& param) {
    const Tensor* src = param.InputX();
    Tensor* dst = param.OutputY();
    const Tensor* scale_t = param.InputScale();
    const Tensor* shift_t = param.InputBias();
    const Tensor* mean_t = param.InputMean();
    const Tensor* variance_t = param.InputVariance();

    const T* src_ptr = src->data<T>();
    T* dst_ptr = dst->mutable_data<T>();
    const T* scale_ptr = scale_t->data<T>();
    const T* shift_ptr = shift_t->data<T>();
    const T* mean_ptr = mean_t->data<T>();
    const T* variance_ptr = variance_t->data<T>();

    const float eps = param.Epsilon();

    // `args` stores untyped, non-const pointers.
    args.input_data = static_cast<void*>(const_cast<T*>(src_ptr));
    args.output_data = static_cast<void*>(dst_ptr);
    args.mean_data = static_cast<void*>(const_cast<T*>(mean_ptr));
    args.var_data = static_cast<void*>(const_cast<T*>(variance_ptr));
    args.epsilon = eps;

    args.dim = src->dims().size();

    // dims() is indexed [0]=batch, [1]=depth, [2]=rows, [3]=cols.
    args.batch = src->dims()[0];
    args.in_depth = src->dims()[1];
    args.in_rows = src->dims()[2];
    args.in_cols = src->dims()[3];

    args.out_num = dst->dims()[0];
    args.out_depth = dst->dims()[1];
    args.out_rows = dst->dims()[2];
    args.out_cols = dst->dims()[3];

    args.weight_data = static_cast<void*>(const_cast<T*>(scale_ptr));
    args.biases_data = static_cast<void*>(const_cast<T*>(shift_ptr));
  }

  acl::AclParameters args;
};
/**
 * Lazily creates the ACL batch-norm operator for this kernel and
 * initialises it from `param`.
 *
 * @return false when the ACL path is bypassed, true otherwise.
 */
template <>
bool BatchNormKernel<GPU_MALI, float>::Init(BatchNormParam<GPU_MALI>* param) {
  using OpType = AclBatchNormOp<GPU_MALI, float>;

  OpType* op = reinterpret_cast<OpType*>(this->GetAclOp());
  if (op == nullptr) {
    // First use: create the operator and register it with the kernel.
    op = new OpType();
    this->SetAclOp(static_cast<void*>(op), static_cast<void*>(this));
  }

  const bool bypassed = op->Bypass_acl(*param);
  if (!bypassed) {
    return true;
  }
  std::cout << "init acl failed" << std::endl;
  return false;
}
/**
 * Runs the ACL batch-norm operator on the buffers captured during Init().
 * Does nothing (beyond the log line) when no operator has been registered.
 */
template <>
void BatchNormKernel<GPU_MALI, float>::Compute(
    const BatchNormParam<GPU_MALI>& param) {
  using OpType = AclBatchNormOp<GPU_MALI, float>;

  std::cout << "init acl" << std::endl;
  OpType* op = reinterpret_cast<OpType*>(this->GetAclOp());
  if (op != nullptr) {
    acl::AclParameters& captured = op->getargs();
    op->RunAcl(captured.input_data, captured.output_data);
  }
}
// Explicit instantiation of the Mali GPU / float batch-norm kernel.
template
class
BatchNormKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/concat_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONCAT_OP
#include "operators/kernel/concat_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
/**
 * ACL-backed concat operator.
 *
 * Bypass_acl() is the entry point: it captures the operator parameters in
 * `args`, configures the ACL layer, and reports whether the ACL path has to
 * be bypassed (always the case unless concatenating along axis 1).
 */
template <typename DeviceType, typename T>
class AclConcatOp : public acl::ACLOperator {
 public:
  AclConcatOp() {
    this->force_bypass_acl_path_ =
        bypass_acl_class_layer & FLAGS_ENABLE_ACL_CONCAT;
  }
  ~AclConcatOp() = default;

  // Neither copyable nor movable.
  AclConcatOp(const AclConcatOp&) = delete;
  AclConcatOp& operator=(const AclConcatOp&) = delete;
  AclConcatOp(AclConcatOp&&) = delete;
  AclConcatOp& operator=(AclConcatOp&&) = delete;

  /** Parameters captured by the last Bypass_acl() call. */
  acl::AclParameters& getargs() { return args; }

  /**
   * Creates one ACL tensor per input plus the output tensor and configures
   * the concat layer. Returns early when is_operator_init_done() reports
   * the operator is already initialised for the current output shape.
   */
  void InitAclLayer(const ConcatParam<DeviceType>& param) {
    setTargetHint(acl::TargetHint::OPENCL);
    const std::vector<framework::LoDTensor*>& inputs = args.in_tensor;

    // Shape is ordered (cols, rows, depth, batch).
    arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
                                          args.out_depth, args.batch);

    if (is_operator_init_done(output_shape)) {
      return;
    }
    set_operator_init_done();
    this->force_bypass_acl_path_ = false;

    // Only the type of `type` matters to InputdataPtr; value-initialise it
    // so that no indeterminate value is passed by value.
    T type{};

    const int input_count = static_cast<int>(inputs.size());
    for (int i = 0; i < input_count; i++) {
      // dims() is indexed [0]=batch, [1]=channels, [2]=rows, [3]=cols, the
      // same mapping AclParametersByContext uses for the output. The input
      // shape must therefore be (cols, rows, channels), in line with
      // output_shape; the previous code passed (rows, cols, channels),
      // which transposes non-square inputs.
      int in_channels = inputs[i]->dims()[1];
      int in_height = inputs[i]->dims()[2];
      int in_width = inputs[i]->dims()[3];
      arm_compute::TensorShape in_shape(in_width, in_height, in_channels);

      new_tensor(cinput(i), in_shape,
                 acl::InputdataPtr(this, args.in_tensor, type, i));
    }

    //[width, height, OFM]
    new_tensor(output(), output_shape, args.output_data);

    acl_configure(concat, this, inputs.size());
  }

  /** Runs the configured layer on the given inputs and output buffer. */
  void RunAcl(const std::vector<framework::LoDTensor*>& input, void* output) {
    T type{};  // type tag only; see InitAclLayer
    acl::acl_run(this, input, output, type);
  }

  /**
   * Captures `param`, initialises the ACL layer and returns true when the
   * ACL path must be bypassed: either it is force-bypassed or the concat is
   * not along axis 1.
   */
  bool Bypass_acl(const ConcatParam<DeviceType>& param) {
    AclParametersByContext(param);
    InitAclLayer(param);
    return this->force_bypass_acl_path_ || !args.is_channel_concat;
  }

 private:
  /** Copies inputs, output pointer, axis flag and output dims into `args`. */
  void AclParametersByContext(const ConcatParam<DeviceType>& param) {
    auto inputs = param.Inputs();
    auto* output = param.Out();
    int64_t axis = param.Axis();

    // `output` has a dependent type, so the member template call needs the
    // `template` disambiguator.
    T* output_data = output->template mutable_data<T>();

    args.is_channel_concat = (axis == 1);
    args.in_tensor = inputs;
    args.output_data = static_cast<void*>(output_data);

    // dims() is indexed [0]=batch, [1]=depth, [2]=rows, [3]=cols.
    args.batch = output->dims()[0];
    args.out_depth = output->dims()[1];
    args.out_rows = output->dims()[2];
    args.out_cols = output->dims()[3];
  }

  acl::AclParameters args;
};
/**
 * Lazily creates the ACL concat operator for this kernel and initialises it
 * from `param`.
 *
 * @return false when the ACL path is bypassed, true otherwise.
 */
template <>
bool ConcatKernel<GPU_MALI, float>::Init(ConcatParam<GPU_MALI>* param) {
  using OpType = AclConcatOp<GPU_MALI, float>;

  OpType* op = reinterpret_cast<OpType*>(this->GetAclOp());
  if (op == nullptr) {
    // First use: create the operator and register it with the kernel.
    op = new OpType();
    this->SetAclOp(static_cast<void*>(op), static_cast<void*>(this));
  }

  const bool bypassed = op->Bypass_acl(*param);
  if (!bypassed) {
    return true;
  }
  std::cout << "init acl failed" << std::endl;
  return false;
}
/**
 * Runs the ACL concat operator on the inputs and output captured during
 * Init(). Does nothing (beyond the log line) when no operator has been
 * registered.
 */
template <>
void ConcatKernel<GPU_MALI, float>::Compute(
    const ConcatParam<GPU_MALI>& param) {
  using OpType = AclConcatOp<GPU_MALI, float>;

  std::cout << "init acl" << std::endl;
  OpType* op = reinterpret_cast<OpType*>(this->GetAclOp());
  if (op != nullptr) {
    acl::AclParameters& captured = op->getargs();
    op->RunAcl(captured.in_tensor, captured.output_data);
  }
}
// Explicit instantiation of the Mali GPU / float concat kernel.
template
class
ConcatKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/conv_add_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
/**
 * ACL-backed fused convolution + bias-add operator.
 *
 * Bypass_acl() is the entry point: it captures the operator parameters in
 * `args`, configures the ACL layer, and reports whether the ACL path has to
 * be bypassed.
 */
template <typename DeviceType, typename T>
class AclConvAddOp : public acl::ACLOperator {
 public:
  AclConvAddOp() {
    this->force_bypass_acl_path_ =
        bypass_acl_class_layer & FLAGS_ENABLE_ACL_CONV;
  }
  ~AclConvAddOp() = default;

  // Neither copyable nor movable.
  AclConvAddOp(const AclConvAddOp&) = delete;
  AclConvAddOp& operator=(const AclConvAddOp&) = delete;
  AclConvAddOp(AclConvAddOp&&) = delete;
  AclConvAddOp& operator=(AclConvAddOp&&) = delete;

  /** Parameters captured by the last Bypass_acl() call. */
  acl::AclParameters& getargs() { return args; }

  /**
   * Creates the ACL tensors from `args` and configures the convolution
   * layer. Returns early when is_operator_init_done() reports the operator
   * is already initialised for the current input shape.
   */
  void InitAclLayer(const FusionConvAddParam<DeviceType>& param) {
    setTargetHint(acl::TargetHint::OPENCL);

    // Shapes are ordered (cols, rows, depth, batch).
    arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
                                         args.in_depth, args.batch);
    arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
                                          args.out_depth, args.out_num);
    arm_compute::TensorShape weights_shape(args.filter_cols, args.filter_rows,
                                           args.in_depth / args.num_group,
                                           args.out_depth);
    arm_compute::TensorShape biases_shape(args.out_depth);
    arm_compute::PadStrideInfo conv_info(
        args.stride_cols, args.stride_rows, args.pad_cols, args.pad_rows,
        arm_compute::DimensionRoundingType::FLOOR);

    if (is_operator_init_done(input_shape)) {
      return;
    }
    set_operator_init_done();
    this->force_bypass_acl_path_ = false;

    // NOTE(review): check_direct_conv() is not called here (the call was
    // already commented out); confirm whether that is intentional.
    // check_direct_conv();

    group() = args.num_group;

    //[kernel_x, kernel_y, IFM, OFM]
    new_tensor(weights(), weights_shape, args.weight_data);

    //[OFM]
    if (args.biases_data) {
      new_tensor(biases(), biases_shape, args.biases_data);
    }

    //[width, height, IFM]
    new_tensor(input(), input_shape, args.input_data);

    //[width, height, OFM]
    new_tensor(output(), output_shape, args.output_data);

    acl_configure(conv, this, conv_info);
  }

  /** Runs the configured layer on the given buffers. */
  void RunAcl(void* input, void* output) {
    acl::ACLOperator::acl_run(input, output);
  }

  /**
   * Captures `param`, initialises the ACL layer and returns true when the
   * ACL path must be bypassed: force-bypassed, five or more groups,
   * `args.dim > 2`, or a dilated convolution.
   */
  bool Bypass_acl(const FusionConvAddParam<DeviceType>& param) {
    bool bypass_acl = false;
    AclParametersByContext(param);
    InitAclLayer(param);
    // for performance, more groups impact GPU performance
    if (this->force_bypass_acl_path_ || args.num_group >= 5) {
      bypass_acl = true;
    }
    // NOTE(review): args.dim is not assigned anywhere in this class; confirm
    // where it gets its value.
    if (args.dim > 2) {
      bypass_acl = true;
    }
    if (args.dilated) {
      bypass_acl = true;
    }
    return bypass_acl;
  }

 private:
  /**
   * Selects the direct convolution method when the DIRECTCONV environment
   * variable parses to a non-zero integer and the kernel is 1x1 without
   * padding or 3x3 with padding of at most 1.
   */
  void check_direct_conv() {
    bool use_direct_conv = false;
    const char* pDirectConv = getenv("DIRECTCONV");
    if (pDirectConv) {
      // "%i" stores into an int. Initialise it and check the conversion
      // count so that an unparsable value is never read.
      int bdirectconv = 0;
      if (sscanf(pDirectConv, "%i", &bdirectconv) == 1 && bdirectconv != 0) {
        use_direct_conv = true;
        printf("DIRECTCONV<%s>\n", pDirectConv);
        printf("DIRECTCONV: %x\n", use_direct_conv);
      }
    }

    // Index 0 holds the column value, index 1 the row value.
    int pad_data[2], kernel[2];
    pad_data[1] = args.pad_rows;
    pad_data[0] = args.pad_cols;
    kernel[1] = args.filter_rows;
    kernel[0] = args.filter_cols;

    const bool is_1x1_unpadded = kernel[0] == 1 && kernel[1] == 1 &&
                                 pad_data[0] == 0 && pad_data[1] == 0;
    const bool is_3x3_small_pad = kernel[0] == 3 && kernel[1] == 3 &&
                                  pad_data[0] <= 1 && pad_data[1] <= 1;
    if (use_direct_conv && (is_1x1_unpadded || is_3x3_small_pad)) {
      setConvMethod();  // NEDirectConvolutionLayer only for 1x1 and 3x3
    }
  }

  /** Copies data pointers, groups, dilation, dims, strides and padding. */
  void AclParametersByContext(const FusionConvAddParam<DeviceType>& param) {
    const Tensor* input = param.Input();
    const Tensor& filter = *param.Filter();
    Tensor* output = param.Output();
    // Must start as nullptr: it is read after the try block even when
    // param.Bias() throws.
    Tensor* bias = nullptr;

    int groups = param.Groups();
    const std::vector<int>& strides = param.Strides();
    const std::vector<int>& paddings = param.Paddings();
    const std::vector<int>& dilations = param.Dilations();

    const T* input_data = input->data<T>();
    T* output_data = output->mutable_data<T>();
    const T* weight_data = filter.data<T>();

    // `args` stores untyped, non-const pointers.
    args.input_data = static_cast<void*>(const_cast<T*>(input_data));
    args.output_data = static_cast<void*>(output_data);
    args.weight_data = static_cast<void*>(const_cast<T*>(weight_data));
    args.biases_data = nullptr;

    // Best effort: a failure to obtain the bias leaves it absent.
    try {
      bias = param.Bias();
    } catch (const std::exception& e) {
    }
    if (bias) {
      const T* biases_data = bias->data<T>();
      args.biases_data = static_cast<void*>(const_cast<T*>(biases_data));
    }

    args.num_group = groups;

    args.dilation_rows = dilations[0];
    args.dilation_cols = dilations[1];
    if (dilations[0] != 1 || dilations[1] != 1) {
      args.dilated = true;
    }

    // NCHW
    args.batch = input->dims()[0];
    args.in_depth = input->dims()[1];
    args.in_rows = input->dims()[2];
    args.in_cols = input->dims()[3];

    // NCHW
    args.out_num = output->dims()[0];
    args.out_depth = output->dims()[1];
    args.out_rows = output->dims()[2];
    args.out_cols = output->dims()[3];

    // MCHW = OIHW
    args.filter_rows = filter.dims()[2];
    args.filter_cols = filter.dims()[3];

    // strides(h_stride, w_stride)
    args.stride_rows = strides[0];
    args.stride_cols = strides[1];

    // paddings(h_pad, w_pad)
    args.pad_rows = paddings[0];
    args.pad_cols = paddings[1];
  }

  acl::AclParameters args;
};
/**
 * Lazily creates the ACL conv+add operator for this kernel and initialises
 * it from `param`.
 *
 * @return false when the ACL path is bypassed, true otherwise.
 */
template <>
bool ConvAddKernel<GPU_MALI, float>::Init(
    FusionConvAddParam<GPU_MALI>* param) {
  using OpType = AclConvAddOp<GPU_MALI, float>;

  OpType* op = reinterpret_cast<OpType*>(this->GetAclOp());
  if (op == nullptr) {
    // First use: create the operator and register it with the kernel.
    op = new OpType();
    this->SetAclOp(static_cast<void*>(op), static_cast<void*>(this));
  }

  const bool bypassed = op->Bypass_acl(*param);
  if (!bypassed) {
    return true;
  }
  std::cout << "init acl failed" << std::endl;
  return false;
}
/**
 * Runs the ACL conv+add operator on the buffers captured during Init().
 * Does nothing (beyond the log line) when no operator has been registered.
 */
template <>
void ConvAddKernel<GPU_MALI, float>::Compute(
    const FusionConvAddParam<GPU_MALI>& param) {
  using OpType = AclConvAddOp<GPU_MALI, float>;

  std::cout << "init acl" << std::endl;
  OpType* op = reinterpret_cast<OpType*>(this->GetAclOp());
  if (op != nullptr) {
    acl::AclParameters& captured = op->getargs();
    op->RunAcl(captured.input_data, captured.output_data);
  }
}
// Explicit instantiation of the Mali GPU / float conv+add kernel.
template
class
ConvAddKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/conv_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
/**
 * ACL-backed convolution operator (no bias).
 *
 * Bypass_acl() is the entry point: it captures the operator parameters in
 * `args`, configures the ACL layer, and reports whether the ACL path has to
 * be bypassed.
 */
template <typename DeviceType, typename T>
class AclConvOp : public acl::ACLOperator {
 public:
  AclConvOp() {
    this->force_bypass_acl_path_ =
        bypass_acl_class_layer & FLAGS_ENABLE_ACL_CONV;
  }
  ~AclConvOp() = default;

  // Neither copyable nor movable.
  AclConvOp(const AclConvOp&) = delete;
  AclConvOp& operator=(const AclConvOp&) = delete;
  AclConvOp(AclConvOp&&) = delete;
  AclConvOp& operator=(AclConvOp&&) = delete;

  /** Parameters captured by the last Bypass_acl() call. */
  acl::AclParameters& getargs() { return args; }

  /**
   * Creates the ACL tensors from `args` and configures the convolution
   * layer. Returns early when is_operator_init_done() reports the operator
   * is already initialised for the current input shape.
   */
  void InitAclLayer(const ConvParam<DeviceType>& param) {
    setTargetHint(acl::TargetHint::OPENCL);

    // Shapes are ordered (cols, rows, depth, batch).
    arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
                                         args.in_depth, args.batch);
    arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
                                          args.out_depth, args.out_num);
    arm_compute::TensorShape weights_shape(args.filter_cols, args.filter_rows,
                                           args.in_depth / args.num_group,
                                           args.out_depth);
    arm_compute::PadStrideInfo conv_info(
        args.stride_cols, args.stride_rows, args.pad_cols, args.pad_rows,
        arm_compute::DimensionRoundingType::FLOOR);

    if (is_operator_init_done(input_shape)) {
      return;
    }
    set_operator_init_done();
    this->force_bypass_acl_path_ = false;

    check_direct_conv();

    //[kernel_x, kernel_y, IFM, OFM]
    new_tensor(weights(), weights_shape, args.weight_data);

    // No bias tensor is created for the plain convolution.

    group() = args.num_group;

    //[width, height, IFM]
    new_tensor(input(), input_shape, args.input_data);

    //[width, height, OFM]
    new_tensor(output(), output_shape, args.output_data);

    acl_configure(conv, this, conv_info);
  }

  /** Runs the configured layer on the given buffers. */
  void RunAcl(void* input, void* output) {
    acl::ACLOperator::acl_run(input, output);
  }

  /**
   * Captures `param`, initialises the ACL layer and returns true when the
   * ACL path must be bypassed: force-bypassed, five or more groups,
   * `args.dim > 2`, or a dilated convolution.
   */
  bool Bypass_acl(const ConvParam<DeviceType>& param) {
    bool bypass_acl = false;
    AclParametersByContext(param);
    InitAclLayer(param);
    // for performance, more groups impact GPU performance
    if (this->force_bypass_acl_path_ || args.num_group >= 5) {
      bypass_acl = true;
    }
    // NOTE(review): args.dim is not assigned anywhere in this class; confirm
    // where it gets its value.
    if (args.dim > 2) {
      bypass_acl = true;
    }
    if (args.dilated) {
      bypass_acl = true;
    }
    return bypass_acl;
  }

 private:
  /**
   * Selects the direct convolution method when the DIRECTCONV environment
   * variable parses to a non-zero integer and the kernel is 1x1 without
   * padding or 3x3 with padding of at most 1.
   */
  void check_direct_conv() {
    bool use_direct_conv = false;
    const char* pDirectConv = getenv("DIRECTCONV");
    if (pDirectConv) {
      // "%i" stores into an int. Initialise it and check the conversion
      // count so that an unparsable value is never read.
      int bdirectconv = 0;
      if (sscanf(pDirectConv, "%i", &bdirectconv) == 1 && bdirectconv != 0) {
        use_direct_conv = true;
        printf("DIRECTCONV<%s>\n", pDirectConv);
        printf("DIRECTCONV: %x\n", use_direct_conv);
      }
    }

    // Index 0 holds the column value, index 1 the row value.
    int pad_data[2], kernel[2];
    pad_data[1] = args.pad_rows;
    pad_data[0] = args.pad_cols;
    kernel[1] = args.filter_rows;
    kernel[0] = args.filter_cols;

    const bool is_1x1_unpadded = kernel[0] == 1 && kernel[1] == 1 &&
                                 pad_data[0] == 0 && pad_data[1] == 0;
    const bool is_3x3_small_pad = kernel[0] == 3 && kernel[1] == 3 &&
                                  pad_data[0] <= 1 && pad_data[1] <= 1;
    if (use_direct_conv && (is_1x1_unpadded || is_3x3_small_pad)) {
      setConvMethod();  // NEDirectConvolutionLayer only for 1x1 and 3x3
    }
  }

  /** Copies data pointers, groups, dilation, dims, strides and padding. */
  void AclParametersByContext(const ConvParam<DeviceType>& param) {
    const Tensor* input = param.Input();
    const Tensor& filter = *param.Filter();
    Tensor* output = param.Output();

    int groups = param.Groups();
    const std::vector<int>& strides = param.Strides();
    const std::vector<int>& paddings = param.Paddings();
    const std::vector<int>& dilations = param.Dilations();

    const T* input_data = input->data<T>();
    T* output_data = output->mutable_data<T>();
    const T* weight_data = filter.data<T>();

    // `args` stores untyped, non-const pointers.
    args.input_data = static_cast<void*>(const_cast<T*>(input_data));
    args.output_data = static_cast<void*>(output_data);
    args.weight_data = static_cast<void*>(const_cast<T*>(weight_data));
    args.biases_data = nullptr;  // the plain convolution has no bias

    args.num_group = groups;

    args.dilation_rows = dilations[0];
    args.dilation_cols = dilations[1];
    if (dilations[0] != 1 || dilations[1] != 1) {
      args.dilated = true;
    }

    // NCHW
    args.batch = input->dims()[0];
    args.in_depth = input->dims()[1];
    args.in_rows = input->dims()[2];
    args.in_cols = input->dims()[3];
    std::cout << "In N: " << args.batch << " C: " << args.in_depth
              << " H: " << args.in_rows << " W: " << args.in_cols << "\n";

    // NCHW
    args.out_num = output->dims()[0];
    args.out_depth = output->dims()[1];
    args.out_rows = output->dims()[2];
    args.out_cols = output->dims()[3];

    // MCHW = OIHW
    args.filter_rows = filter.dims()[2];
    args.filter_cols = filter.dims()[3];

    // strides(h_stride, w_stride)
    args.stride_rows = strides[0];
    args.stride_cols = strides[1];

    // paddings(h_pad, w_pad)
    args.pad_rows = paddings[0];
    args.pad_cols = paddings[1];
  }

  acl::AclParameters args;
};
/**
 * Lazily creates the ACL convolution operator for this kernel and
 * initialises it from `param`.
 *
 * @return false when the ACL path is bypassed, true otherwise.
 */
template <>
bool ConvKernel<GPU_MALI, float>::Init(ConvParam<GPU_MALI>* param) {
  using OpType = AclConvOp<GPU_MALI, float>;

  OpType* op = reinterpret_cast<OpType*>(this->GetAclOp());
  if (op == nullptr) {
    // First use: create the operator and register it with the kernel.
    op = new OpType();
    this->SetAclOp(static_cast<void*>(op), static_cast<void*>(this));
  }

  const bool bypassed = op->Bypass_acl(*param);
  if (!bypassed) {
    return true;
  }
  std::cout << "init acl failed" << std::endl;
  return false;
}
/**
 * Runs the ACL convolution operator on the buffers captured during Init().
 * Does nothing (beyond the log line) when no operator has been registered.
 */
template <>
void ConvKernel<GPU_MALI, float>::Compute(const ConvParam<GPU_MALI>& param) {
  using OpType = AclConvOp<GPU_MALI, float>;

  std::cout << "init acl" << std::endl;
  OpType* op = reinterpret_cast<OpType*>(this->GetAclOp());
  if (op != nullptr) {
    acl::AclParameters& captured = op->getargs();
    op->RunAcl(captured.input_data, captured.output_data);
  }
}
// Explicit instantiation of the Mali GPU / float convolution kernel.
template
class
ConvKernel
<
GPU_MALI
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/elementwise_add_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#pragma once
#include "operators/kernel/elementwise_add_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
// Binary functor returning the sum of its two arguments.
template <typename T>
struct AddFunctor {
  inline T operator()(T lhs, T rhs) const {
    T sum = lhs + rhs;
    return sum;
  }
};
// No set-up is needed for the Mali elementwise-add kernel; always reports
// success and leaves `param` untouched.
template <>
bool ElementwiseAddKernel<GPU_MALI, float>::Init(
    ElementwiseAddParam<GPU_MALI>* param) {
  return true;
}
// Adds InputX and InputY element-wise into Out.
//
// Out's float storage is requested via mutable_data() first; the addition
// itself is delegated to ElementwiseComputeEx with an AddFunctor<float> and
// the axis taken from `param`.
template <>
void ElementwiseAddKernel<GPU_MALI, float>::Compute(
    const ElementwiseAddParam<GPU_MALI>& param) {
  const Tensor* lhs = param.InputX();
  const Tensor* rhs = param.InputY();
  Tensor* result = param.Out();
  result->mutable_data<float>();

  const int axis = param.Axis();
  ElementwiseComputeEx<AddFunctor<float>, float>(lhs, rhs, axis,
                                                 AddFunctor<float>(), result);
}

// Explicit instantiation of the Mali GPU float elementwise-add kernel.
template class ElementwiseAddKernel<GPU_MALI, float>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/feed_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
// The Mali feed kernel has nothing to prepare; always reports success.
template <>
bool FeedKernel<GPU_MALI, float>::Init(FeedParam<GPU_MALI>* param) {
  return true;
}
// Makes Out share InputX's data and then copies InputX's LoD onto Out.
template <>
void FeedKernel<GPU_MALI, float>::Compute(const FeedParam<GPU_MALI>& param) {
  auto source = param.InputX();
  auto target = param.Out();
  target->ShareDataWith(*source);
  target->set_lod(source->lod());
}

// Explicit instantiation of the Mali GPU float feed kernel.
template class FeedKernel<GPU_MALI, float>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/fetch_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/fetch_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
// The Mali fetch kernel has nothing to prepare; always reports success.
template <>
bool FetchKernel<GPU_MALI, float>::Init(FetchParam<GPU_MALI>* param) {
  return true;
}
// Makes Out share InputX's data.
template <>
void FetchKernel<GPU_MALI, float>::Compute(const FetchParam<GPU_MALI>& param) {
  auto source = param.InputX();
  param.Out()->ShareDataWith(*source);
}

// Explicit instantiation of the Mali GPU float fetch kernel.
template class FetchKernel<GPU_MALI, float>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/fushion_fc_kernel.cpp
已删除
100755 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_FC_OP
#include "operators/kernel/fusion_fc_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
// The Mali fused fully-connected kernel has nothing to prepare; always
// reports success.
template <>
bool FusionFcKernel<GPU_MALI, float>::Init(FusionFcParam<GPU_MALI>* param) {
  return true;
}
// Fused fully-connected layer: Out = X * Y + Z, with Z broadcast over rows.
//
// X and Y are flattened to matrices with ReshapeToMatrix() when they have
// more than two dimensions. Out is resized to {rows(X), cols(Y)} when it is
// not already 2-D. Every row of Out is first filled with the bias Z, and the
// matrix product is then accumulated on top of it (MatMul with beta = 1).
//
// Changes from the previous revision:
//  * the output dims are read *after* the optional Resize, so the rank and
//    width checks validate the shape that is actually written instead of a
//    stale snapshot that could never satisfy them once a Resize happened;
//  * the check messages now describe the condition they guard;
//  * loop counters use int64_t to match numel() / DDim element types.
template <>
void FusionFcKernel<GPU_MALI, float>::Compute(
    const FusionFcParam<GPU_MALI>& param) {
  const Tensor* input_x = param.InputX();
  const Tensor* input_y = param.InputY();
  const Tensor* input_z = param.InputZ();
  auto* input_z_data = input_z->data<float>();
  int axis = param.Axis();
  Tensor* out = param.Out();
  auto* out_data = out->mutable_data<float>();

  const Tensor x_matrix =
      input_x->dims().size() > 2
          ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
          : *input_x;
  const Tensor y_matrix =
      input_y->dims().size() > 2
          ? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
          : *input_y;

  // Bring the output to matrix form first, then take the dims used below.
  if (out->dims().size() != 2) {
    out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
  }
  auto out_dim = out->dims();

  PADDLE_MOBILE_ENFORCE(out_dim.size() == 2, " out_dim.size must be 2.");
  PADDLE_MOBILE_ENFORCE(input_z->dims().size() == 1,
                        "input_z size must be 1");
  PADDLE_MOBILE_ENFORCE(out_dim[1] == input_z->dims()[0],
                        " out_dim[1] must equal input_z->dims()[0].");
  axis = (axis == -1 ? out_dim.size() - input_z->dims().size() : axis);
  PADDLE_MOBILE_ENFORCE(axis == 1, " to fit broadcast, axis = 1. ");

  // Pre-load every output row with the bias vector.
  const int64_t classes = input_z->numel();
  for (int64_t i = 0; i < out_dim[0]; i++) {
    memory::Copy(out_data + i * classes, input_z_data,
                 sizeof(float) * classes);
  }

  // Debug dump of the bias-initialised output.
  for (int64_t i = 0; i < out->numel(); i++) {
    DLOG << out_data[i];
  }

  // beta = 1 keeps the bias already stored in `out`.
  math::MatMul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
                      out, static_cast<float>(1));
  PADDLE_MOBILE_ENFORCE(out_dim.size() == 2, " out_dim.size must be 2.");
}
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/lrn_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef LRN_OP
#pragma once
#include "operators/kernel/lrn_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/kernel/central-arm-func/lrn_arm_func.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
// ACL operator wrapper used by the Mali GPU LRN kernel.
//
// Collects the tensor buffers, normalization attributes and NCHW extents of
// an LrnParam into `args`, configures the ACL "lrn" layer from them and
// exposes RunAcl() to execute it. Instances are neither copyable nor movable.
template <typename DeviceType, typename T>
class AclLrnOp : public acl::ACLOperator {
 public:
  AclLrnOp() {
    this->force_bypass_acl_path_ =
        bypass_acl_class_layer & FLAGS_ENABLE_ACL_LRN;
  }
  ~AclLrnOp() = default;
  AclLrnOp(const AclLrnOp&) = delete;
  AclLrnOp& operator=(const AclLrnOp&) = delete;
  AclLrnOp(AclLrnOp&&) = delete;
  AclLrnOp& operator=(AclLrnOp&&) = delete;

  // Gives access to the parameters gathered by Bypass_acl().
  acl::AclParameters& getargs() { return args; }

  // Configures the ACL layer from the values currently stored in `args`.
  // Returns early when is_operator_init_done() reports true for the shape;
  // otherwise marks the operator initialised and clears the bypass flag
  // before creating the tensors.
  void InitAclLayer(const LrnParam<DeviceType>& param) {
    setTargetHint(acl::TargetHint::OPENCL);
    arm_compute::TensorShape shape(args.in_cols, args.in_rows, args.in_depth);

    if (is_operator_init_done(shape)) {
      return;
    }
    set_operator_init_done();
    this->force_bypass_acl_path_ = false;

    arm_compute::NormalizationLayerInfo norm_info(
        arm_compute::NormType::CROSS_MAP, args.nsize, args.alpha, args.beta,
        args.knorm);

    // Input and output share the same [width, height, channels] shape.
    new_tensor(input(), shape, args.input_data);
    new_tensor(output(), shape, args.output_data);
    acl_configure(lrn, this, norm_info);
  }

  // Records whether Compute() should skip ACL for this operator.
  void Set_bypass(bool bypass) { args.is_bypass = bypass; }

  // Executes the configured ACL layer on the given buffers.
  void RunAcl(void* input, void* output) {
    acl::ACLOperator::acl_run(input, output);
  }

  // Gathers parameters from `param`, initialises the ACL layer, and returns
  // true when the bypass flag is still set afterwards.
  bool Bypass_acl(const LrnParam<DeviceType>& param) {
    AclParametersByContext(param);
    InitAclLayer(param);
    if (this->force_bypass_acl_path_) {
      return true;
    }
    return false;
  }

 private:
  // Copies buffers, LRN attributes and the input extents from `param` into
  // `args`. The input dims are read as N, C, H, W.
  void AclParametersByContext(const LrnParam<DeviceType>& param) {
    const Tensor* in_x = param.InputX();
    Tensor* out = param.Out();

    const int window = param.N();
    const T alpha_value = param.Alpha();
    const T beta_value = param.Beta();
    const T k_value = param.K();

    const T* src = in_x->data<T>();
    T* dst = out->mutable_data<T>();
    args.input_data = static_cast<void*>(const_cast<T*>(src));
    args.output_data = static_cast<void*>(dst);

    args.nsize = window;
    args.alpha = alpha_value;
    args.beta = beta_value;
    args.knorm = k_value;

    // NCHW
    args.batch = in_x->dims()[0];
    args.in_depth = in_x->dims()[1];
    args.in_rows = in_x->dims()[2];
    args.in_cols = in_x->dims()[3];
  }

  acl::AclParameters args;
};
// Sets up the ACL-backed LRN for the Mali GPU kernel.
//
// Reuses the attached AclLrnOp or creates and attaches one. When the operator
// reports that ACL must be bypassed, the bypass flag is stored on it and
// "init acl failed" is printed. Init reports success in either case, because
// Compute() handles the bypassed case itself.
template <>
bool LrnKernel<GPU_MALI, float>::Init(LrnParam<GPU_MALI>* param) {
  using LrnOp = AclLrnOp<GPU_MALI, float>;

  auto* op = reinterpret_cast<LrnOp*>(this->GetAclOp());
  if (!op) {
    op = new LrnOp();
    this->SetAclOp(static_cast<void*>(op), static_cast<void*>(this));
  }

  if (op->Bypass_acl(*param)) {
    op->Set_bypass(true);
    std::cout << "init acl failed" << std::endl;
  }
  return true;
}
// Runs LRN either through ACL or, when the operator is flagged as bypassed,
// through LrnCompute<float>().
//
// On the ACL path the layer is executed once per batch entry, stepping the
// input and output pointers by one image (depth * cols * rows floats) after
// each run.
template <>
void LrnKernel<GPU_MALI, float>::Compute(const LrnParam<GPU_MALI>& param) {
  std::cout << "init acl" << std::endl;

  using LrnOp = AclLrnOp<GPU_MALI, float>;
  auto* op = reinterpret_cast<LrnOp*>(this->GetAclOp());
  if (op == nullptr) {
    return;
  }

  acl::AclParameters& args = op->getargs();
  if (args.is_bypass) {
    std::cout << "bypass op" << std::endl;
    LrnCompute<float>(param);
    return;
  }

  const float* src = static_cast<const float*>(args.input_data);
  const float* dst = static_cast<const float*>(args.output_data);
  // Number of floats in one image; identical for input and output.
  const auto image_size = args.in_depth * args.in_cols * args.in_rows;
  for (int image = 0; image < args.batch; ++image) {
    op->RunAcl(const_cast<float*>(src), const_cast<float*>(dst));
    src += image_size;
    dst += image_size;
  }
}

// Explicit instantiation of the Mali GPU float LRN kernel.
template class LrnKernel<GPU_MALI, float>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/mul_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MUL_OP
#pragma once
#include "operators/kernel/mul_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
// The Mali mul kernel has nothing to prepare; always reports success.
template <>
bool MulKernel<GPU_MALI, float>::Init(MulParam<GPU_MALI>* param) {
  return true;
}
// Computes Out = X * Y as a matrix product.
//
// Inputs with more than two dimensions are flattened with ReshapeToMatrix().
// When Out is not 2-D it is temporarily resized to {rows(X), cols(Y)} for the
// multiplication and restored to its original dims afterwards.
template <>
void MulKernel<GPU_MALI, float>::Compute(const MulParam<GPU_MALI>& param) {
  const Tensor* input_x = param.InputX();
  const Tensor* input_y = param.InputY();
  Tensor* out = param.Out();
  out->mutable_data<float>();

  const Tensor x_matrix =
      input_x->dims().size() > 2
          ? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
          : *input_x;
  const Tensor y_matrix =
      input_y->dims().size() > 2
          ? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
          : *input_y;

  // Snapshot of the caller-visible dims, used to restore them at the end.
  auto out_dim = out->dims();
  const bool needs_matrix_view = out_dim.size() != 2;
  if (needs_matrix_view) {
    out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
  }

  // alpha = 1, beta = 0: plain product, previous contents of Out are ignored.
  math::MatMul<float>(x_matrix, false, y_matrix, false, 1.0f, out, 0.0f);

  if (needs_matrix_view) {
    out->Resize(out_dim);
  }
}

// Explicit instantiation of the Mali GPU float mul kernel.
template class MulKernel<GPU_MALI, float>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/pool_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#pragma once
#include "operators/kernel/pool_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
// ACL operator wrapper used by the Mali GPU pooling kernel.
//
// Collects buffers, pooling attributes and NCHW extents of a PoolParam into
// `args`, derives the output extent, configures the ACL "pooling" layer and
// exposes RunAcl() to execute it. Instances are neither copyable nor movable.
template <typename DeviceType, typename T>
class AclPoolOp : public acl::ACLOperator {
 public:
  AclPoolOp() {
    this->force_bypass_acl_path_ =
        bypass_acl_class_layer & FLAGS_ENABLE_ACL_POOLING;
  }
  ~AclPoolOp() = default;
  AclPoolOp(const AclPoolOp&) = delete;
  AclPoolOp& operator=(const AclPoolOp&) = delete;
  AclPoolOp(AclPoolOp&&) = delete;
  AclPoolOp& operator=(AclPoolOp&&) = delete;

  // Gives access to the parameters gathered by Bypass_acl().
  acl::AclParameters& getargs() { return args; }

  // Configures the ACL layer from the values currently stored in `args`.
  // "max" selects max pooling; any other pool type is configured as average
  // pooling (Bypass_acl() rejects types other than "max"/"avg" afterwards).
  // Returns early when is_operator_init_done() reports true for the input
  // shape; otherwise marks the operator initialised and clears the bypass
  // flag before creating the tensors.
  void InitAclLayer(const PoolParam<DeviceType>& param) {
    setTargetHint(acl::TargetHint::OPENCL);
    arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
                                         args.in_depth);
    arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
                                          args.out_depth);

    // Only the pooling type differs between the two supported modes.
    const arm_compute::PoolingType pooling_type =
        (args.pool_type == "max") ? arm_compute::PoolingType::MAX
                                  : arm_compute::PoolingType::AVG;
    arm_compute::PoolingLayerInfo pool_info(
        pooling_type, args.filter_rows,
        arm_compute::PadStrideInfo(args.stride_cols, args.stride_rows,
                                   args.pad_cols, args.pad_rows,
                                   arm_compute::DimensionRoundingType::CEIL));

    if (is_operator_init_done(input_shape)) {
      return;
    }
    set_operator_init_done();
    this->force_bypass_acl_path_ = false;

    //[width, height, IFM]
    new_tensor(input(), input_shape, args.input_data);
    //[width, height, OFM]
    new_tensor(output(), output_shape, args.output_data);
    acl_configure(pooling, this, pool_info);
  }

  // Executes the configured ACL layer on the given buffers.
  void RunAcl(void* input, void* output) {
    acl::ACLOperator::acl_run(input, output);
  }

  // Gathers parameters from `param`, initialises the ACL layer, and returns
  // true when ACL must not be used: the bypass flag is still set, the pool
  // type is neither "max" nor "avg", or the kernel is not square.
  bool Bypass_acl(const PoolParam<DeviceType>& param) {
    bool bypass_acl = false;
    AclParametersByContext(param);
    InitAclLayer(param);
    // for performance, more groups impact GPU performance
    if (this->force_bypass_acl_path_) {
      bypass_acl = true;
    }
    if (args.pool_type != "max" && args.pool_type != "avg") {
      bypass_acl = true;
    }
    if (args.filter_rows != args.filter_cols) {
      bypass_acl = true;
    }
    return bypass_acl;
  }

 private:
  // Copies buffers, pooling attributes and the input extents from `param`
  // into `args` and derives the output extent with ceil rounding.
  void AclParametersByContext(const PoolParam<DeviceType>& param) {
    const Tensor* in_x = param.Input();
    Tensor* out = param.Output();
    std::string pooling_type = param.PoolingType();
    std::vector<int> ksize = param.Ksize();
    std::vector<int> strides = param.Strides();
    std::vector<int> paddings = param.Paddings();
    bool is_global_pooling = param.isGlobalPooling();

    const T* input_data = in_x->data<T>();
    T* output_data = out->mutable_data<T>();

    args.input_data = static_cast<void*>(const_cast<T*>(input_data));
    args.output_data = static_cast<void*>(output_data);

    args.is_global_pool = is_global_pooling;
    args.pool_type = pooling_type;

    args.filter_rows = ksize[0];
    args.filter_cols = ksize[1];
    args.dim = ksize.size();

    // NCHW
    args.batch = in_x->dims()[0];
    args.in_depth = in_x->dims()[1];
    args.in_rows = in_x->dims()[2];
    args.in_cols = in_x->dims()[3];

    // strides(h_stride, w_stride)
    args.stride_rows = strides[0];
    args.stride_cols = strides[1];

    // paddings(h_pad, w_pad)
    args.pad_rows = paddings[0];
    args.pad_cols = paddings[1];

    // Global pooling covers the whole input plane without padding. Apply the
    // override before deriving the output extent, so that the output shape is
    // computed from the same kernel size and padding that InitAclLayer()
    // passes to ACL.
    if (is_global_pooling) {
      args.filter_rows = args.in_rows;
      args.filter_cols = args.in_cols;
      args.pad_rows = 0;
      args.pad_cols = 0;
    }

    // Pooling keeps the channel count; spatial extent uses ceil rounding,
    // matching DimensionRoundingType::CEIL used for the ACL layer.
    args.out_depth = args.in_depth;
    args.out_rows =
        static_cast<int>(ceil(static_cast<float>(args.in_rows +
                                                 2 * args.pad_rows -
                                                 args.filter_rows) /
                              args.stride_rows)) +
        1;
    args.out_cols =
        static_cast<int>(ceil(static_cast<float>(args.in_cols +
                                                 2 * args.pad_cols -
                                                 args.filter_cols) /
                              args.stride_cols)) +
        1;
  }

  acl::AclParameters args;
};
// Sets up the ACL-backed pooling for the Mali GPU kernel.
//
// Reuses the attached AclPoolOp or creates and attaches one, then asks it
// whether `param` has to bypass ACL. Returns false (after printing
// "init acl failed") when it does, true otherwise.
template <>
bool PoolKernel<GPU_MALI, float>::Init(PoolParam<GPU_MALI>* param) {
  using PoolOp = AclPoolOp<GPU_MALI, float>;

  auto* op = reinterpret_cast<PoolOp*>(this->GetAclOp());
  if (!op) {
    op = new PoolOp();
    this->SetAclOp(static_cast<void*>(op), static_cast<void*>(this));
  }

  const bool bypassed = op->Bypass_acl(*param);
  if (!bypassed) {
    return true;
  }
  std::cout << "init acl failed" << std::endl;
  return false;
}
// Runs pooling through the ACL operator attached to this kernel.
//
// The layer is executed once per batch entry. After each run the input
// pointer advances by one input image (depth * in_cols * in_rows floats) and
// the output pointer by one output image (depth * out_cols * out_rows).
// Does nothing beyond the log line when no operator has been attached.
template <>
void PoolKernel<GPU_MALI, float>::Compute(const PoolParam<GPU_MALI>& param) {
  std::cout << "init acl" << std::endl;

  using PoolOp = AclPoolOp<GPU_MALI, float>;
  auto* op = reinterpret_cast<PoolOp*>(this->GetAclOp());
  if (op == nullptr) {
    return;
  }

  acl::AclParameters& args = op->getargs();
  const float* src = static_cast<const float*>(args.input_data);
  const float* dst = static_cast<const float*>(args.output_data);
  const auto in_image_size = args.in_depth * args.in_cols * args.in_rows;
  const auto out_image_size = args.in_depth * args.out_cols * args.out_rows;
  for (int image = 0; image < args.batch; ++image) {
    op->RunAcl(const_cast<float*>(src), const_cast<float*>(dst));
    src += in_image_size;
    dst += out_image_size;
  }
}

// Explicit instantiation of the Mali GPU float pooling kernel.
template class PoolKernel<GPU_MALI, float>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/relu_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#pragma once
#include "operators/kernel/relu_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
// ACL operator wrapper used by the Mali GPU ReLU kernel.
//
// Collects the tensor buffers and extents of a ReluParam into `args`,
// configures the ACL "activation" layer with the RELU function and exposes
// RunAcl() to execute it. Instances are neither copyable nor movable.
template <typename DeviceType, typename T>
class AclReluOp : public acl::ACLOperator {
 public:
  AclReluOp() {
    this->force_bypass_acl_path_ =
        bypass_acl_class_layer & FLAGS_ENABLE_ACL_RELU;
  }
  ~AclReluOp() = default;
  AclReluOp(const AclReluOp&) = delete;
  AclReluOp& operator=(const AclReluOp&) = delete;
  AclReluOp(AclReluOp&&) = delete;
  AclReluOp& operator=(AclReluOp&&) = delete;

  // Gives access to the parameters gathered by Bypass_acl().
  acl::AclParameters& getargs() { return args; }

  // Configures the ACL layer from the values currently stored in `args`.
  // Returns early when is_operator_init_done() reports true for the input
  // shape; otherwise marks the operator initialised and clears the bypass
  // flag before creating the tensors.
  void InitAclLayer(const ReluParam<DeviceType>& param) {
    setTargetHint(acl::TargetHint::OPENCL);
    arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
                                         args.in_depth, args.batch);
    arm_compute::TensorShape output_shape(args.in_cols, args.in_rows,
                                          args.in_depth, args.out_num);

    arm_compute::ActivationLayerInfo::ActivationFunction type =
        arm_compute::ActivationLayerInfo::ActivationFunction::RELU;
    arm_compute::ActivationLayerInfo act_info(type);

    if (is_operator_init_done(input_shape)) {
      return;
    }
    set_operator_init_done();
    this->force_bypass_acl_path_ = false;

    //[width, height, IFM]
    new_tensor(input(), input_shape, args.input_data);
    //[width, height, OFM]
    new_tensor(output(), output_shape, args.output_data);
    acl_configure(activation, this, act_info);
  }

  // Executes the configured ACL layer on the given buffers.
  void RunAcl(void* input, void* output) {
    acl::ACLOperator::acl_run(input, output);
  }

  // Gathers parameters from `param`, initialises the ACL layer, and returns
  // true when the bypass flag is still set afterwards.
  bool Bypass_acl(const ReluParam<DeviceType>& param) {
    bool bypass_acl = false;
    AclParametersByContext(param);
    InitAclLayer(param);
    // for performance, more groups impact GPU performance
    if (this->force_bypass_acl_path_) {
      bypass_acl = true;
    }
    return bypass_acl;
  }

 private:
  // Copies the buffers and extents from `param` into `args`. The input dims
  // are read as N, C, H, W; out_num is the first output dimension.
  void AclParametersByContext(const ReluParam<DeviceType>& param) {
    const auto* input_x = param.InputX();
    auto* out = param.Out();

    // The pointee types depend on DeviceType, so the member templates need
    // the `template` disambiguator to be parsed as template calls.
    const T* input_data = input_x->template data<T>();
    T* output_data = out->template mutable_data<T>();

    args.input_data = static_cast<void*>(const_cast<T*>(input_data));
    args.output_data = static_cast<void*>(output_data);

    args.batch = input_x->dims()[0];
    args.in_depth = input_x->dims()[1];
    args.in_rows = input_x->dims()[2];
    args.in_cols = input_x->dims()[3];
    args.out_num = out->dims()[0];
  }

  acl::AclParameters args;
};
// Sets up the ACL-backed ReLU for the Mali GPU kernel.
//
// Reuses the attached AclReluOp or creates and attaches one, then asks it
// whether `param` has to bypass ACL. Returns false (after printing
// "init acl failed") when it does, true otherwise.
template <>
bool ReluKernel<GPU_MALI, float>::Init(ReluParam<GPU_MALI>* param) {
  using ReluOp = AclReluOp<GPU_MALI, float>;

  auto* op = reinterpret_cast<ReluOp*>(this->GetAclOp());
  if (!op) {
    op = new ReluOp();
    this->SetAclOp(static_cast<void*>(op), static_cast<void*>(this));
  }

  const bool bypassed = op->Bypass_acl(*param);
  if (!bypassed) {
    return true;
  }
  std::cout << "init acl failed" << std::endl;
  return false;
}
// Runs ReLU through the ACL operator attached to this kernel.
//
// Prints "init acl" on every call, then executes the layer once on the
// input/output buffers cached in the operator's AclParameters. Does nothing
// further when no operator has been attached.
template <>
void ReluKernel<GPU_MALI, float>::Compute(const ReluParam<GPU_MALI>& param) {
  std::cout << "init acl" << std::endl;

  using ReluOp = AclReluOp<GPU_MALI, float>;
  auto* op = reinterpret_cast<ReluOp*>(this->GetAclOp());
  if (op != nullptr) {
    acl::AclParameters& cached = op->getargs();
    op->RunAcl(cached.input_data, cached.output_data);
  }
}

// Explicit instantiation of the Mali GPU float ReLU kernel.
template class ReluKernel<GPU_MALI, float>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/kernel/mali/reshape_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE_OP
#pragma once
#include "operators/kernel/reshape_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
// The Mali reshape kernel has nothing to prepare; always reports success.
template <>
bool ReshapeKernel<GPU_MALI, float>::Init(ReshapeParam<GPU_MALI>* param) {
  return true;
}
// Gives Out the contents of InputX under a new shape.
//
// The target shape is Out's current dims unless an InputShape tensor is
// present, in which case its int values are validated against InputX's dims
// with ValidateShape(). When Inplace is false the data is copied into Out;
// otherwise Out shares InputX's buffer. In both cases Out ends up with the
// target dims.
//
// Compared with the previous revision, an unused scratch tensor was dropped,
// the already-bound input dims are reused for validation, and the final
// Resize that both branches performed is written once.
template <>
void ReshapeKernel<GPU_MALI, float>::Compute(
    const ReshapeParam<GPU_MALI>& param) {
  const auto* input_x = param.InputX();
  const auto& input_x_dims = input_x->dims();
  auto* out = param.Out();
  framework::DDim out_dims = out->dims();

  const auto* input_shape = param.InputShape();
  if (input_shape) {
    auto* shape_data = input_shape->data<int>();
    auto shape =
        std::vector<int>(shape_data, shape_data + input_shape->numel());
    out_dims = ValidateShape(shape, input_x_dims);
  }

  bool inplace = param.Inplace();
  // Resize before mutable_data() so the copy path sees the target dims.
  out->Resize(out_dims);
  if (!inplace) {
    out->mutable_data<float>();
    framework::TensorCopy(*input_x, out);
  } else {
    out->ShareDataWith(*input_x);
  }
  // Re-apply the target dims after the copy/share step, as both branches of
  // the previous revision did.
  out->Resize(out_dims);
}
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/mali/softmax_kernel.cpp
已删除
100644 → 0
浏览文件 @
ad3844d6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#pragma once
#include "operators/kernel/softmax_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
// ACL operator wrapper used by the Mali GPU softmax kernel.
//
// Collects the tensor buffers and extents of a SoftmaxParam into `args`,
// configures the ACL "softmax" layer and exposes RunAcl() to execute it.
// Instances are neither copyable nor movable.
template <typename DeviceType, typename T>
class AclSoftmaxOp : public acl::ACLOperator {
 public:
  AclSoftmaxOp() {
    this->force_bypass_acl_path_ =
        bypass_acl_class_layer & FLAGS_ENABLE_ACL_SOFTMAX;
  }
  ~AclSoftmaxOp() = default;
  AclSoftmaxOp(const AclSoftmaxOp&) = delete;
  AclSoftmaxOp& operator=(const AclSoftmaxOp&) = delete;
  AclSoftmaxOp(AclSoftmaxOp&&) = delete;
  AclSoftmaxOp& operator=(AclSoftmaxOp&&) = delete;

  // Gives access to the parameters gathered by Bypass_acl().
  acl::AclParameters& getargs() { return args; }

  // Configures the ACL layer from the values currently stored in `args`.
  // Input and output use the same (in_depth, batch) shape. Returns early
  // when is_operator_init_done() reports true for that shape; otherwise
  // marks the operator initialised and clears the bypass flag before
  // creating the tensors.
  void InitAclLayer(const SoftmaxParam<DeviceType>& param) {
    setTargetHint(acl::TargetHint::OPENCL);
    arm_compute::TensorShape shape(args.in_depth, args.batch);

    if (is_operator_init_done(shape)) {
      return;
    }
    set_operator_init_done();
    this->force_bypass_acl_path_ = false;

    new_tensor(input(), shape, args.input_data);
    new_tensor(output(), shape, args.output_data);
    acl_configure(softmax, this, NULL);
  }

  // Executes the configured ACL layer on the given buffers.
  void RunAcl(void* input, void* output) {
    acl::ACLOperator::acl_run(input, output);
  }

  // Gathers parameters from `param`, initialises the ACL layer, and returns
  // true when the bypass flag is still set afterwards.
  bool Bypass_acl(const SoftmaxParam<DeviceType>& param) {
    bool bypass_acl = false;
    AclParametersByContext(param);
    InitAclLayer(param);
    // for performance, more groups impact GPU performance
    if (this->force_bypass_acl_path_) {
      bypass_acl = true;
    }
    return bypass_acl;
  }

 private:
  // Resizes Out to InputX's dims and copies the buffers and extents into
  // `args`.
  void AclParametersByContext(const SoftmaxParam<DeviceType>& param) {
    const framework::Tensor* in_x = param.InputX();
    framework::Tensor* out = param.Out();
    auto x_dims = in_x->dims();
    out->Resize(x_dims);

    const T* input_data = in_x->data<T>();
    // Request the output storage with mutable_data(), like the other ACL
    // operators in this directory do, rather than data(): Out has just been
    // resized and is about to be written, so it must not be assumed to hold
    // a buffer already.
    T* output_data = out->mutable_data<T>();

    args.input_data = static_cast<void*>(const_cast<T*>(input_data));
    args.output_data = static_cast<void*>(output_data);

    // Only the first two input dims are read here.
    args.batch = in_x->dims()[0];
    args.in_depth = in_x->dims()[1];

    args.out_num = out->dims()[0];
  }

  acl::AclParameters args;
};
// Sets up the ACL-backed softmax for the Mali GPU kernel.
//
// Reuses the attached AclSoftmaxOp or creates and attaches one, then asks it
// whether `param` has to bypass ACL. Returns false (after printing
// "init acl failed") when it does, true otherwise.
template <>
bool SoftmaxKernel<GPU_MALI, float>::Init(SoftmaxParam<GPU_MALI>* param) {
  using SoftmaxOp = AclSoftmaxOp<GPU_MALI, float>;

  auto* op = reinterpret_cast<SoftmaxOp*>(this->GetAclOp());
  if (!op) {
    op = new SoftmaxOp();
    this->SetAclOp(static_cast<void*>(op), static_cast<void*>(this));
  }

  const bool bypassed = op->Bypass_acl(*param);
  if (!bypassed) {
    return true;
  }
  std::cout << "init acl failed" << std::endl;
  return false;
}
// Runs softmax through the ACL operator attached to this kernel.
//
// The layer is executed out_num times; after each run both the input and the
// output pointer advance by in_depth floats. Does nothing beyond the log line
// when no operator has been attached.
//
// NOTE(review): AclSoftmaxOp::InitAclLayer configures its tensors with shape
// (in_depth, batch), while this loop additionally steps one row per run —
// confirm that acl_run consumes a single row per call.
template <>
void SoftmaxKernel<GPU_MALI, float>::Compute(
    const SoftmaxParam<GPU_MALI>& param) {
  std::cout << "init acl" << std::endl;

  using SoftmaxOp = AclSoftmaxOp<GPU_MALI, float>;
  auto* op = reinterpret_cast<SoftmaxOp*>(this->GetAclOp());
  if (op == nullptr) {
    return;
  }

  acl::AclParameters& args = op->getargs();
  const float* src = static_cast<const float*>(args.input_data);
  const float* dst = static_cast<const float*>(args.output_data);
  for (int row = 0; row < args.out_num; ++row) {
    op->RunAcl(const_cast<float*>(src), const_cast<float*>(dst));
    src += args.in_depth;
    dst += args.in_depth;
  }
}

// Explicit instantiation of the Mali GPU float softmax kernel.
template class SoftmaxKernel<GPU_MALI, float>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
#endif
src/operators/lookup_op.cpp
浏览文件 @
a29db744
...
...
@@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
lookup_table
,
ops
::
LookupOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
src/operators/lrn_op.cpp
浏览文件 @
a29db744
...
...
@@ -35,8 +35,5 @@ REGISTER_OPERATOR_CPU(lrn, ops::LrnOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
lrn
,
ops
::
LrnOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
lrn
,
ops
::
LrnOp
);
#endif
#endif
src/operators/mul_op.cpp
浏览文件 @
a29db744
...
...
@@ -61,9 +61,6 @@ REGISTER_OPERATOR_CPU(mul, ops::MulOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
mul
,
ops
::
MulOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
mul
,
ops
::
MulOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
mul
,
ops
::
MulOp
);
#endif
...
...
src/operators/norm_op.cpp
浏览文件 @
a29db744
...
...
@@ -41,8 +41,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
norm
,
ops
::
NormOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
src/operators/pool_op.cpp
浏览文件 @
a29db744
...
...
@@ -63,9 +63,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
pool2d
,
ops
::
PoolOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
pool2d
,
ops
::
PoolOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
pool2d
,
ops
::
PoolOp
);
#endif
...
...
src/operators/prelu_op.cpp
浏览文件 @
a29db744
...
...
@@ -36,8 +36,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
prelu
,
ops
::
PReluOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
prelu
,
ops
::
PReluOp
);
#endif
#endif
src/operators/prior_box_op.cpp
浏览文件 @
a29db744
...
...
@@ -52,8 +52,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
prior_box
,
ops
::
PriorBoxOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL
(
prior_box
,
ops
::
PriorBoxOp
);
#endif
...
...
src/operators/reshape2_op.cpp
浏览文件 @
a29db744
...
...
@@ -40,9 +40,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
reshape2
,
ops
::
Reshape2Op
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
reshape2
,
ops
::
Reshape2Op
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
reshape2
,
ops
::
Reshape2Op
);
#endif
...
...
src/operators/reshape_op.cpp
浏览文件 @
a29db744
...
...
@@ -35,9 +35,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
reshape
,
ops
::
ReshapeOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
reshape
,
ops
::
ReshapeOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
reshape
,
ops
::
ReshapeOp
);
#endif
...
...
src/operators/resize_op.cpp
浏览文件 @
a29db744
...
...
@@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
resize
,
ops
::
ResizeOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
resize
,
ops
::
ResizeOp
);
#endif
#endif
src/operators/scale_op.cpp
浏览文件 @
a29db744
...
...
@@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
scale
,
ops
::
ScaleOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
scale
,
ops
::
ScaleOp
);
#endif
#endif
src/operators/shape_op.cpp
浏览文件 @
a29db744
...
...
@@ -34,7 +34,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
shape
,
ops
::
ShapeOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#endif
src/operators/slice_op.cpp
浏览文件 @
a29db744
...
...
@@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
slice
,
ops
::
SliceOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
slice
,
ops
::
SliceOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
slice
,
ops
::
SliceOp
);
#endif
...
...
src/operators/softmax_op.cpp
浏览文件 @
a29db744
...
...
@@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
softmax
,
ops
::
SoftmaxOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
softmax
,
ops
::
SoftmaxOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA
(
softmax
,
ops
::
SoftmaxOp
);
#endif
...
...
src/operators/sum_op.cpp
浏览文件 @
a29db744
...
...
@@ -61,9 +61,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
sum
,
ops
::
SumOp
);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU
(
sum
,
ops
::
ConcatOp
);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录