diff --git a/CMakeLists.txt b/CMakeLists.txt index c664f43e9e446a08bdcbe844ee7741a86a72660e..e669f1b1c630e664ad19c029f4028d043c2d0126 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,6 @@ option(WITH_TEST "build with unit tests" ON) # select the platform to build option(CPU "build with arm CPU support" ON) -option(GPU_MALI "build with arm mali GPU support" OFF) option(GPU_CL "build with OpenCL support" OFF) option(FPGA "build with FPGA support" OFF) if(FPGA) @@ -97,31 +96,6 @@ else() endforeach() endif() -if (GPU_MALI) - add_definitions(-DPADDLE_MOBILE_MALI_GPU) - add_definitions(-DUSE_ACL=1) - add_definitions(-DUSE_OPENCL) - set(ACL_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/mali/ACL_Android) - include_directories(${ACL_ROOT} ${ACL_ROOT}/include) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -L${ACL_ROOT}/build") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute_core") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute_graph") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -L${ACL_ROOT}/build/opencl-1.2-stubs") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_ACL=1") -else() - file(GLOB_RECURSE _tmp_list src/operators/kernel/mali/*.cpp src/operators/kernel/mali/*.cc) - foreach(f ${_tmp_list}) - list(REMOVE_ITEM PADDLE_MOBILE_CC ${f}) - endforeach() - - file(GLOB_RECURSE _tmp_list_h src/operators/kernel/mali/*.h) - foreach(f ${_tmp_list_h}) - list(REMOVE_ITEM PADDLE_MOBILE_H ${f}) - endforeach() -endif() - if(FPGA) add_definitions(-DPADDLE_MOBILE_FPGA) file(GLOB_RECURSE _tmp_list src/operators/math/*.cpp src/operators/kernel/fpga/*.cc) diff --git a/doc/build.md b/doc/build.md index 1c1c906458a0dd5f525c9d5153d48356b907b23b..0aaaccd03129d07a73c6ff37ba660b054b72c576 100644 --- a/doc/build.md +++ b/doc/build.md @@ -46,7 +46,6 @@ root@5affd29d4fc5:/ # ccmake . 
DEBUGING ON FPGA OFF LOG_PROFILE ON - MALI_GPU OFF NET googlenet USE_EXCEPTION ON USE_OPENMP OFF diff --git a/doc/design_doc.md b/doc/design_doc.md index 70292c6b0bd617930a9c9458b87cef34dee3347e..bc3214ddb3cf31712c8ffc975c3dc8ac541fcdc8 100644 --- a/doc/design_doc.md +++ b/doc/design_doc.md @@ -109,11 +109,6 @@ USE_OP_CPU(conv2d); REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -USE_OP_MALI_GPU(conv2d); -REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp); -#endif - #ifdef PADDLE_MOBILE_FPGA USE_OP_FPGA(conv2d); REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp); diff --git a/src/framework/load_ops.h b/src/framework/load_ops.h index e72c55f5f736b81362f461952a706127998f9ade..79d0f0e5fad0b074c78492d1f3038b7c9453a1c6 100644 --- a/src/framework/load_ops.h +++ b/src/framework/load_ops.h @@ -23,15 +23,6 @@ limitations under the License. */ #define LOAD_CPU_OP(op_type) #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#define LOAD_MALI_GPU_OP(op_type) \ - extern int TouchOpRegistrar_##op_type##_##mali_gpu(); \ - static int use_op_itself_##op_type##_##mali_gpu __attribute__((unused)) = \ - TouchOpRegistrar_##op_type##_##mali_gpu() -#else -#define LOAD_MALI_GPU_OP(op_type) -#endif - #ifdef PADDLE_MOBILE_FPGA #define LOAD_FPGA_OP(op_type) \ extern int TouchOpRegistrar_##op_type##_##fpga(); \ @@ -46,9 +37,8 @@ limitations under the License. 
*/ static int use_fusion_matcher_itself_##op_type __attribute__((unused)) = \ - TouchFusionMatcherRegistrar_##op_type(); -#define LOAD_OP(op_type) \ - LOAD_CPU_OP(op_type); \ - LOAD_MALI_GPU_OP(op_type); \ +#define LOAD_OP(op_type) \ + LOAD_CPU_OP(op_type); \ LOAD_FPGA_OP(op_type); #define LOAD_OP1(op_type, device_type) LOAD_##device_type##_OP(op_type); @@ -68,7 +58,7 @@ LOAD_OP(fetch) LOAD_OP(fill_constant) #endif #ifdef BATCHNORM_OP -LOAD_OP2(batch_norm, CPU, MALI_GPU); +LOAD_OP1(batch_norm, CPU); #endif #ifdef BILINEAR_INTERP_OP LOAD_OP1(bilinear_interp, CPU); @@ -77,40 +67,40 @@ LOAD_OP1(bilinear_interp, CPU); LOAD_OP1(box_coder, CPU); #endif #ifdef CONCAT_OP -LOAD_OP3(concat, CPU, MALI_GPU, FPGA); +LOAD_OP2(concat, CPU, FPGA); #endif #ifdef CONV_OP -LOAD_OP3(conv2d, CPU, MALI_GPU, FPGA); +LOAD_OP2(conv2d, CPU, FPGA); #endif #ifdef LRN_OP -LOAD_OP2(lrn, CPU, MALI_GPU); +LOAD_OP1(lrn, CPU); #endif #ifdef SIGMOID_OP LOAD_OP1(sigmoid, CPU); #endif #ifdef FUSION_FC_RELU_OP -LOAD_OP3(fusion_fc_relu, CPU, MALI_GPU, FPGA); +LOAD_OP2(fusion_fc_relu, CPU, FPGA); LOAD_FUSION_MATCHER(fusion_fc_relu); #endif #ifdef FUSION_ELEMENTWISEADDRELU_OP -LOAD_OP3(fusion_elementwise_add_relu, CPU, MALI_GPU, FPGA); +LOAD_OP2(fusion_elementwise_add_relu, CPU, FPGA); LOAD_FUSION_MATCHER(fusion_elementwise_add_relu); #endif #ifdef SPLIT_OP LOAD_OP1(split, CPU); #endif #ifdef RESIZE_OP -LOAD_OP2(resize, CPU, MALI_GPU); +LOAD_OP1(resize, CPU); #endif #ifdef FUSION_CONVADDBNRELU_OP LOAD_OP2(fusion_conv_add_bn_relu, CPU, FPGA); LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu); #endif #ifdef RESHAPE_OP -LOAD_OP2(reshape, CPU, MALI_GPU); +LOAD_OP1(reshape, CPU); #endif #ifdef RESHAPE2_OP -LOAD_OP2(reshape2, CPU, MALI_GPU); +LOAD_OP1(reshape2, CPU); #endif #ifdef TRANSPOSE_OP LOAD_OP1(transpose, CPU); @@ -126,11 +116,11 @@ LOAD_OP2(fusion_conv_add_relu, CPU, FPGA); LOAD_FUSION_MATCHER(fusion_conv_add_relu); #endif #ifdef FUSION_CONVADD_OP -LOAD_OP2(fusion_conv_add, CPU, MALI_GPU); 
+LOAD_OP1(fusion_conv_add, CPU); LOAD_FUSION_MATCHER(fusion_conv_add); #endif #ifdef SOFTMAX_OP -LOAD_OP2(softmax, CPU, MALI_GPU); +LOAD_OP1(softmax, CPU); #endif #ifdef SHAPE_OP LOAD_OP1(shape, CPU); @@ -142,13 +132,13 @@ LOAD_OP1(depthwise_conv2d, CPU); LOAD_OP1(conv2d_transpose, CPU); #endif #ifdef SCALE_OP -LOAD_OP2(scale, CPU, MALI_GPU); +LOAD_OP1(scale, CPU); #endif #ifdef ELEMENTWISEADD_OP -LOAD_OP2(elementwise_add, CPU, MALI_GPU); +LOAD_OP1(elementwise_add, CPU); #endif #ifdef PRELU_OP -LOAD_OP2(prelu, CPU, MALI_GPU); +LOAD_OP1(prelu, CPU); #endif #ifdef FLATTEN_OP LOAD_OP1(flatten, CPU); @@ -182,13 +172,13 @@ LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu); LOAD_OP1(crf_decoding, CPU); #endif #ifdef MUL_OP -LOAD_OP2(mul, CPU, MALI_GPU); +LOAD_OP1(mul, CPU); #endif #ifdef NORM_OP LOAD_OP1(norm, CPU); #endif #ifdef RELU_OP -LOAD_OP2(relu, CPU, MALI_GPU); +LOAD_OP1(relu, CPU); LOAD_OP1(relu6, CPU); #endif #ifdef IM2SEQUENCE_OP @@ -198,11 +188,11 @@ LOAD_OP1(im2sequence, CPU); LOAD_OP1(lookup_table, CPU); #endif #ifdef FUSION_FC_OP -LOAD_OP3(fusion_fc, CPU, MALI_GPU, FPGA); +LOAD_OP2(fusion_fc, CPU, FPGA); LOAD_FUSION_MATCHER(fusion_fc); #endif #ifdef POOL_OP -LOAD_OP3(pool2d, CPU, MALI_GPU, FPGA); +LOAD_OP2(pool2d, CPU, FPGA); #endif #ifdef MULTICLASSNMS_OP LOAD_OP1(multiclass_nms, CPU); @@ -217,7 +207,7 @@ LOAD_OP1(sum, CPU); LOAD_OP1(elementwise_mul, CPU); #endif #ifdef SLICE_OP -LOAD_OP2(slice, CPU, MALI_GPU); +LOAD_OP1(slice, CPU); #endif #ifdef FUSION_CONVBN_OP LOAD_OP2(fusion_conv_bn, CPU, FPGA); diff --git a/src/framework/op_registry.h b/src/framework/op_registry.h index f57519ee0272d74507ad9e53864262310adced4d..3897fc02c84486258bf9debbd16582e59f33e736 100644 --- a/src/framework/op_registry.h +++ b/src/framework/op_registry.h @@ -115,9 +115,6 @@ class OpRegistry { #define REGISTER_OPERATOR_CPU(op_type, op_class) \ REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU); -#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \ - 
REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI); - #define REGISTER_OPERATOR_FPGA(op_type, op_class) \ REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA); diff --git a/src/framework/operator.h b/src/framework/operator.h index aaddb9c5649dca1c55daec0497354b127a118605..93f23e9d1067392a0bb43e8f764dd6695249d996 100644 --- a/src/framework/operator.h +++ b/src/framework/operator.h @@ -130,13 +130,6 @@ class OpKernelBase { } #endif -#ifdef PADDLE_McOBILE_MALI_GPU - OpKernelBase() { acl_op_ = nullptr; } - void *GetAclOp() const { return acl_op_; } - void SetAclOp(void *op, void *ob) const { - reinterpret_cast<OpKernelBase<Dtype, P> *>(ob)->acl_op_ = op; - } -#endif virtual void Compute(const P &para) = 0; virtual bool Init(P *para) { return true; } virtual ~OpKernelBase() = default; @@ -147,9 +140,6 @@ class OpKernelBase { #endif private: -#ifdef PADDLE_MOBILE_MALI_GPU - void *acl_op_; -#endif }; class FusionOpMatcher { diff --git a/src/operators/activation_op.cpp b/src/operators/activation_op.cpp index 158eb8eb47e872ed3c90fd4ae3ea1a9d257333e6..ab129690fe95127fec8c36e6cd6e27abc4b8505e 100644 --- a/src/operators/activation_op.cpp +++ b/src/operators/activation_op.cpp @@ -55,9 +55,6 @@ namespace ops = paddle_mobile::operators; REGISTER_OPERATOR_CPU(relu, ops::ReluOp); REGISTER_OPERATOR_CPU(relu6, ops::Relu6Op); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(relu, ops::ReluOp); #endif diff --git a/src/operators/batchnorm_op.cpp b/src/operators/batchnorm_op.cpp index 89220dd2489c93a84bc8a141c06a151b8044a4e4..3a272845cce9e67ddbca75cf7c691e5ea3355b99 100644 --- a/src/operators/batchnorm_op.cpp +++ b/src/operators/batchnorm_op.cpp @@ -34,9 +34,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(batch_norm, ops::BatchNormOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp); -#endif #ifdef 
PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/bilinear_interp_op.cpp b/src/operators/bilinear_interp_op.cpp index b3388c38ec6050faff1cb7bbe49e8dd042291fc9..5db21396b07f90f380439139b48dd44918cb1347 100644 --- a/src/operators/bilinear_interp_op.cpp +++ b/src/operators/bilinear_interp_op.cpp @@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(bilinear_interp, ops::BilinearOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/box_coder_op.cpp b/src/operators/box_coder_op.cpp index c4005bcfd665eebe805e781966e28df026c190c2..6511266e687ca1465da5681a2a68976b1bc5d049 100644 --- a/src/operators/box_coder_op.cpp +++ b/src/operators/box_coder_op.cpp @@ -58,8 +58,6 @@ REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp); #ifdef PADDLE_MOBILE_CL REGISTER_OPERATOR_CL(box_coder, ops::BoxCoderOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/concat_op.cpp b/src/operators/concat_op.cpp index e64cbccd4192f1e04679d37f775ba79a9229f211..10ea7cb1c214892d54ff9eab605008fdc7eca686 100644 --- a/src/operators/concat_op.cpp +++ b/src/operators/concat_op.cpp @@ -69,9 +69,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp); #ifdef PADDLE_MOBILE_CL REGISTER_OPERATOR_CL(concat, ops::ConcatOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(concat, ops::ConcatOp); -#endif + #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(concat, ops::ConcatOp); #endif diff --git a/src/operators/conv_op.cpp b/src/operators/conv_op.cpp index ad778b1fef7fe400e1df645703cf3ebfb1b22727..88c1262546bee8ec11c36b57d88fcfde080a407c 100644 --- a/src/operators/conv_op.cpp +++ b/src/operators/conv_op.cpp @@ -55,9 +55,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp); -#endif + #ifdef 
PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp); #endif diff --git a/src/operators/conv_transpose_op.cpp b/src/operators/conv_transpose_op.cpp index d09a7937453f3bd2c20d9e6bc1a03d4375d57491..86d2d6209112a69a544cee411892f90ca7529ed4 100644 --- a/src/operators/conv_transpose_op.cpp +++ b/src/operators/conv_transpose_op.cpp @@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(conv2d_transpose, ops::ConvOpTranspose); #endif diff --git a/src/operators/crf_op.cpp b/src/operators/crf_op.cpp index 61f9a54352e236a7fcb7b2765ab11055fbec95ab..4ab299ebf4fff08ccfb9f0497d2883e2d9cbcc4b 100644 --- a/src/operators/crf_op.cpp +++ b/src/operators/crf_op.cpp @@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(crf_decoding, ops::CrfOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/elementwise_add_op.cpp b/src/operators/elementwise_add_op.cpp index 6fde477f228d140f28525989bdbba564ed88854d..f694a56621399e923d54da82027a73e064d310ed 100644 --- a/src/operators/elementwise_add_op.cpp +++ b/src/operators/elementwise_add_op.cpp @@ -33,9 +33,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(elementwise_add, ops::ElementwiseAddOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(elementwise_add, ops::ElementwiseAddOp); -#endif #ifdef PADDLE_MOBILE_CL REGISTER_OPERATOR_CL(elementwise_add, ops::ElementwiseAddOp); diff --git a/src/operators/elementwise_mul_op.cpp b/src/operators/elementwise_mul_op.cpp index 3417fedbb2b8717355e1a7492321ecd5d7c6a9c3..61001ff4ec6be5bc76e5e6dd12093b2e56c12b96 100644 --- a/src/operators/elementwise_mul_op.cpp +++ b/src/operators/elementwise_mul_op.cpp @@ -32,9 +32,6 @@ namespace ops = 
paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(elementwise_mul, ops::ElementwiseMulOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(elementwise_mul, ops::ElementwiseMulOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(elementwise_mul, ops::ElementwiseMulOp); #endif diff --git a/src/operators/elementwise_sub_op.cpp b/src/operators/elementwise_sub_op.cpp index e5ec33ced29f02a524350ed907ef69f2a5dbfca8..9b9d89073a637fb769687684ead23829e5445c90 100644 --- a/src/operators/elementwise_sub_op.cpp +++ b/src/operators/elementwise_sub_op.cpp @@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(elementwise_sub, ops::ElementwiseSubOp); -#endif #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/feed_op.cpp b/src/operators/feed_op.cpp index 9e0b037c8dff4e4ea27d6f2f3155d06c9ed4821f..9c35e5df9bff29d8fcc05b61d9ccbd13ff59a5ce 100644 --- a/src/operators/feed_op.cpp +++ b/src/operators/feed_op.cpp @@ -38,9 +38,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(feed, ops::FeedOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(feed, ops::FeedOp); #endif diff --git a/src/operators/fetch_op.cpp b/src/operators/fetch_op.cpp index 2d0ac82ec8a1d9338b4e1784d19587cf09fdba74..eb814c8d5648510b9706389375ad5d9a286e9524 100644 --- a/src/operators/fetch_op.cpp +++ b/src/operators/fetch_op.cpp @@ -30,9 +30,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fetch, ops::FetchOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp); -#endif + #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp); #endif diff --git 
a/src/operators/fusion_conv_add_op.cpp b/src/operators/fusion_conv_add_op.cpp index 49cf29c38e40f5a55fa0546e988d2860a6842f6b..35a8cf326c00b32b0d9048b8ae57f00f962884c1 100644 --- a/src/operators/fusion_conv_add_op.cpp +++ b/src/operators/fusion_conv_add_op.cpp @@ -54,9 +54,6 @@ REGISTER_FUSION_MATCHER(fusion_conv_add, ops::FusionConvAddMatcher); #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_conv_add, ops::FusionConvAddOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp); -#endif #ifdef PADDLE_MOBILE_CL REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp); diff --git a/src/operators/fusion_deconv_add_bn_op.cpp b/src/operators/fusion_deconv_add_bn_op.cpp index cb22e29f0903259d7bcf46271fb2a8bd70ba8eb7..e83e29d2eaf341faf178c5aa1b5b522407c17468 100644 --- a/src/operators/fusion_deconv_add_bn_op.cpp +++ b/src/operators/fusion_deconv_add_bn_op.cpp @@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators; REGISTER_FUSION_MATCHER(fusion_deconv_add_bn, ops::FusionDeconvAddBNMatcher); #ifdef PADDLE_MOBILE_CPU #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn, ops::FusionDeconvAddBNOp); #endif diff --git a/src/operators/fusion_deconv_add_bn_relu_op.cpp b/src/operators/fusion_deconv_add_bn_relu_op.cpp index b7e9abe660b350e9d3ccc89aef685505a7449a9f..9f3ca09c3e3e7b0136c1c769540469f7eede74ab 100755 --- a/src/operators/fusion_deconv_add_bn_relu_op.cpp +++ b/src/operators/fusion_deconv_add_bn_relu_op.cpp @@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu, ops::FusionDeconvAddBNReluMatcher); #ifdef PADDLE_MOBILE_CPU #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn_relu, ops::FusionDeconvAddBNReluOp); #endif diff --git a/src/operators/fusion_deconv_add_op.cpp b/src/operators/fusion_deconv_add_op.cpp index 
99af70c1c05c166481f522282bee11895546afa5..717039cd3db66c7af0e9d6d0fd16d8607b5d6bed 100644 --- a/src/operators/fusion_deconv_add_op.cpp +++ b/src/operators/fusion_deconv_add_op.cpp @@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators; REGISTER_FUSION_MATCHER(fusion_deconv_add, ops::FusionDeconvAddMatcher); #ifdef PADDLE_MOBILE_CPU #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_deconv_add, ops::FusionDeconvAddOp); #endif diff --git a/src/operators/fusion_deconv_add_relu_op.cpp b/src/operators/fusion_deconv_add_relu_op.cpp index cb76eda2ea942d8852217f02a6ed54a60c3b4cc4..a461bce2efd27ebff50f705137e88970579ff62d 100644 --- a/src/operators/fusion_deconv_add_relu_op.cpp +++ b/src/operators/fusion_deconv_add_relu_op.cpp @@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_relu, ops::FusionDeconvAddReluMatcher); #ifdef PADDLE_MOBILE_CPU #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_deconv_add_relu, ops::FusionDeconvAddReluOp); #endif diff --git a/src/operators/fusion_deconv_bn_relu_op.cpp b/src/operators/fusion_deconv_bn_relu_op.cpp index 22f549d1fcd501c420d3fb3c209c4dbb1273f7a8..207acd93802e07e5891c07c3a72b701fb0e77fca 100644 --- a/src/operators/fusion_deconv_bn_relu_op.cpp +++ b/src/operators/fusion_deconv_bn_relu_op.cpp @@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators; REGISTER_FUSION_MATCHER(fusion_deconv_bn_relu, ops::FusionDeconvBNReluMatcher); #ifdef PADDLE_MOBILE_CPU #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_deconv_bn_relu, ops::FusionDeconvBNReluOp); #endif diff --git a/src/operators/fusion_deconv_relu_op.cpp b/src/operators/fusion_deconv_relu_op.cpp index daae39c951b34fa05962f936c28381f7d5d4e15c..7c48c4f14caa310b7ddd2d1414e19c1586cfe7a6 100644 --- a/src/operators/fusion_deconv_relu_op.cpp +++ b/src/operators/fusion_deconv_relu_op.cpp @@ -23,8 +23,7 @@ namespace 
operators {} namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_deconv_relu, ops::FusionDeconvReluOp); #endif diff --git a/src/operators/fusion_elementwise_add_relu_op.cpp b/src/operators/fusion_elementwise_add_relu_op.cpp index 0297fb01f54f731d97b274d664593be378b069e5..7c7aa7a49e8b6fa4fb90cd442d477902bfd4317b 100644 --- a/src/operators/fusion_elementwise_add_relu_op.cpp +++ b/src/operators/fusion_elementwise_add_relu_op.cpp @@ -36,10 +36,6 @@ REGISTER_FUSION_MATCHER(fusion_elementwise_add_relu, // REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu, // ops::FusionElementwiseAddReluOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -// REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu, -// ops::FusionElementwiseAddReluOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_elementwise_add_relu, ops::FusionElementwiseAddReluOp); diff --git a/src/operators/fusion_fc_op.cpp b/src/operators/fusion_fc_op.cpp index f2e98b2b4ceae283ddbe04af06e8926f1b8bb47f..4a26c22152cec3db118ee558c9cd0055fc4d4f8a 100644 --- a/src/operators/fusion_fc_op.cpp +++ b/src/operators/fusion_fc_op.cpp @@ -63,9 +63,6 @@ REGISTER_OPERATOR_CPU(fusion_fc, ops::FusionFcOp); #ifdef PADDLE_MOBILE_CL REGISTER_OPERATOR_CL(fusion_fc, ops::FusionFcOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(fusion_fc, ops::FusionFcOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_fc, ops::FusionFcOp); #endif diff --git a/src/operators/fusion_fc_relu_op.cpp b/src/operators/fusion_fc_relu_op.cpp index e11da8814b3a5ef3b128be944965fb97d6142da8..c797f72e8cddb0c07ade6aecb8c06420fc057749 100644 --- a/src/operators/fusion_fc_relu_op.cpp +++ b/src/operators/fusion_fc_relu_op.cpp @@ -60,9 +60,6 @@ REGISTER_FUSION_MATCHER(fusion_fc_relu, ops::FusionFcReluMatcher); #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(fusion_fc_relu, ops::FusionFcReluOp); #endif -#ifdef 
PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(fusion_fc_relu, ops::FusionFcReluOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fusion_fc_relu, ops::FusionFcReluOp); #endif diff --git a/src/operators/gru_unit_op.cpp b/src/operators/gru_unit_op.cpp index 38ddd217943fd63ce457b62a8c695ba0fa684b8d..5dd1cd3dd38efc916f428d4408d9d53274c56e89 100644 --- a/src/operators/gru_unit_op.cpp +++ b/src/operators/gru_unit_op.cpp @@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(gru_unit, ops::GruUnitOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/increment_op.cpp b/src/operators/increment_op.cpp index 4b2cd0462e5392f20deb86231b02745458a83b3e..841c063a205eff045061b4843eefb0a35b499f30 100644 --- a/src/operators/increment_op.cpp +++ b/src/operators/increment_op.cpp @@ -37,8 +37,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(increment, ops::IncrementOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/is_empty_op.cpp b/src/operators/is_empty_op.cpp index 654b998ebdfa6f6b0f40401a32ab5968c9dfeee1..e3d71c8427a4fd1d82c8491522a5c2ceacb9f120 100644 --- a/src/operators/is_empty_op.cpp +++ b/src/operators/is_empty_op.cpp @@ -34,8 +34,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(is_empty, ops::IsEmptyOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/kernel/mali/ACL_Android b/src/operators/kernel/mali/ACL_Android deleted file mode 160000 index 591027fcffea084100c756e48356e0f8a48e35e5..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/ACL_Android +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 591027fcffea084100c756e48356e0f8a48e35e5 diff --git a/src/operators/kernel/mali/acl_operator.cc 
b/src/operators/kernel/mali/acl_operator.cc deleted file mode 100755 index 562d2fe1c46aa7a30b6418c7a3fcb21daafffa0f..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/acl_operator.cc +++ /dev/null @@ -1,220 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#if USE_ACL == 1 -#include "acl_operator.h" -unsigned int bypass_acl_class_layer = - (0 | FLAGS_ENABLE_ACL_CONCAT | - /*0xffffffff |*/ /*FLAGS_ENABLE_ACL_FC |*/ /*FLAGS_ENABLE_ACL_LRN - |*/ - 0); - -int enable_schedule = 0; - -#ifdef USE_PROFILING - -#include "arm_neon.h" - -unsigned int acl_log_flags = - (0 | MASK_LOG_APP_TIME | /*MASK_LOG_ALLOCATE | */ /*MASK_LOG_ALLOCATE | */ - /*MASK_LOG_RUN | */ /*MASK_LOG_CONFIG | */ /*MASK_LOG_COPY | */ - MASK_LOG_ABSVAL | MASK_LOG_BNLL | MASK_LOG_CONV | MASK_LOG_FC | - MASK_LOG_LRN | MASK_LOG_POOLING | MASK_LOG_RELU | MASK_LOG_SIGMOID | - MASK_LOG_SOFTMAX | MASK_LOG_TANH | MASK_LOG_LC | MASK_LOG_BN | - MASK_LOG_CONCAT | 0); -#include /* printf */ -#include /* getenv */ -#endif // USE_PROFILING - -static bool force_enable_gpu = false; -bool AclEnableSchedule(int enable) { - enable_schedule = enable; - if (enable) { - force_enable_gpu = true; - } - return true; -} -int isScheduleEnable() { return enable_schedule; } - -namespace paddle_mobile { -namespace operators { -namespace acl { - -bool ACLOperator::init_gpu_env = true; -#ifdef USE_OPENCL -bool ACLOperator::support_opencl_ = false; -bool 
opencl_is_available() { return arm_compute::opencl_is_available(); } -#elif defined(USE_OPENGLES) -bool ACLOperator::support_opengles_ = false; -#endif -ACLOperator::ACLOperator(bool is_gpu) - : operator_state_(operator_not_init), - force_bypass_acl_path_(false), - target_hint_(TargetHint::DONT_CARE), - convolution_method_hint_(ConvolutionMethodHint::GEMM), - _group(1), - name_(""), - input_idx_(0), - output_idx_(0), - is_gpu_(is_gpu) { - const char* pBypassACL; - if (init_gpu_env) { -#ifdef USE_OPENCL - try { - if (opencl_is_available()) { - arm_compute::CLScheduler::get().default_init(); - support_opencl_ = true; - } - } catch (std::exception& e) { - support_opencl_ = false; - } -#elif defined(USE_OPENGLES) - try { - arm_compute::GCScheduler::get().default_init(); - support_opengles_ = true; - } catch (std::exception& e) { - support_opengles_ = false; - } -#endif - init_gpu_env = false; - } - if (force_enable_gpu) is_gpu_ = true; - pBypassACL = getenv("BYPASSACL"); - if (pBypassACL) { - unsigned int bacl; - sscanf(pBypassACL, "%i", &bacl); - if (bacl != bypass_acl_class_layer) { - bypass_acl_class_layer = bacl; - printf("BYPASSACL<%s>\n", pBypassACL); - printf("BYPASSACL: %x\n", bypass_acl_class_layer); - } - } - -#ifdef USE_PROFILING - const char* pLogACL; - pLogACL = getenv("LOGACL"); - if (pLogACL) { - unsigned int alf; - sscanf(pLogACL, "%i", &alf); - if (alf != acl_log_flags) { - acl_log_flags = alf; - printf("LOGACL<%s>\n", pLogACL); - printf("LOGACL: %x\n", acl_log_flags); - } - } -#endif // USE_PROFILING - const char* pEnableSchedule; - pEnableSchedule = getenv("ENABLESCHEDULE"); - if (pEnableSchedule) { - int bshedule; - sscanf(pEnableSchedule, "%i", &bshedule); - if (bshedule != enable_schedule) { - enable_schedule = bshedule; - printf("ENABLESCHEDULE<%s>\n", pEnableSchedule); - printf("ENABLESCHEDULE: %x\n", enable_schedule); - } - if (enable_schedule) { - AclEnableSchedule(1); - } - } -} -ACLOperator::~ACLOperator() {} - -bool 
ACLOperator::new_tensor(std::unique_ptr& tensor, - arm_compute::TensorShape& shape, void* mem, - bool commit) { - auto acl_tensor = - new ACLTensor(arm_compute::TensorInfo(shape, arm_compute::Format::F32)); - acl_tensor->set_target(getTargetHint()); - acl_tensor->bindmem(mem); - if (commit) acl_tensor->commit(); - tensor = (std::unique_ptr)std::move(acl_tensor); - return true; -} -bool ACLOperator::new_tensor(std::unique_ptr& tensor, - std::unique_ptr& parent, - arm_compute::TensorShape& shape, - arm_compute::Coordinates& coord) { - auto acl_tensor = new ACLSubTensor(parent, shape, coord); - acl_tensor->set_target(getTargetHint()); - tensor = (std::unique_ptr)std::move(acl_tensor); - return true; -} - -void ACLTensor::commit(TensorType type) { - settensortype(type); - if (mem_) { - if (!allocate_) { -#ifdef USE_PROFILING - logtime_util log_time(ACL_ALLOCATE_INFO); -#endif // USE_PROFILING - allocate(); - allocate_ = true; - } - if (type_ != tensor_output) { - tensor_copy(mem_); - } - mem_ = nullptr; - } -} - -int BaseACLTensor::tensor_copy(arm_compute::ITensor* tensor, void* mem, - bool toTensor) { -#ifdef USE_PROFILING - logtime_util log_time(ACL_COPY_INFO); -#endif // USE_PROFILING - arm_compute::Window window; - // Iterate through the rows (not each element) - window.use_tensor_dimensions(tensor->info()->tensor_shape(), - /* first_dimension =*/arm_compute::Window::DimY); - - int width = tensor->info()->tensor_shape()[0]; - int height = tensor->info()->tensor_shape()[1]; - int deepth = tensor->info()->tensor_shape()[2]; - map(); - // Create an iterator: - arm_compute::Iterator it(tensor, window); - // Except it works for an arbitrary number of dimensions - if (toTensor) { // mem->tensor - arm_compute::execute_window_loop( - window, - [&](const arm_compute::Coordinates& id) { - memcpy(it.ptr(), - ((char*)mem) + - ((id[3] * (width * height * deepth) + - id.z() * (width * height) + id.y() * width + id.x()) * - tensor->info()->element_size()), - width * 
tensor->info()->element_size()); - }, - it); - } else { // tensor-->mem - arm_compute::execute_window_loop( - window, - [&](const arm_compute::Coordinates& id) { - memcpy(((char*)mem) + ((id[3] * (width * height * deepth) + - id.z() * (width * height) + id.y() * width) * - tensor->info()->element_size()), - it.ptr(), width * tensor->info()->element_size()); - }, - it); - } - unmap(); - - return 0; -} - -} // namespace acl -} // namespace operators -} // namespace paddle_mobile -#endif diff --git a/src/operators/kernel/mali/acl_operator.h b/src/operators/kernel/mali/acl_operator.h deleted file mode 100755 index bf8200d486f91998c79540177ab1b26596a3e9dc..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/acl_operator.h +++ /dev/null @@ -1,1145 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifndef ACL_OPERATOR_H_ -#define ACL_OPERATOR_H_ -#include -#include - -#if USE_ACL == 1 -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" -#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" -#include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h" -#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" -#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" -#include "arm_compute/runtime/Tensor.h" - -#ifdef PADDLE_MOBILE_MALI_GPU -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" -#include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h" -#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h" -#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" -#include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h" -#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" -#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" -#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" -#endif - -#ifdef USE_OPENGLES -#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" -#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" -#include 
"arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" -#endif - -#include "acl_tensor.h" -#define FLAGS_ENABLE_ACL_ABSVAL 0x00000001 -#define FLAGS_ENABLE_ACL_BNLL 0x00000002 -#define FLAGS_ENABLE_ACL_CONV 0x00000004 -#define FLAGS_ENABLE_ACL_FC 0x00000008 -#define FLAGS_ENABLE_ACL_LRN 0x00000010 -#define FLAGS_ENABLE_ACL_POOLING 0x00000020 -#define FLAGS_ENABLE_ACL_RELU 0x00000040 -#define FLAGS_ENABLE_ACL_SIGMOID 0x00000080 -#define FLAGS_ENABLE_ACL_SOFTMAX 0x00000100 -#define FLAGS_ENABLE_ACL_TANH 0x00000200 -#define FLAGS_ENABLE_ACL_LC 0x00000400 -#define FLAGS_ENABLE_ACL_BN 0x00000800 -#define FLAGS_ENABLE_ACL_CONCAT 0x00001000 -extern unsigned int bypass_acl_class_layer; - -#ifdef USE_PROFILING -#include -#define NANO_SEC_CONV 1000000 - -#define MASK_LOG_APP_TIME 0x00000001 -#define MASK_LOG_ALLOCATE 0x00000002 -#define MASK_LOG_RUN 0x00000004 -#define MASK_LOG_CONFIG 0x00000008 -#define MASK_LOG_COPY 0x00000010 -#define MASK_LOG_ABSVAL 0x00000020 -#define MASK_LOG_BNLL 0x00000040 -#define MASK_LOG_CONV 0x00000080 -#define MASK_LOG_FC 0x00000100 -#define MASK_LOG_LRN 0x00000200 -#define MASK_LOG_POOLING 0x00000400 -#define MASK_LOG_RELU 0x00000800 -#define MASK_LOG_SIGMOID 0x00001000 -#define MASK_LOG_SOFTMAX 0x00002000 -#define MASK_LOG_TANH 0x00004000 -#define MASK_LOG_LC 0x00008000 -#define MASK_LOG_BN 0x00010000 -#define MASK_LOG_CONCAT 0x00020000 -#define APP_TIME_INFO MASK_LOG_APP_TIME, "time: \t" -#define ACL_ALLOCATE_INFO MASK_LOG_ALLOCATE, "allocate: \t\t" -#define ACL_RUN_INFO 
MASK_LOG_RUN, "run: \t\t\t" -#define ACL_CONFIG_INFO MASK_LOG_CONFIG, "configure: \t\t\t\t" -#define ACL_COPY_INFO MASK_LOG_COPY, "tensor_copy:\t\t\t\t\t" -#define ACL_ABSVAL_INFO MASK_LOG_ABSVAL, "ACL_ABSVAL :\t\t\t\t\t\t" -#define ACL_BNLL_INFO MASK_LOG_BNLL, "ACL_BNLL :\t\t\t\t\t\t\t" -#define ACL_CONV_INFO MASK_LOG_CONV, "ACL_CONV :\t\t\t\t\t\t\t\t" -#define ACL_FC_INFO MASK_LOG_FC, "ACL_FC :\t\t\t\t\t\t\t\t\t" -#define ACL_LRN_INFO MASK_LOG_LRN, "ACL_LRN :\t\t\t\t\t\t\t\t\t\t" -#define ACL_POOLING_INFO MASK_LOG_POOLING, "ACL_POOLING:\t\t\t\t\t\t\t\t\t\t\t" -#define ACL_RELU_INFO MASK_LOG_RELU, "ACL_RELU :\t\t\t\t\t\t\t\t\t\t\t\t" -#define ACL_SIGMOID_INFO \ - MASK_LOG_SIGMOID, "ACL_SIGMOID:\t\t\t\t\t\t\t\t\t\t\t\t\t" -#define ACL_SOFTMAX_INFO \ - MASK_LOG_SOFTMAX, "ACL_SOFTMAX:\t\t\t\t\t\t\t\t\t\t\t\t\t\t" -#define ACL_TANH_INFO \ - MASK_LOG_TANH, "ACL_TANH :\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" -#define ACL_LC_INFO MASK_LOG_LC, "ACL_LC :\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" -#define ACL_BN_INFO \ - MASK_LOG_BN, "ACL_BN :\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" -#define ACL_CONCAT_INFO \ - MASK_LOG_CONCAT, "ACL_CONCAT :\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" -extern unsigned int acl_log_flags; - -class logtime_util { - public: - logtime_util() { mask = 0; } - logtime_util(int mask_, const char *information_) { - setlogtime_info(mask_, information_); - } - void setlogtime_info(int mask_, const char *information_) { - mask = mask_; - if (acl_log_flags & mask) { - strncpy(information, information_, 255); - gettimeofday(&tv[0], NULL); - } - } - ~logtime_util() { - if (acl_log_flags & mask) { - int time[2]; - gettimeofday(&tv[1], NULL); - time[0] = tv[0].tv_sec * NANO_SEC_CONV + tv[0].tv_usec; - time[1] = tv[1].tv_sec * NANO_SEC_CONV + tv[1].tv_usec; - printf("%s %.6lf\n", information, - (((double)time[1] - time[0]) / NANO_SEC_CONV)); - } - } - void log_time(bool start) { - if (acl_log_flags & mask) { - if (start) { - gettimeofday(&tv[0], NULL); - } else { - int time[2]; - 
gettimeofday(&tv[1], NULL); - time[0] = tv[0].tv_sec * NANO_SEC_CONV + tv[0].tv_usec; - time[1] = tv[1].tv_sec * NANO_SEC_CONV + tv[1].tv_usec; - printf("%s %.6lf\n", information, - (((double)time[1] - time[0]) / NANO_SEC_CONV)); - } - } - } - - private: - struct timeval tv[2]; - int mask; - char information[256]; -}; - -#endif // USE_PROFILING - -namespace paddle_mobile { -namespace operators { -namespace acl { - -class AclParameters { - public: - AclParameters() { - dilated = false; - dim = 2; - num_group = 1; - } - int batch; - int in_depth; - int in_rows; - int in_cols; - - int out_depth; - int out_rows; - int out_cols; - int out_num; - - int filter_rows; - int filter_cols; - - int stride_rows; - int stride_cols; - - int pad_rows; - int pad_cols; - - int dilation_rows; - int dilation_cols; - - int num_group; - bool dilated; - int dim; - int epsilon; - - int nsize; - float alpha; - float beta; - float knorm; - - void *input_data; - void *output_data; - void *weight_data; - void *biases_data; - void *mean_data; - void *var_data; - - std::string pool_type; - std::string act_type; - std::string data_layout; - - bool is_global_pool; - bool is_channel_concat; - bool is_bypass; - - std::vector in_tensor; -}; - -enum TensorType { - tensor_input, - tensor_output, - tensor_weights, - tensor_biases, - tensor_mean, - tensor_var, - tensor_beta, - tensor_gamma, - tensor_concat, - tensor_data, -}; -enum OperatorState { - operator_not_init, - operator_init_done, - operator_reinit, -}; -enum OperateType { - operate_type_pooling, - operate_type_activation, - operate_type_lrn, - operate_type_conv, - operate_type_lc, - operate_type_fc, - operate_type_bn, - operate_type_softmax, - operate_type_concat, -}; - -class BaseACLTensor { - public: - BaseACLTensor() : type_(tensor_input), allocate_(false) {} - virtual ~BaseACLTensor() {} - virtual void bindmem(void *mem) { mem_ = mem; } - virtual void settensortype(TensorType type) { type_ = type; } - virtual void map(bool blocking = true) 
{} - virtual void unmap() {} - virtual void commit(TensorType type = tensor_data) {} - int tensor_copy(arm_compute::ITensor *tensor, void *mem, - bool toTensor = true); - - protected: - void *mem_; - TensorType type_; - bool allocate_; -}; -class ACLTensor : public BaseACLTensor, public Tensor { - public: - explicit ACLTensor(arm_compute::TensorInfo &&info) : Tensor(info) {} - virtual void map(bool blocking = true) { - if (!allocate_) { - Tensor::allocate(); - allocate_ = true; - } - Tensor::map(blocking); - } - virtual int tensor_copy(void *mem, bool toTensor = true) { - auto acl_tensor = this; - arm_compute::ITensor *tensor = acl_tensor->tensor(); - BaseACLTensor::tensor_copy(tensor, mem, toTensor); - return 0; - } - virtual void unmap() { Tensor::unmap(); } - virtual void commit(TensorType type = tensor_data); -}; -class ACLSubTensor : public BaseACLTensor, public SubTensor { - public: - ACLSubTensor(std::unique_ptr &parent, - arm_compute::TensorShape &shape, arm_compute::Coordinates &coord) - : SubTensor(parent.get(), shape, coord) {} - virtual int tensor_copy(void *mem, bool toTensor = true) { return 0; } -}; - -template -class TensorPair { - public: - TensorPair() {} - ~TensorPair() {} - TensorType type; - std::unique_ptr tensor; -}; -template -std::unique_ptr &tensor_item( - std::vector>> &pool, TensorType type, - int idx) { - int count = 0; - for (auto &item : pool) { - if (item.get()->type == type) { - ++count; - } - if (item.get()->type == type && idx == count - 1) { - return item.get()->tensor; - } - } - pool.push_back((std::unique_ptr>)std::move(new TensorPair)); - auto item = pool[pool.size() - 1].get(); - item->type = type; - item->tensor = NULL; - return item->tensor; -} -class ACLOperator { - public: - virtual void commit() { - for (auto &item : tensor_pool_) { - if (item.get()->tensor) item.get()->tensor->commit(item.get()->type); - } - } - inline void run() { - commit(); -#ifdef USE_PROFILING - logtime_util log_time(ACL_RUN_INFO); -#endif // 
USE_PROFILING - for (auto &c : funcs_) { - c->run(); - } - } - - inline std::vector> &funcs() { - return funcs_; - } - inline std::unique_ptr &sinput(int idx = 0) { - return tensor_item(subtensor_pool_, tensor_input, idx); - } - inline std::unique_ptr &soutput(int idx = 0) { - return tensor_item(subtensor_pool_, tensor_output, idx); - } - inline std::unique_ptr &sweights(int idx = 0) { - return tensor_item(subtensor_pool_, tensor_weights, idx); - } - inline std::unique_ptr &sbiases(int idx = 0) { - return tensor_item(subtensor_pool_, tensor_biases, idx); - } - inline std::unique_ptr &cinput(int idx = 0) { - return tensor_item(tensor_pool_, tensor_concat, idx); - } - inline std::unique_ptr &input(int idx = 0) { - return tensor_item(tensor_pool_, tensor_input, idx); - } - inline std::unique_ptr &output(int idx = 0) { - return tensor_item(tensor_pool_, tensor_output, idx); - } - inline std::unique_ptr &weights(int idx = 0) { - return tensor_item(tensor_pool_, tensor_weights, idx); - } - inline std::unique_ptr &biases(int idx = 0) { - return tensor_item(tensor_pool_, tensor_biases, idx); - } - inline std::unique_ptr &mean(int idx = 0) { - return tensor_item(tensor_pool_, tensor_mean, idx); - } - inline std::unique_ptr &var(int idx = 0) { - return tensor_item(tensor_pool_, tensor_var, idx); - } - inline std::unique_ptr &beta(int idx = 0) { - return tensor_item(tensor_pool_, tensor_beta, idx); - } - inline std::unique_ptr &gamma(int idx = 0) { - return tensor_item(tensor_pool_, tensor_gamma, idx); - } - inline std::unique_ptr &tensor(TensorType type) { - switch (type) { - case tensor_biases: - return biases(); - break; - case tensor_weights: - return weights(); - break; - case tensor_output: - return output(); - break; - default: - case tensor_input: - return input(); - break; - } - return input(); - } - - explicit ACLOperator(bool is_gpu = false); - virtual ~ACLOperator(); - inline TargetHint getTargetHint() { -#ifdef USE_OPENCL - if (target_hint_ == 
TargetHint::DONT_CARE) { - if (is_gpu_) { - return TargetHint::OPENCL; - } - return TargetHint::NEON; - } - return target_hint_; -#elif defined(USE_OPENGLES) - if (target_hint_ == TargetHint::DONT_CARE) { - if (is_gpu_) { - return TargetHint::OPENGLES; - } - return TargetHint::NEON; - } - return target_hint_; -#else - return TargetHint::NEON; -#endif - } - inline void setTargetHint(TargetHint hint) { target_hint_ = hint; } - inline ConvolutionMethodHint &getConvMethod() { - return convolution_method_hint_; - } - inline void setConvMethod() { - convolution_method_hint_ = ConvolutionMethodHint::DIRECT; - } - inline bool tensor_mem(std::unique_ptr &tensor, void *mem) { - tensor->bindmem(mem); - return true; - } - inline bool tensor_mem(void *mem, std::unique_ptr &tensor) { - tensor->tensor_copy(mem, false); - return true; - } - bool new_tensor(std::unique_ptr &tensor, - arm_compute::TensorShape &shape, void *mem = nullptr, - bool commit = false); - bool new_tensor(std::unique_ptr &tensor, - std::unique_ptr &parent, - arm_compute::TensorShape &shape, - arm_compute::Coordinates &coord); - inline int &group() { return _group; } - inline void set_operator_property(OperateType type, const char *name) { - name_ = name; - type_ = type; - } - inline void acl_run(void *input_data, void *output_data) { - if (input_data) tensor_mem(input(), input_data); - run(); - tensor_mem(output_data, output()); - } - inline int &input_idx() { return input_idx_; } - inline int &output_idx() { return output_idx_; } - - protected: - inline bool isGPUMode() { -#ifdef USE_OPENCL - if (!support_opencl_) return false; - return getTargetHint() == TargetHint::OPENCL; -#elif defined(USE_OPENGLES) - if (!support_opengles_) return false; - return getTargetHint() == TargetHint::OPENGLES; -#endif - return false; - } - inline OperatorState &opstate() { return operator_state_; } - inline bool is_operator_init_done(arm_compute::TensorShape shape, - TensorType type = tensor_input) { - checkreshape(shape, 
type); - return operator_state_ == operator_init_done; - } - inline void set_operator_init_done() { - opstate() = operator_init_done; - set_bypass_state(false); - } - inline void set_bypass_state(bool state = false) { - force_bypass_acl_path_ = state; - } - inline OperatorState checkreshape(arm_compute::TensorShape shape, - TensorType type = tensor_input) { - opstate() = reshape(shape, type); - if (opstate() == operator_reinit) { - freeres(); - } - return opstate(); - } - inline OperatorState reshape(arm_compute::TensorShape &shape, - TensorType type) { - arm_compute::TensorShape _shape; - std::unique_ptr &acl_tensor = tensor(type); - if (!acl_tensor.get()) return operator_not_init; - _shape = acl_tensor->info().tensor_shape(); - if (_shape.total_size() == shape.total_size() && _shape[0] == shape[0] && - _shape[1] == shape[1]) { - return operator_init_done; - } - return operator_reinit; - } - inline void freeres() { - tensor_pool_.clear(); - subtensor_pool_.clear(); - funcs_.clear(); - } - inline const char *&name() { return name_; } - inline void set_in_out_index(int indata_idx, int outdata_idx) { - input_idx() = indata_idx; - output_idx() = outdata_idx; - } - - protected: - std::vector>> tensor_pool_; - std::vector>> subtensor_pool_; - std::vector> funcs_; - OperatorState operator_state_; - bool force_bypass_acl_path_; - TargetHint target_hint_; - ConvolutionMethodHint convolution_method_hint_; - static bool support_opengles_; - static bool support_opencl_; - static bool init_gpu_env; - int _group; - const char *name_; - OperateType type_; - int input_idx_, output_idx_; - bool is_gpu_; -}; - -int isScheduleEnable(); - -template -std::unique_ptr instantiate_function( - arm_compute::ITensor *input, arm_compute::ITensor *output) { - auto op = cpp14::make_unique(); - op->configure(dynamic_cast(input), - dynamic_cast(output)); - - return std::move(op); -} - -template -std::unique_ptr instantiate( - arm_compute::ITensor *input, arm_compute::ITensor *output) { - return 
instantiate_function(input, output); -} - -template -std::unique_ptr instantiate_op_func( - std::unique_ptr &input, std::unique_ptr &output, - TargetHint &hint) { - std::unique_ptr func; - func = instantiate(input->tensor(), output->tensor()); - return func; -} - -template -std::unique_ptr instantiate_function( - VectorTensor inputs, arm_compute::ITensor *output) { - auto op = cpp14::make_unique(); - op->configure(inputs, dynamic_cast(output)); - - return std::move(op); -} - -template -std::unique_ptr instantiate( - VectorTensor inputs, arm_compute::ITensor *output) { - return instantiate_function(inputs, - output); -} - -template -std::unique_ptr instantiate_op_func_lists( - ACLOperator *&acl_op, std::unique_ptr &output, int num, - TargetHint &hint) { - std::unique_ptr func; - static std::vector tensors; - tensors.clear(); - for (int i = 0; i < num; ++i) { - tensors.push_back( - dynamic_cast(acl_op->cinput(i).get()->tensor())); - } - func = instantiate>( - tensors, output->tensor()); - return func; -} - -template -std::unique_ptr instantiate_function( - arm_compute::ITensor *input, arm_compute::ITensor *output, - const OperatorInfo &info) { - auto op = cpp14::make_unique(); - op->configure(dynamic_cast(input), - dynamic_cast(output), info); - - return std::move(op); -} - -template -std::unique_ptr instantiate( - arm_compute::ITensor *input, arm_compute::ITensor *output, - const OperatorInfo &info) { - return instantiate_function( - input, output, info); -} - -template -std::unique_ptr instantiate_op_func( - std::unique_ptr &input, std::unique_ptr &output, - const OperatorInfo &info, TargetHint &hint) { - std::unique_ptr func; - func = instantiate(input->tensor(), - output->tensor(), info); - return func; -} - -template -std::unique_ptr instantiate_function( - arm_compute::ITensor *input, arm_compute::ITensor *weights, - arm_compute::ITensor *biases, arm_compute::ITensor *output, - const OperatorInfo &info) { - auto op = cpp14::make_unique(); - 
op->configure(dynamic_cast(input), - dynamic_cast(weights), - dynamic_cast(biases), - dynamic_cast(output), info); - return std::move(op); -} - -template -std::unique_ptr instantiate( - arm_compute::ITensor *input, arm_compute::ITensor *weights, - arm_compute::ITensor *biases, arm_compute::ITensor *output, - const OperatorInfo &info) { - return instantiate_function( - input, weights, biases, output, info); -} - -template -std::unique_ptr instantiate_op_func( - std::unique_ptr &input, std::unique_ptr &weights, - std::unique_ptr &biases, std::unique_ptr &output, - const OperatorInfo &info, TargetHint &hint) { - std::unique_ptr func; - arm_compute::ITensor *biases_tensor = NULL; - - if (biases.get()) { - biases_tensor = biases->tensor(); - } - func = instantiate( - input->tensor(), weights->tensor(), biases_tensor, output->tensor(), - info); - return func; -} - -template -std::unique_ptr instantiate_function( - arm_compute::ITensor *input, arm_compute::ITensor *output, - arm_compute::ITensor *mean, arm_compute::ITensor *var, - arm_compute::ITensor *beta, arm_compute::ITensor *gamma, Dtype &eps) { - auto op = cpp14::make_unique(); - op->configure( - dynamic_cast(input), dynamic_cast(output), - dynamic_cast(mean), dynamic_cast(var), - dynamic_cast(beta), dynamic_cast(gamma), eps); - - return std::move(op); -} - -template -std::unique_ptr instantiate( - arm_compute::ITensor *input, arm_compute::ITensor *output, - arm_compute::ITensor *mean, arm_compute::ITensor *var, - arm_compute::ITensor *beta, arm_compute::ITensor *gamma, Dtype eps) { - return instantiate_function( - input, output, mean, var, beta, gamma, eps); -} - -template -std::unique_ptr instantiate_op_func( - std::unique_ptr &input, std::unique_ptr &output, - std::unique_ptr &mean, std::unique_ptr &var, - std::unique_ptr &beta, std::unique_ptr &gamma, - Dtype eps, TargetHint hint) { - std::unique_ptr func; - func = instantiate( - input->tensor(), output->tensor(), mean->tensor(), var->tensor(), - beta->tensor(), 
gamma->tensor(), eps); - return func; -} - -template -bool instantiate_op_pooling( - ACLOperator *acl_op, - std::vector> &func, - std::unique_ptr &input, std::unique_ptr &output, - TargetHint hint, const OperatorInfo &info) { -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back( - instantiate_op_func(input, output, info, - hint)); - return true; - } -#elif defined(USE_OPENGLES) - if (hint == TargetHint::OPENGLES) { - func.push_back( - instantiate_op_func(input, output, info, - hint)); - return true; - } -#endif - { - func.push_back( - instantiate_op_func(input, output, info, - hint)); - } - return true; -} -template -bool instantiate_op_activation( - ACLOperator *acl_op, - std::vector> &func, - std::unique_ptr &input, std::unique_ptr &output, - TargetHint hint, const OperatorInfo &info) { -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back(instantiate_op_func( - input, output, info, hint)); - return true; - } -#elif defined(USE_OPENGLES) - if (hint == TargetHint::OPENGLES) { - func.push_back(instantiate_op_func( - input, output, info, hint)); - return true; - } -#endif - { - func.push_back(instantiate_op_func( - input, output, info, hint)); - } - return true; -} -template -bool instantiate_op_lrn( - ACLOperator *acl_op, - std::vector> &func, - std::unique_ptr &input, std::unique_ptr &output, - TargetHint hint, const OperatorInfo &info) { -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back(instantiate_op_func( - input, output, info, hint)); - return true; - } -#elif defined(USE_OPENGLES) - if (hint == TargetHint::OPENGLES) { - func.push_back(instantiate_op_func( - input, output, info, hint)); - return true; - } -#endif - { - func.push_back(instantiate_op_func( - input, output, info, hint)); - } - return true; -} -template -bool instantiate_op_conv( - ACLOperator *acl_op, - std::vector> &func, - std::unique_ptr &input, std::unique_ptr &output, - TargetHint hint, const OperatorInfo &info) { - std::unique_ptr 
&weights = acl_op->weights(); - std::unique_ptr &biases = acl_op->biases(); - ConvolutionMethodHint &conv_method = acl_op->getConvMethod(); - bool has_biases = biases.get() ? true : false; - int &groups = acl_op->group(); - arm_compute::TensorShape input_shape = input->info().tensor_shape(); - arm_compute::TensorShape weights_shape = weights->info().tensor_shape(); - arm_compute::TensorShape biases_shape; - if (has_biases) { - biases_shape = biases->info().tensor_shape(); - } - arm_compute::TensorShape output_shape = output->info().tensor_shape(); - - if (groups == 1) { - if (conv_method == ConvolutionMethodHint::GEMM) { -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back(instantiate_op_func( - acl_op->input(), acl_op->weights(), acl_op->biases(), - acl_op->output(), info, hint)); - return true; - } -#elif defined(USE_OPENGLES) - if (hint == TargetHint::OPENGLES) { - func.push_back(instantiate_op_func( - acl_op->input(), acl_op->weights(), acl_op->biases(), - acl_op->output(), info, hint)); - return true; - } -#endif - { - func.push_back(instantiate_op_func( - acl_op->input(), acl_op->weights(), acl_op->biases(), - acl_op->output(), info, hint)); - } - } else { -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back( - instantiate_op_func( - acl_op->input(), acl_op->weights(), acl_op->biases(), - acl_op->output(), info, hint)); - return true; - } -#elif defined(USE_OPENGLES) - if (hint == TargetHint::OPENGLES) { - func.push_back( - instantiate_op_func( - acl_op->input(), acl_op->weights(), acl_op->biases(), - acl_op->output(), info, hint)); - return true; - } -#endif - { - func.push_back( - instantiate_op_func( - acl_op->input(), acl_op->weights(), acl_op->biases(), - acl_op->output(), info, hint)); - } - } - return true; - } - - // Calculate sub-tensor splits - const int input_split = input_shape.z() / groups; - const int output_split = output_shape.z() / groups; - const int weights_split = weights_shape[3] / groups; - const int 
biases_split = biases_shape.x() / groups; - - // Calculate sub-tensor shapes - input_shape.set(2, input_split); - output_shape.set(2, output_split); - weights_shape.set(3, weights_split); - biases_shape.set(0, biases_split); - - for (auto i = 0; i < groups; ++i) { - // Calculate sub-tensors starting coordinates - arm_compute::Coordinates input_coord(0, 0, input_split * i); - arm_compute::Coordinates output_coord(0, 0, output_split * i); - arm_compute::Coordinates weights_coord(0, 0, 0, weights_split * i); - arm_compute::Coordinates biases_coord(biases_split * i); - - // Create sub-tensors for input, output, weights and bias - acl_op->new_tensor(acl_op->sinput(i), acl_op->input(), input_shape, - input_coord); - acl_op->new_tensor(acl_op->soutput(i), acl_op->output(), output_shape, - output_coord); - acl_op->new_tensor(acl_op->sweights(i), acl_op->weights(), weights_shape, - weights_coord); - if (has_biases) { - acl_op->new_tensor(acl_op->sbiases(i), acl_op->biases(), biases_shape, - biases_coord); - } - - bool use_opencl = false; - if (conv_method == ConvolutionMethodHint::GEMM) { -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - use_opencl = true; - func.push_back( - instantiate_op_func( - acl_op->sinput(i), acl_op->sweights(i), acl_op->sbiases(i), - acl_op->soutput(i), info, hint)); - } -#endif - if (!use_opencl) { - func.push_back( - instantiate_op_func( - acl_op->sinput(i), acl_op->sweights(i), acl_op->sbiases(i), - acl_op->soutput(i), info, hint)); - } - } else { -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - use_opencl = true; - func.push_back( - instantiate_op_func( - acl_op->sinput(i), acl_op->sweights(i), acl_op->sbiases(i), - acl_op->soutput(i), info, hint)); - } -#endif - if (!use_opencl) { - func.push_back( - instantiate_op_func( - acl_op->sinput(i), acl_op->sweights(i), acl_op->sbiases(i), - acl_op->soutput(i), info, hint)); - } - } - } - return true; -} -template -bool instantiate_op_lc( - ACLOperator *acl_op, - std::vector> &func, - 
std::unique_ptr &input, std::unique_ptr &output, - TargetHint hint, const OperatorInfo &info) { - std::unique_ptr &weights = acl_op->weights(); - std::unique_ptr &biases = acl_op->biases(); -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back( - instantiate_op_func( - input, weights, biases, output, info, hint)); - return true; - } -#endif - { - func.push_back( - instantiate_op_func( - input, weights, biases, output, info, hint)); - } - return true; -} -template -bool instantiate_op_fc( - ACLOperator *acl_op, - std::vector> &func, - std::unique_ptr &input, std::unique_ptr &output, - TargetHint hint, const OperatorInfo &info) { - std::unique_ptr &weights = acl_op->weights(); - std::unique_ptr &biases = acl_op->biases(); -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back(instantiate_op_func( - input, weights, biases, output, info, hint)); - return true; - } -#elif defined(USE_OPENGLES) - if (hint == TargetHint::OPENGLES) { - func.push_back(instantiate_op_func( - input, weights, biases, output, info, hint)); - return true; - } -#endif - { - func.push_back(instantiate_op_func( - input, weights, biases, output, info, hint)); - } - return true; -} -template -bool instantiate_op_bn( - ACLOperator *acl_op, - std::vector> &func, - std::unique_ptr &input, std::unique_ptr &output, - TargetHint hint, Dtype eps) { - std::unique_ptr &mean = acl_op->mean(); - std::unique_ptr &var = acl_op->var(); - std::unique_ptr &beta = acl_op->beta(); - std::unique_ptr &gamma = acl_op->gamma(); -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back( - instantiate_op_func(input, output, mean, var, - beta, gamma, eps, hint)); - return true; - } -#elif defined(USE_OPENGLES) - if (hint == TargetHint::OPENGLES) { - func.push_back( - instantiate_op_func(input, output, mean, var, - beta, gamma, eps, hint)); - return true; - } -#endif - { - func.push_back( - instantiate_op_func(input, output, mean, var, - beta, gamma, eps, hint)); - } - return 
true; -} -inline bool instantiate_op_softmax( - ACLOperator *acl_op, - std::vector> &func, - std::unique_ptr &input, std::unique_ptr &output, - TargetHint hint, void *data) { -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back( - instantiate_op_func(input, output, hint)); - return true; - } -#elif defined(USE_OPENGLES) - if (hint == TargetHint::OPENGLES) { - func.push_back( - instantiate_op_func(input, output, hint)); - return true; - } -#endif - { - func.push_back( - instantiate_op_func( - input, output, hint)); - } - return true; -} -inline bool instantiate_op_concat( - ACLOperator *acl_op, - std::vector> &func, - std::unique_ptr &input, std::unique_ptr &output, - TargetHint hint, int num) { -#ifdef USE_OPENCL - if (hint == TargetHint::OPENCL) { - func.push_back( - instantiate_op_func_lists(acl_op, output, num, - hint)); - return true; - } -#elif defined(USE_OPENGLES) - if (hint == TargetHint::OPENGLES) { - func.push_back( - instantiate_op_func_lists(acl_op, output, num, - hint)); - return true; - } -#endif - { - func.push_back( - instantiate_op_func_lists(acl_op, output, num, - hint)); - } - return true; -} -template -void *InputdataPtr(ACLOperator *op, - const std::vector &input_data, - Dtype type, int index = -1) { - if (index == -1) index = 0; - return (void *)(input_data[index]->mutable_data()); -} - -template -void acl_run(ACLOperator *op, - const std::vector &in_data, void *out_data, - Dtype type, bool multi_input_run = true) { - for (int i = 0; i < in_data.size(); ++i) { - op->tensor_mem(op->cinput(i), InputdataPtr(op, in_data, type, i)); - } - op->acl_run(NULL, out_data); -} -} // namespace acl -} // namespace operators -} // namespace paddle_mobile - -#ifdef USE_PROFILING -#define acl_configure(opname, acl_op, args...) 
\ - { \ - set_operator_property(acl::operate_type_##opname, #opname); \ - logtime_util log_time(ACL_CONFIG_INFO); \ - instantiate_op_##opname(acl_op, acl_op->funcs(), acl_op->input(), \ - acl_op->output(), acl_op->getTargetHint(), args); \ - } -#else -#define acl_configure(opname, acl_op, args...) \ - { \ - set_operator_property(acl::operate_type_##opname, #opname); \ - instantiate_op_##opname(acl_op, acl_op->funcs(), acl_op->input(), \ - acl_op->output(), acl_op->getTargetHint(), args); \ - } -#endif - -#define ACLOp_Ptr(a) dynamic_cast(a) - -#endif // USE_ACL - -#endif // ACL_OPERATOR_H_ diff --git a/src/operators/kernel/mali/acl_tensor.cc b/src/operators/kernel/mali/acl_tensor.cc deleted file mode 100755 index 97a6add20a7ca1b9a6b4f9c9a7e6d1ba1f4e2e0a..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/acl_tensor.cc +++ /dev/null @@ -1,160 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "acl_tensor.h" - -namespace paddle_mobile { -namespace operators { -namespace acl { - -#ifdef USE_ACL -template -std::unique_ptr initialise_tensor( - arm_compute::TensorInfo &info) { - auto tensor = cpp14::make_unique(); - tensor->allocator()->init(info); - return std::move(tensor); -} - -template -void tensor_allocate(arm_compute::ITensor &tensor) { - auto itensor = dynamic_cast(&tensor); - itensor->allocator()->allocate(); -} - -Tensor::Tensor(arm_compute::TensorInfo &info) noexcept - : _target(TargetHint::DONT_CARE), _info(info), _tensor(nullptr) {} - -Tensor::Tensor(Tensor &&src) noexcept - : _target(src._target), - _info(std::move(src._info)), - _tensor(std::move(src._tensor)) {} - -arm_compute::ITensor *Tensor::set_target(TargetHint target) { - switch (target) { -#ifdef USE_OPENCL - case TargetHint::OPENCL: - _tensor = initialise_tensor(_info); - break; -#elif defined(USE_OPENGLES) - case TargetHint::OPENGLES: - _tensor = initialise_tensor(_info); - break; -#endif - case TargetHint::NEON: - _tensor = initialise_tensor(_info); - break; - default: - break; - } - _target = target; - return _tensor.get(); -} - -void Tensor::allocate() { - switch (_target) { -#ifdef USE_OPENCL - case TargetHint::OPENCL: - tensor_allocate(*_tensor); - break; -#elif defined(USE_OPENGLES) - case TargetHint::OPENGLES: - tensor_allocate(*_tensor); - break; -#endif - case TargetHint::NEON: - tensor_allocate(*_tensor); - break; - default: - break; - } -} -void Tensor::map(bool blocking) { -#ifdef USE_OPENCL - if (_target == TargetHint::OPENCL) - dynamic_cast(tensor())->map(blocking); -#elif defined(USE_OPENGLES) - if (_target == TargetHint::OPENGLES) - dynamic_cast(tensor())->map(blocking); -#endif -} -void Tensor::unmap() { -#ifdef USE_OPENCL - if (_target == TargetHint::OPENCL) - dynamic_cast(tensor())->unmap(); -#elif defined(USE_OPENGLES) - if (_target == TargetHint::OPENGLES) - dynamic_cast(tensor())->unmap(); -#endif -} - -template -std::unique_ptr 
initialise_subtensor( - arm_compute::ITensor *parent, arm_compute::TensorShape shape, - arm_compute::Coordinates coords) { - auto ptensor = dynamic_cast(parent); - auto subtensor = cpp14::make_unique(ptensor, shape, coords); - return std::move(subtensor); -} -SubTensor::SubTensor(Tensor *parent, arm_compute::TensorShape &tensor_shape, - arm_compute::Coordinates &coords) noexcept - : _target(TargetHint::DONT_CARE), - _tensor_shape(tensor_shape), - _coords(coords), - _parent(nullptr), - _subtensor(nullptr) { - _parent = parent->tensor(); - _target = parent->target(); - - instantiate_subtensor(); -} -arm_compute::ITensor *SubTensor::set_target(TargetHint target) { - return (target == _target) ? _subtensor.get() : nullptr; -} - -arm_compute::ITensor *SubTensor::tensor() { return _subtensor.get(); } - -const arm_compute::ITensor *SubTensor::tensor() const { - return _subtensor.get(); -} - -TargetHint SubTensor::target() const { return _target; } - -void SubTensor::allocate() { - // NOP for sub-tensors -} - -void SubTensor::instantiate_subtensor() { - switch (_target) { -#ifdef USE_OPENCL - case TargetHint::OPENCL: - _subtensor = initialise_subtensor( - _parent, _tensor_shape, _coords); - break; -#endif - default: - case TargetHint::NEON: - _subtensor = - initialise_subtensor( - _parent, _tensor_shape, _coords); - break; - } -} - -#endif - -} // namespace acl -} // namespace operators -} // namespace paddle_mobile diff --git a/src/operators/kernel/mali/acl_tensor.h b/src/operators/kernel/mali/acl_tensor.h deleted file mode 100755 index 1d4f59371e355ddd2e89a709eec0b5451c1c3502..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/acl_tensor.h +++ /dev/null @@ -1,128 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef ACL_TENSOR_H_ -#define ACL_TENSOR_H_ - -#ifdef USE_ACL -#ifdef USE_OPENCL -#include "arm_compute/runtime/CL/CLSubTensor.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#elif defined(USE_OPENGLES) -#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" -#endif -#include "arm_compute/runtime/SubTensor.h" -#include "arm_compute/runtime/Tensor.h" - -#include - -namespace paddle_mobile { -namespace operators { -namespace acl { -enum class TargetHint { - DONT_CARE, - OPENCL, - OPENGLES, - NEON, -}; - -enum class ConvolutionMethodHint { - GEMM, - DIRECT, -}; -namespace cpp14 { -template -struct _Unique_if { - typedef std::unique_ptr _Single_object; -}; - -template -struct _Unique_if { - typedef std::unique_ptr _Unknown_bound; -}; - -template -struct _Unique_if { - typedef void _Known_bound; -}; - -template -typename _Unique_if::_Single_object make_unique(Args &&... 
args) { - return std::unique_ptr(new T(std::forward(args)...)); -} - -template -typename _Unique_if::_Unknown_bound make_unique(size_t n) { - typedef typename std::remove_extent::type U; - return std::unique_ptr(new U[n]()); -} - -template -typename _Unique_if::_Known_bound make_unique(Args &&...); -} // namespace cpp14 - -class Tensor { - public: - explicit Tensor(arm_compute::TensorInfo &info) noexcept; - virtual ~Tensor() {} - Tensor(Tensor &&src) noexcept; - void set_info(arm_compute::TensorInfo &&info) { _info = info; } - arm_compute::ITensor *set_target(TargetHint target); - const arm_compute::TensorInfo &info() const { return _info; } - arm_compute::ITensor *tensor() { return _tensor.get(); } - void allocate(); - void init() {} - TargetHint target() const { return _target; } - virtual void map(bool blocking = true); - virtual void unmap(); - - private: - TargetHint _target; - arm_compute::TensorInfo _info; - std::unique_ptr _tensor; -}; - -class SubTensor { - public: - SubTensor(Tensor *parent, arm_compute::TensorShape &tensor_shape, - arm_compute::Coordinates &coords) noexcept; - ~SubTensor() {} - arm_compute::ITensor *tensor(); - const arm_compute::ITensor *tensor() const; - TargetHint target() const; - void allocate(); - arm_compute::ITensor *set_target(TargetHint target); - - private: - /** Instantiates a sub-tensor */ - void instantiate_subtensor(); - - private: - /**< Target that this tensor is pinned on */ - TargetHint _target; - /**< SubTensor shape */ - arm_compute::TensorShape _tensor_shape; - /**< SubTensor Coordinates */ - arm_compute::Coordinates _coords; - /**< Parent tensor */ - arm_compute::ITensor *_parent; - /**< SubTensor */ - std::unique_ptr _subtensor; -}; - -} // namespace acl -} // namespace operators -} // namespace paddle_mobile -#endif -#endif // ACL_TENSOR_H_ diff --git a/src/operators/kernel/mali/batchnorm_kernel.cpp b/src/operators/kernel/mali/batchnorm_kernel.cpp deleted file mode 100755 index 
5d50ca9a7250f66f20b6bfaf0d93db18014d791c..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/batchnorm_kernel.cpp +++ /dev/null @@ -1,164 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef BATCHNORM_OP - -#include "operators/kernel/batchnorm_kernel.h" -#ifdef PADDLE_MOBILE_MALI_GPU -#include "acl_operator.h" -#include "framework/operator.h" -#include "operators/op_param.h" - -namespace paddle_mobile { -namespace operators { - -template -class AclBatchNormOp : public acl::ACLOperator { - public: - AclBatchNormOp() { - this->force_bypass_acl_path_ = bypass_acl_class_layer & FLAGS_ENABLE_ACL_BN; - } - ~AclBatchNormOp() = default; - AclBatchNormOp(const AclBatchNormOp&) = delete; - AclBatchNormOp& operator=(const AclBatchNormOp&) = delete; - AclBatchNormOp(AclBatchNormOp&&) = delete; - AclBatchNormOp& operator=(AclBatchNormOp&&) = delete; - - acl::AclParameters& getargs() { return args; } - void InitAclLayer(const BatchNormParam& param) { - setTargetHint(acl::TargetHint::OPENCL); - arm_compute::TensorShape input_shape(args.in_cols, args.in_rows, - args.in_depth, args.batch); - arm_compute::TensorShape output_shape(args.out_cols, args.out_rows, - args.out_depth, args.out_num); - - if (is_operator_init_done(input_shape)) return; - set_operator_init_done(); - this->force_bypass_acl_path_ = false; - - arm_compute::TensorShape mean_shape(args.in_depth); - arm_compute::TensorShape var_shape = 
mean_shape; - arm_compute::TensorShape beta_shape = mean_shape; - arm_compute::TensorShape gamma_shape = mean_shape; - - //[width, height, IFM] - new_tensor(input(), input_shape, args.input_data); - //[width, height, OFM] - new_tensor(output(), output_shape, args.output_data); - - new_tensor(mean(), mean_shape, args.mean_data); - new_tensor(var(), var_shape, args.var_data); - new_tensor(beta(), beta_shape, args.biases_data); - new_tensor(gamma(), gamma_shape, args.weight_data); - - acl_configure(bn, this, args.epsilon); - } - - void RunAcl(void* input, void* output) { - acl::ACLOperator::acl_run(input, output); - } - bool Bypass_acl(const BatchNormParam& param) { - bool bypass_acl = false; - AclParametersByContext(param); - InitAclLayer(param); - // for performance, more groups impact GPU performance - if (this->force_bypass_acl_path_) { - bypass_acl = true; - } - - return bypass_acl; - } - - private: - void AclParametersByContext(const BatchNormParam& param) { - const Tensor* in_x = param.InputX(); - Tensor* out = param.OutputY(); - const Tensor* scale = param.InputScale(); - const Tensor* bias = param.InputBias(); - const Tensor* saved_mean = param.InputMean(); - const Tensor* saved_variance = param.InputVariance(); - - const T* input_data = in_x->data(); - T* output_data = out->mutable_data(); - const T* weight_data = scale->data(); - const T* bias_data = bias->data(); - const T* mean_data = saved_mean->data(); - const T* var_data = saved_variance->data(); - - float epsilon = param.Epsilon(); - - args.input_data = (void*)input_data; - args.output_data = (void*)output_data; - // args.weight_data = (void*)weight_data; - // args.biases_data = (void*)bias_data; - args.mean_data = (void*)mean_data; - args.var_data = (void*)var_data; - args.epsilon = epsilon; - - args.dim = in_x->dims().size(); - - args.batch = in_x->dims()[0]; - args.in_depth = in_x->dims()[1]; - args.in_rows = in_x->dims()[2]; - args.in_cols = in_x->dims()[3]; - - args.out_num = out->dims()[0]; - 
args.out_depth = out->dims()[1]; - args.out_rows = out->dims()[2]; - args.out_cols = out->dims()[3]; - - args.weight_data = (void*)weight_data; - args.biases_data = (void*)bias_data; - - // std::cout - // << "Out C: " << args.out_depth - // << " H: " << args.out_rows << " W: " << args.out_cols << "\n"; - } - acl::AclParameters args; -}; - -template <> -bool BatchNormKernel::Init(BatchNormParam* param) { - AclBatchNormOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - acl_op = new AclBatchNormOp(); - this->SetAclOp((void*)acl_op, (void*)this); - } - if (acl_op->Bypass_acl(*param)) { - std::cout << "init acl failed" << std::endl; - return false; - } - return true; -} - -template <> -void BatchNormKernel::Compute( - const BatchNormParam& param) { - std::cout << "init acl" << std::endl; - AclBatchNormOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - return; - } - acl::AclParameters& args = acl_op->getargs(); - acl_op->RunAcl(args.input_data, args.output_data); -} - -template class BatchNormKernel; -} // namespace operators -} // namespace paddle_mobile - -#endif -#endif diff --git a/src/operators/kernel/mali/concat_kernel.cpp b/src/operators/kernel/mali/concat_kernel.cpp deleted file mode 100644 index 2fb05ab10eccf4e0dca9c74bbcc83067b438e981..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/concat_kernel.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef CONCAT_OP - -#include "operators/kernel/concat_kernel.h" -#ifdef PADDLE_MOBILE_MALI_GPU -#include "acl_operator.h" -#include "framework/operator.h" -#include "operators/op_param.h" - -namespace paddle_mobile { -namespace operators { - -template -class AclConcatOp : public acl::ACLOperator { - public: - AclConcatOp() { - this->force_bypass_acl_path_ = - bypass_acl_class_layer & FLAGS_ENABLE_ACL_CONCAT; - } - ~AclConcatOp() = default; - AclConcatOp(const AclConcatOp&) = delete; - AclConcatOp& operator=(const AclConcatOp&) = delete; - AclConcatOp(AclConcatOp&&) = delete; - AclConcatOp& operator=(AclConcatOp&&) = delete; - - acl::AclParameters& getargs() { return args; } - - void InitAclLayer(const ConcatParam& param) { - setTargetHint(acl::TargetHint::OPENCL); - const std::vector* input_data = &args.in_tensor; - arm_compute::TensorShape output_shape(args.out_cols, args.out_rows, - args.out_depth, args.batch); - - if (is_operator_init_done(output_shape)) return; - set_operator_init_done(); - this->force_bypass_acl_path_ = false; - T type; - - for (int i = 0; i < input_data->size(); i++) { - int in_batch = (*input_data)[i]->dims()[0]; - int in_channels = (*input_data)[i]->dims()[1]; - int in_width = (*input_data)[i]->dims()[2]; - int in_height = (*input_data)[i]->dims()[3]; - arm_compute::TensorShape in_shape(in_width, in_height, in_channels); - - new_tensor(cinput(i), in_shape, - acl::InputdataPtr(this, args.in_tensor, type, i)); - } - - //[width, height, OFM] - new_tensor(output(), output_shape, args.output_data); - - acl_configure(concat, this, input_data->size()); - } - - void RunAcl(const std::vector& input, void* output) { - T type; - acl::acl_run(this, input, output, type); - } - bool Bypass_acl(const ConcatParam& param) { - bool bypass_acl = false; - AclParametersByContext(param); - InitAclLayer(param); - // for performance, more groups impact GPU 
performance - if (this->force_bypass_acl_path_ || !args.is_channel_concat) { - bypass_acl = true; - } - return bypass_acl; - } - - private: - void AclParametersByContext(const ConcatParam& param) { - auto inputs = param.Inputs(); - auto* output = param.Out(); - int64_t axis = param.Axis(); - - T* output_data = output->mutable_data(); - - args.is_channel_concat = (axis == 1); - args.in_tensor = inputs; - args.output_data = (void*)output_data; - - args.batch = output->dims()[0]; - args.out_depth = output->dims()[1]; - args.out_rows = output->dims()[2]; - args.out_cols = output->dims()[3]; - } - acl::AclParameters args; -}; - -template <> -bool ConcatKernel::Init(ConcatParam* param) { - AclConcatOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - acl_op = new AclConcatOp(); - this->SetAclOp((void*)acl_op, (void*)this); - } - if (acl_op->Bypass_acl(*param)) { - std::cout << "init acl failed" << std::endl; - return false; - } - return true; -} - -template <> -void ConcatKernel::Compute( - const ConcatParam& param) { - std::cout << "init acl" << std::endl; - AclConcatOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - return; - } - acl::AclParameters& args = acl_op->getargs(); - acl_op->RunAcl(args.in_tensor, args.output_data); -} - -template class ConcatKernel; -} // namespace operators -} // namespace paddle_mobile - -#endif -#endif diff --git a/src/operators/kernel/mali/conv_add_kernel.cpp b/src/operators/kernel/mali/conv_add_kernel.cpp deleted file mode 100644 index 427bcd596f71bf434ea155d04f192c5bdedfded5..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/conv_add_kernel.cpp +++ /dev/null @@ -1,232 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef FUSION_CONVADD_OP - -#include "operators/kernel/conv_add_kernel.h" -#ifdef PADDLE_MOBILE_MALI_GPU -#include "acl_operator.h" -#include "framework/operator.h" -#include "operators/op_param.h" - -namespace paddle_mobile { -namespace operators { - -template -class AclConvAddOp : public acl::ACLOperator { - public: - AclConvAddOp() { - this->force_bypass_acl_path_ = - bypass_acl_class_layer & FLAGS_ENABLE_ACL_CONV; - } - ~AclConvAddOp() = default; - AclConvAddOp(const AclConvAddOp&) = delete; - AclConvAddOp& operator=(const AclConvAddOp&) = delete; - AclConvAddOp(AclConvAddOp&&) = delete; - AclConvAddOp& operator=(AclConvAddOp&&) = delete; - - acl::AclParameters& getargs() { return args; } - void InitAclLayer(const FusionConvAddParam& param) { - setTargetHint(acl::TargetHint::OPENCL); - arm_compute::TensorShape input_shape(args.in_cols, args.in_rows, - args.in_depth, args.batch); - arm_compute::TensorShape output_shape(args.out_cols, args.out_rows, - args.out_depth, args.out_num); - arm_compute::TensorShape weights_shape(args.filter_cols, args.filter_rows, - args.in_depth / args.num_group, - args.out_depth); - arm_compute::TensorShape biases_shape(args.out_depth); - arm_compute::PadStrideInfo conv_info( - args.stride_cols, args.stride_rows, args.pad_cols, args.pad_rows, - arm_compute::DimensionRoundingType::FLOOR); - - if (is_operator_init_done(input_shape)) return; - set_operator_init_done(); - this->force_bypass_acl_path_ = false; - - // check_direct_conv(); - group() = args.num_group; - //[kernel_x, kernel_y, IFM, OFM] - new_tensor(weights(), weights_shape, 
args.weight_data); - //[OFM] - if (args.biases_data) { - new_tensor(biases(), biases_shape, args.biases_data); - } - - //[width, height, IFM] - new_tensor(input(), input_shape, args.input_data); - //[width, height, OFM] - new_tensor(output(), output_shape, args.output_data); - - acl_configure(conv, this, conv_info); - } - - void RunAcl(void* input, void* output) { - acl::ACLOperator::acl_run(input, output); - } - bool Bypass_acl(const FusionConvAddParam& param) { - bool bypass_acl = false; - AclParametersByContext(param); - InitAclLayer(param); - // for performance, more groups impact GPU performance - if (this->force_bypass_acl_path_ || args.num_group >= 5) { - bypass_acl = true; - } - if (args.dim > 2) { - bypass_acl = true; - } - if (args.dilated) { - bypass_acl = true; - } - return bypass_acl; - } - - private: - void check_direct_conv() { - bool use_direct_conv = false; - const char* pDirectConv; - pDirectConv = getenv("DIRECTCONV"); - if (pDirectConv) { - unsigned int bdirectconv; - sscanf(pDirectConv, "%i", &bdirectconv); - if (bdirectconv != use_direct_conv) { - use_direct_conv = bdirectconv; - printf("DIRECTCONV<%s>\n", pDirectConv); - printf("DIRECTCONV: %x\n", use_direct_conv); - } - } - int pad_data[2], kernel[2]; - pad_data[1] = args.pad_rows; - pad_data[0] = args.pad_cols; - kernel[1] = args.filter_rows; - kernel[0] = args.filter_cols; - if (use_direct_conv && ((kernel[0] == 1 && kernel[1] == 1 && - pad_data[0] == 0 && pad_data[1] == 0) || - (kernel[0] == 3 && kernel[1] == 3 && - pad_data[0] <= 1 && pad_data[1] <= 1))) { - setConvMethod(); // NEDirectConvolutionLayer only for 1x1 and 3x3 - } - } - - void AclParametersByContext(const FusionConvAddParam& param) { - const Tensor* input = param.Input(); - Tensor filter = *param.Filter(); - Tensor* output = param.Output(); - Tensor* bias; - - int groups = param.Groups(); - std::vector strides = param.Strides(); - std::vector paddings = param.Paddings(); - std::vector dilations = param.Dilations(); - - const 
T* input_data = input->data(); - T* output_data = output->mutable_data(); - const T* weight_data = filter.data(); - - args.input_data = (void*)input_data; - args.output_data = (void*)output_data; - args.weight_data = (void*)weight_data; - args.biases_data = nullptr; - - try { - bias = param.Bias(); - } catch (const std::exception& e) { - } - if (bias) { - const T* biases_data = bias->data(); - args.biases_data = (void*)biases_data; - } - - args.num_group = groups; - - args.dilation_rows = dilations[0]; - args.dilation_cols = dilations[1]; - if (dilations[0] != 1 || dilations[1] != 1) { - args.dilated = true; - } - - // NCHW - // std::cout << "In dims: " << (input->dims()).size() << std::endl; - args.batch = input->dims()[0]; - args.in_depth = input->dims()[1]; - args.in_rows = input->dims()[2]; - args.in_cols = input->dims()[3]; - // std::cout <<"In N: " << args.batch << " C: " << args.in_depth - // << " H: " << args.in_rows << " W: " << args.in_cols << "\n"; - // NCHW - // std::cout << "Out dims: " << (output->dims()).size() << std::endl; - args.out_num = output->dims()[0]; - args.out_depth = output->dims()[1]; - args.out_rows = output->dims()[2]; - args.out_cols = output->dims()[3]; - // std::cout <<"Out N: " << static_cast(output->dims()[0]) - // << " C: " << args.out_depth - // << " H: " << args.out_rows << " W: " << args.out_cols << "\n"; - // MCHW = OIHW - args.filter_rows = filter.dims()[2]; - args.filter_cols = filter.dims()[3]; - // std::cout <<"Filter O: " << static_cast(filter.dims()[0]) - // << " I: " << static_cast(filter.dims()[1]) - // << " H: " << args.filter_rows << " W: " << args.filter_cols << "\n"; - - // strides(h_stride, w_stride) - args.stride_rows = strides[0]; - args.stride_cols = strides[1]; - // std::cout <<"Stride H: " << args.stride_rows << " W: " << - // args.stride_cols << "\n"; - - // paddings(h_pad, w_pad) - args.pad_rows = paddings[0]; - args.pad_cols = paddings[1]; - // std::cout <<"Pad H: " << args.pad_rows << " W: " << 
args.pad_cols << - // "\n"; - } - acl::AclParameters args; -}; - -template <> -bool ConvAddKernel::Init(FusionConvAddParam* param) { - AclConvAddOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - acl_op = new AclConvAddOp(); - this->SetAclOp((void*)acl_op, (void*)this); - } - if (acl_op->Bypass_acl(*param)) { - std::cout << "init acl failed" << std::endl; - return false; - } - return true; -} - -template <> -void ConvAddKernel::Compute( - const FusionConvAddParam& param) { - std::cout << "init acl" << std::endl; - AclConvAddOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - return; - } - acl::AclParameters& args = acl_op->getargs(); - - acl_op->RunAcl(args.input_data, args.output_data); -} - -template class ConvAddKernel; -} // namespace operators -} // namespace paddle_mobile - -#endif -#endif diff --git a/src/operators/kernel/mali/conv_kernel.cpp b/src/operators/kernel/mali/conv_kernel.cpp deleted file mode 100644 index 7cca16274ecc7ae1707f8d5ed8faf2fde810ab30..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/conv_kernel.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifdef CONV_OP - -#include "operators/kernel/conv_kernel.h" -#ifdef PADDLE_MOBILE_MALI_GPU -#include "acl_operator.h" -#include "framework/operator.h" -#include "operators/op_param.h" - -namespace paddle_mobile { -namespace operators { - -template -class AclConvOp : public acl::ACLOperator { - public: - AclConvOp() { - this->force_bypass_acl_path_ = - bypass_acl_class_layer & FLAGS_ENABLE_ACL_CONV; - } - ~AclConvOp() = default; - AclConvOp(const AclConvOp&) = delete; - AclConvOp& operator=(const AclConvOp&) = delete; - AclConvOp(AclConvOp&&) = delete; - AclConvOp& operator=(AclConvOp&&) = delete; - - acl::AclParameters& getargs() { return args; } - void InitAclLayer(const ConvParam& param) { - setTargetHint(acl::TargetHint::OPENCL); - arm_compute::TensorShape input_shape(args.in_cols, args.in_rows, - args.in_depth, args.batch); - arm_compute::TensorShape output_shape(args.out_cols, args.out_rows, - args.out_depth, args.out_num); - arm_compute::TensorShape weights_shape(args.filter_cols, args.filter_rows, - args.in_depth / args.num_group, - args.out_depth); - // arm_compute::TensorShape biases_shape(args.out_depth); - arm_compute::PadStrideInfo conv_info( - args.stride_cols, args.stride_rows, args.pad_cols, args.pad_rows, - arm_compute::DimensionRoundingType::FLOOR); - - if (is_operator_init_done(input_shape)) return; - set_operator_init_done(); - this->force_bypass_acl_path_ = false; - - check_direct_conv(); - //[kernel_x, kernel_y, IFM, OFM] - new_tensor(weights(), weights_shape, args.weight_data); - //[OFM] - // if (args.biases_data) { - // new_tensor(biases(),biases_shape,args.biases_data); - //} - - group() = args.num_group; - - //[width, height, IFM] - new_tensor(input(), input_shape, args.input_data); - //[width, height, OFM] - new_tensor(output(), output_shape, args.output_data); - - acl_configure(conv, this, conv_info); - } - - void RunAcl(void* input, void* output) { - acl::ACLOperator::acl_run(input, output); - } - bool Bypass_acl(const ConvParam& 
param) { - bool bypass_acl = false; - AclParametersByContext(param); - InitAclLayer(param); - // for performance, more groups impact GPU performance - if (this->force_bypass_acl_path_ || args.num_group >= 5) { - bypass_acl = true; - } - if (args.dim > 2) { - bypass_acl = true; - } - if (args.dilated) { - bypass_acl = true; - } - return bypass_acl; - } - - private: - void check_direct_conv() { - bool use_direct_conv = false; - const char* pDirectConv; - pDirectConv = getenv("DIRECTCONV"); - if (pDirectConv) { - unsigned int bdirectconv; - sscanf(pDirectConv, "%i", &bdirectconv); - if (bdirectconv != use_direct_conv) { - use_direct_conv = bdirectconv; - printf("DIRECTCONV<%s>\n", pDirectConv); - printf("DIRECTCONV: %x\n", use_direct_conv); - } - } - int pad_data[2], kernel[2]; - pad_data[1] = args.pad_rows; - pad_data[0] = args.pad_cols; - kernel[1] = args.filter_rows; - kernel[0] = args.filter_cols; - if (use_direct_conv && ((kernel[0] == 1 && kernel[1] == 1 && - pad_data[0] == 0 && pad_data[1] == 0) || - (kernel[0] == 3 && kernel[1] == 3 && - pad_data[0] <= 1 && pad_data[1] <= 1))) { - setConvMethod(); // NEDirectConvolutionLayer only for 1x1 and 3x3 - } - } - - void AclParametersByContext(const ConvParam& param) { - const Tensor* input = param.Input(); - Tensor filter = *param.Filter(); - Tensor* output = param.Output(); - - int groups = param.Groups(); - std::vector strides = param.Strides(); - std::vector paddings = param.Paddings(); - std::vector dilations = param.Dilations(); - - const T* input_data = input->data(); - T* output_data = output->mutable_data(); - const T* weight_data = filter.data(); - - args.input_data = (void*)input_data; - args.output_data = (void*)output_data; - args.weight_data = (void*)weight_data; - args.biases_data = nullptr; - - // try { - // bias = context.Input("Bias"); - // } catch (const std::exception& e) { - // } - // if (bias) { - // const T* biases_data = bias->data(); - // args.biases_data = (void*)biases_data; - // } - - 
args.num_group = groups; - - args.dilation_rows = dilations[0]; - args.dilation_cols = dilations[1]; - if (dilations[0] != 1 || dilations[1] != 1) { - args.dilated = true; - } - - // NCHW - // std::cout << "In dims: " << (input->dims()).size() << std::endl; - args.batch = input->dims()[0]; - args.in_depth = input->dims()[1]; - args.in_rows = input->dims()[2]; - args.in_cols = input->dims()[3]; - std::cout << "In N: " << args.batch << " C: " << args.in_depth - << " H: " << args.in_rows << " W: " << args.in_cols << "\n"; - // NCHW - // std::cout << "Out dims: " << (output->dims()).size() << std::endl; - args.out_num = output->dims()[0]; - args.out_depth = output->dims()[1]; - args.out_rows = output->dims()[2]; - args.out_cols = output->dims()[3]; - // std::cout <<"Out N: " << static_cast(output->dims()[0]) - // << " C: " << args.out_depth - // << " H: " << args.out_rows << " W: " << args.out_cols << "\n"; - // MCHW = OIHW - args.filter_rows = filter.dims()[2]; - args.filter_cols = filter.dims()[3]; - // std::cout <<"Filter O: " << static_cast(filter.dims()[0]) - // << " I: " << static_cast(filter.dims()[1]) - // << " H: " << args.filter_rows << " W: " << args.filter_cols << "\n"; - - // strides(h_stride, w_stride) - args.stride_rows = strides[0]; - args.stride_cols = strides[1]; - // std::cout <<"Stride H: " << args.stride_rows << " W: " << - // args.stride_cols << "\n"; - - // paddings(h_pad, w_pad) - args.pad_rows = paddings[0]; - args.pad_cols = paddings[1]; - // std::cout <<"Pad H: " << args.pad_rows << " W: " << args.pad_cols << - // "\n"; - } - acl::AclParameters args; -}; - -template <> -bool ConvKernel::Init(ConvParam* param) { - AclConvOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - acl_op = new AclConvOp(); - this->SetAclOp((void*)acl_op, (void*)this); - } - if (acl_op->Bypass_acl(*param)) { - std::cout << "init acl failed" << std::endl; - return false; - } - return true; -} - -template <> -void ConvKernel::Compute(const 
ConvParam& param) { - std::cout << "init acl" << std::endl; - AclConvOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - return; - } - acl::AclParameters& args = acl_op->getargs(); - acl_op->RunAcl(args.input_data, args.output_data); -} - -template class ConvKernel; -} // namespace operators -} // namespace paddle_mobile - -#endif -#endif diff --git a/src/operators/kernel/mali/elementwise_add_kernel.cpp b/src/operators/kernel/mali/elementwise_add_kernel.cpp deleted file mode 100644 index 3711a946b508c9ad71f59dd85f2e01c99bccc9e5..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/elementwise_add_kernel.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifdef ELEMENTWISEADD_OP - -#pragma once - -#include "operators/kernel/elementwise_add_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template -struct AddFunctor { - inline T operator()(T a, T b) const { return a + b; } -}; - -template <> -bool ElementwiseAddKernel::Init( - ElementwiseAddParam *param) { - return true; -} - -template <> -void ElementwiseAddKernel::Compute( - const ElementwiseAddParam ¶m) { - const Tensor *input_x = param.InputX(); - const Tensor *input_y = param.InputY(); - Tensor *Out = param.Out(); - Out->mutable_data(); - int axis = param.Axis(); - ElementwiseComputeEx, float>(input_x, input_y, axis, - AddFunctor(), Out); -} - -template class ElementwiseAddKernel; - -} // namespace operators -} // namespace paddle_mobile - -#endif diff --git a/src/operators/kernel/mali/feed_kernel.cpp b/src/operators/kernel/mali/feed_kernel.cpp deleted file mode 100644 index 6af6c1a88b8031da4a23dad1d3269935ce81b9a8..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/feed_kernel.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "operators/kernel/feed_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template <> -bool FeedKernel::Init(FeedParam *param) { - return true; -} - -template <> -void FeedKernel::Compute(const FeedParam ¶m) { - param.Out()->ShareDataWith(*(param.InputX())); - param.Out()->set_lod(param.InputX()->lod()); -} - -template class FeedKernel; - -} // namespace operators -} // namespace paddle_mobile - -#endif diff --git a/src/operators/kernel/mali/fetch_kernel.cpp b/src/operators/kernel/mali/fetch_kernel.cpp deleted file mode 100644 index f74280cfb322b8135d99ca7fb7e2652a08588bb3..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/fetch_kernel.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#ifdef FUSION_CONVADD_OP - -#include "operators/kernel/fetch_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template <> -bool FetchKernel::Init(FetchParam *param) { - return true; -} - -template <> -void FetchKernel::Compute(const FetchParam ¶m) { - param.Out()->ShareDataWith(*(param.InputX())); -} - -template class FetchKernel; - -} // namespace operators -} // namespace paddle_mobile - -#endif diff --git a/src/operators/kernel/mali/fushion_fc_kernel.cpp b/src/operators/kernel/mali/fushion_fc_kernel.cpp deleted file mode 100755 index 39b36d756734a69320060d99297f8d6f3acaeef9..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/fushion_fc_kernel.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef FUSION_FC_OP - -#include "operators/kernel/fusion_fc_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template <> -bool FusionFcKernel::Init(FusionFcParam *param) { - return true; -} - -template <> -void FusionFcKernel::Compute( - const FusionFcParam ¶m) { - const Tensor *input_x = param.InputX(); - const Tensor *input_y = param.InputY(); - const Tensor *input_z = param.InputZ(); - auto *input_z_data = input_z->data(); - int axis = param.Axis(); - Tensor *out = param.Out(); - auto *out_data = out->mutable_data(); - const Tensor x_matrix = - input_x->dims().size() > 2 - ? 
framework::ReshapeToMatrix(*input_x, param.XNumColDims()) - : *input_x; - const Tensor y_matrix = - input_y->dims().size() > 2 - ? framework::ReshapeToMatrix(*input_y, param.YNumColDims()) - : *input_y; - auto out_dim = out->dims(); - if (out_dim.size() != 2) { - out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]}); - } - PADDLE_MOBILE_ENFORCE(out_dim.size() == 2, " out_dim.size must be 2."); - PADDLE_MOBILE_ENFORCE(input_z->dims().size() == 1, "inpu_z size must be 1"); - PADDLE_MOBILE_ENFORCE(out_dim[1] == input_z->dims()[0], - " out_dim.size must be 2."); - axis = (axis == -1 ? out_dim.size() - input_z->dims().size() : axis); - PADDLE_MOBILE_ENFORCE(axis == 1, " to fit broadcast, axis = 1. ") - - int64_t classes = input_z->numel(); - for (int i = 0; i < out_dim[0]; i++) { - memory::Copy(out_data + i * classes, input_z_data, sizeof(float) * classes); - } - - for (int i = 0; i < out->numel(); i++) { - DLOG << out_data[i]; - } - math::MatMul(x_matrix, false, y_matrix, false, static_cast(1), - out, static_cast(1)); - PADDLE_MOBILE_ENFORCE(out_dim.size() == 2, " out_dim.size must be 2."); - // if (out_dim.size() != 2) { - // out->Resize(out_dim); - // } -} - -} // namespace operators -} // namespace paddle_mobile - -#endif diff --git a/src/operators/kernel/mali/lrn_kernel.cpp b/src/operators/kernel/mali/lrn_kernel.cpp deleted file mode 100644 index b46c9680d576ead3e7ab309c08894654a9fad04a..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/lrn_kernel.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef LRN_OP - -#pragma once - -#include "operators/kernel/lrn_kernel.h" -#ifdef PADDLE_MOBILE_MALI_GPU -#include "acl_operator.h" -#include "framework/operator.h" -#include "operators/kernel/central-arm-func/lrn_arm_func.h" -#include "operators/op_param.h" - -namespace paddle_mobile { -namespace operators { - -template -class AclLrnOp : public acl::ACLOperator { - public: - AclLrnOp() { - this->force_bypass_acl_path_ = - bypass_acl_class_layer & FLAGS_ENABLE_ACL_LRN; - } - ~AclLrnOp() = default; - AclLrnOp(const AclLrnOp&) = delete; - AclLrnOp& operator=(const AclLrnOp&) = delete; - AclLrnOp(AclLrnOp&&) = delete; - AclLrnOp& operator=(AclLrnOp&&) = delete; - - acl::AclParameters& getargs() { return args; } - void InitAclLayer(const LrnParam& param) { - setTargetHint(acl::TargetHint::OPENCL); - arm_compute::TensorShape shape(args.in_cols, args.in_rows, args.in_depth); - - if (is_operator_init_done(shape)) return; - set_operator_init_done(); - this->force_bypass_acl_path_ = false; - - arm_compute::NormalizationLayerInfo norm_info( - arm_compute::NormType::CROSS_MAP, args.nsize, args.alpha, args.beta, - args.knorm); - - //[width, height, IFM] - new_tensor(input(), shape, args.input_data); - //[width, height, OFM] - new_tensor(output(), shape, args.output_data); - - acl_configure(lrn, this, norm_info); - } - - void Set_bypass(bool bypass) { args.is_bypass = bypass; } - - void RunAcl(void* input, void* output) { - acl::ACLOperator::acl_run(input, output); - } - bool Bypass_acl(const LrnParam& param) { - bool bypass_acl = false; - AclParametersByContext(param); - 
InitAclLayer(param); - // for performance, more groups impact GPU performance - if (this->force_bypass_acl_path_) { - bypass_acl = true; - } - - return bypass_acl; - } - - private: - void AclParametersByContext(const LrnParam& param) { - const Tensor* in_x = param.InputX(); - Tensor* out = param.Out(); - - int n = param.N(); - T alpha = param.Alpha(); - T beta = param.Beta(); - T k = param.K(); - - const T* input_data = in_x->data(); - T* output_data = out->mutable_data(); - - args.input_data = (void*)input_data; - args.output_data = (void*)output_data; - - args.nsize = n; - args.alpha = alpha; - args.beta = beta; - args.knorm = k; - - // NCHW - args.batch = in_x->dims()[0]; - args.in_depth = in_x->dims()[1]; - args.in_rows = in_x->dims()[2]; - args.in_cols = in_x->dims()[3]; - // std::cout - // << "Out C: " << args.out_depth - // << " H: " << args.out_rows << " W: " << args.out_cols << "\n"; - } - acl::AclParameters args; -}; - -template <> -bool LrnKernel::Init(LrnParam* param) { - AclLrnOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - acl_op = new AclLrnOp(); - this->SetAclOp((void*)acl_op, (void*)this); - } - if (acl_op->Bypass_acl(*param)) { - acl_op->Set_bypass(true); - std::cout << "init acl failed" << std::endl; - return true; - } - return true; -} - -template <> -void LrnKernel::Compute(const LrnParam& param) { - std::cout << "init acl" << std::endl; - AclLrnOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - return; - } - acl::AclParameters& args = acl_op->getargs(); - if (args.is_bypass) { - std::cout << "bypass op" << std::endl; - LrnCompute(param); - return; - } - const float* input_data = (const float*)args.input_data; - const float* output_data = (const float*)args.output_data; - for (int n = 0; n < args.batch; ++n) { - acl_op->RunAcl((void*)input_data, (void*)output_data); - input_data += args.in_depth * args.in_cols * args.in_rows; - output_data += args.in_depth * args.in_cols * 
args.in_rows; - } -} - -template class LrnKernel; -} // namespace operators -} // namespace paddle_mobile - -#endif -#endif diff --git a/src/operators/kernel/mali/mul_kernel.cpp b/src/operators/kernel/mali/mul_kernel.cpp deleted file mode 100644 index 6148ae702558f2d1dc28e68d733938510db1082b..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/mul_kernel.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef MUL_OP - -#pragma once - -#include "operators/kernel/mul_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template <> -bool MulKernel::Init(MulParam *param) { - return true; -} - -template <> -void MulKernel::Compute(const MulParam ¶m) { - const Tensor *input_x = param.InputX(); - const Tensor *input_y = param.InputY(); - Tensor *out = param.Out(); - out->mutable_data(); - const Tensor x_matrix = - input_x->dims().size() > 2 - ? framework::ReshapeToMatrix(*input_x, param.XNumColDims()) - : *input_x; - const Tensor y_matrix = - input_y->dims().size() > 2 - ? 
framework::ReshapeToMatrix(*input_y, param.YNumColDims()) - : *input_y; - auto out_dim = out->dims(); - if (out_dim.size() != 2) { - out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]}); - } - math::MatMul(x_matrix, false, y_matrix, false, static_cast(1), - out, static_cast(0)); - if (out_dim.size() != 2) { - out->Resize(out_dim); - } -} - -template class MulKernel; - -} // namespace operators -} // namespace paddle_mobile - -#endif diff --git a/src/operators/kernel/mali/pool_kernel.cpp b/src/operators/kernel/mali/pool_kernel.cpp deleted file mode 100644 index ec5d35a8f600d63a623b468c9c97c3540bf9c3f7..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/pool_kernel.cpp +++ /dev/null @@ -1,220 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifdef POOL_OP - -#pragma once - -#include "operators/kernel/pool_kernel.h" -#ifdef PADDLE_MOBILE_MALI_GPU -#include "acl_operator.h" -#include "framework/operator.h" -#include "operators/op_param.h" - -namespace paddle_mobile { -namespace operators { - -template -class AclPoolOp : public acl::ACLOperator { - public: - AclPoolOp() { - this->force_bypass_acl_path_ = - bypass_acl_class_layer & FLAGS_ENABLE_ACL_POOLING; - } - ~AclPoolOp() = default; - AclPoolOp(const AclPoolOp&) = delete; - AclPoolOp& operator=(const AclPoolOp&) = delete; - AclPoolOp(AclPoolOp&&) = delete; - AclPoolOp& operator=(AclPoolOp&&) = delete; - - acl::AclParameters& getargs() { return args; } - void InitAclLayer(const PoolParam& param) { - setTargetHint(acl::TargetHint::OPENCL); - arm_compute::TensorShape input_shape(args.in_cols, args.in_rows, - args.in_depth); - arm_compute::TensorShape output_shape(args.out_cols, args.out_rows, - args.out_depth); - // arm_compute::TensorShape weights_shape( - // args.filter_cols, args.filter_rows, args.in_depth, args.out_depth); - // arm_compute::TensorShape biases_shape(args.out_depth); - arm_compute::PoolingLayerInfo pool_info; - - if (args.pool_type == "max") { - pool_info = arm_compute::PoolingLayerInfo( - arm_compute::PoolingType::MAX, args.filter_rows, - arm_compute::PadStrideInfo(args.stride_cols, args.stride_rows, - args.pad_cols, args.pad_rows, - arm_compute::DimensionRoundingType::CEIL)); - } else { - pool_info = arm_compute::PoolingLayerInfo( - arm_compute::PoolingType::AVG, args.filter_rows, - arm_compute::PadStrideInfo(args.stride_cols, args.stride_rows, - args.pad_cols, args.pad_rows, - arm_compute::DimensionRoundingType::CEIL)); - } - - if (is_operator_init_done(input_shape)) return; - set_operator_init_done(); - this->force_bypass_acl_path_ = false; - - //[width, height, IFM] - new_tensor(input(), input_shape, args.input_data); - //[width, height, OFM] - new_tensor(output(), output_shape, args.output_data); - - acl_configure(pooling, 
this, pool_info); - } - - void RunAcl(void* input, void* output) { - acl::ACLOperator::acl_run(input, output); - } - bool Bypass_acl(const PoolParam& param) { - bool bypass_acl = false; - AclParametersByContext(param); - InitAclLayer(param); - // for performance, more groups impact GPU performance - if (this->force_bypass_acl_path_) { - bypass_acl = true; - } - if (args.pool_type != "max" && args.pool_type != "avg") { - bypass_acl = true; - } - if (args.filter_rows != args.filter_cols) { - bypass_acl = true; - } - // if (args.filter_rows!=2 && args.filter_rows!=3) { - // bypass_acl = true; - // } - return bypass_acl; - } - - private: - void AclParametersByContext(const PoolParam& param) { - const Tensor* in_x = param.Input(); - Tensor* out = param.Output(); - std::string pooling_type = param.PoolingType(); - - std::vector ksize = param.Ksize(); - - std::vector strides = param.Strides(); - - std::vector paddings = param.Paddings(); - - bool is_global_pooling = param.isGlobalPooling(); - - const T* input_data = in_x->data(); - T* output_data = out->mutable_data(); - - args.input_data = (void*)input_data; - args.output_data = (void*)output_data; - - args.is_global_pool = is_global_pooling; - args.pool_type = pooling_type; - - args.filter_rows = ksize[0]; - args.filter_cols = ksize[1]; - args.dim = ksize.size(); - - // NCHW - args.batch = in_x->dims()[0]; - args.in_depth = in_x->dims()[1]; - args.in_rows = in_x->dims()[2]; - args.in_cols = in_x->dims()[3]; - // std::cout <<"In N: " << args.batch << " C: " << args.in_depth - // << " H: " << args.in_rows << " W: " << args.in_cols << "\n"; - // NCHW - // std::cout <<"Out N: " << static_cast(output->dims()[0]) - // << " C: " << args.out_depth - // << " H: " << args.out_rows << " W: " << args.out_cols << "\n"; - // MCHW = OIHW - // std::cout <<"Filter O: " << static_cast(filter->dims()[0]) - // << " I: " << static_cast(filter->dims()[1]) - // << " H: " << args.filter_rows << " W: " << args.filter_cols << "\n"; - - // 
strides(h_stride, w_stride) - args.stride_rows = strides[0]; - args.stride_cols = strides[1]; - // std::cout <<"PoolingType: " << args.pool_type << "\n"; - // std::cout <<"Stride H: " << args.stride_rows << " W: " << - // args.stride_cols << "\n"; - - // paddings(h_pad, w_pad) - args.pad_rows = paddings[0]; - args.pad_cols = paddings[1]; - // std::cout <<"Pad H: " << args.pad_rows << " W: " << args.pad_cols << - // "\n"; - - args.out_depth = args.in_depth; - // args.out_rows = out->dims()[2]; - // args.out_cols = out->dims()[3]; - args.out_rows = static_cast(ceil(static_cast(args.in_rows + - 2 * args.pad_rows - - args.filter_rows) / - args.stride_rows)) + - 1; - args.out_cols = static_cast(ceil(static_cast(args.in_cols + - 2 * args.pad_cols - - args.filter_cols) / - args.stride_cols)) + - 1; - - if (is_global_pooling) { - args.filter_rows = args.in_rows; - args.filter_cols = args.in_cols; - args.pad_rows = 0; - args.pad_cols = 0; - } - } - acl::AclParameters args; -}; - -template <> -bool PoolKernel::Init(PoolParam* param) { - AclPoolOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - acl_op = new AclPoolOp(); - this->SetAclOp((void*)acl_op, (void*)this); - } - if (acl_op->Bypass_acl(*param)) { - std::cout << "init acl failed" << std::endl; - return false; - } - return true; -} - -template <> -void PoolKernel::Compute(const PoolParam& param) { - std::cout << "init acl" << std::endl; - AclPoolOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - return; - } - acl::AclParameters& args = acl_op->getargs(); - const float* input_data = (const float*)args.input_data; - const float* output_data = (const float*)args.output_data; - for (int n = 0; n < args.batch; ++n) { - acl_op->RunAcl((void*)input_data, (void*)output_data); - input_data += args.in_depth * args.in_cols * args.in_rows; - output_data += args.in_depth * args.out_cols * args.out_rows; - } -} - -template class PoolKernel; -} // namespace operators -} // 
namespace paddle_mobile - -#endif -#endif diff --git a/src/operators/kernel/mali/relu_kernel.cpp b/src/operators/kernel/mali/relu_kernel.cpp deleted file mode 100644 index 68bb52af3ab9b262218223d971b044edd759b347..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/relu_kernel.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef RELU_OP - -#pragma once - -#include "operators/kernel/relu_kernel.h" -#ifdef PADDLE_MOBILE_MALI_GPU -#include "acl_operator.h" -#include "framework/operator.h" -#include "operators/op_param.h" - -namespace paddle_mobile { -namespace operators { - -template -class AclReluOp : public acl::ACLOperator { - public: - AclReluOp() { - this->force_bypass_acl_path_ = - bypass_acl_class_layer & FLAGS_ENABLE_ACL_RELU; - } - ~AclReluOp() = default; - AclReluOp(const AclReluOp&) = delete; - AclReluOp& operator=(const AclReluOp&) = delete; - AclReluOp(AclReluOp&&) = delete; - AclReluOp& operator=(AclReluOp&&) = delete; - - acl::AclParameters& getargs() { return args; } - void InitAclLayer(const ReluParam& param) { - setTargetHint(acl::TargetHint::OPENCL); - arm_compute::TensorShape input_shape(args.in_cols, args.in_rows, - args.in_depth, args.batch); - arm_compute::TensorShape output_shape(args.in_cols, args.in_rows, - args.in_depth, args.out_num); - // arm_compute::TensorShape weights_shape( - // args.filter_cols, args.filter_rows, args.in_depth, 
args.out_depth); - // arm_compute::TensorShape biases_shape(args.out_depth); - arm_compute::ActivationLayerInfo::ActivationFunction type; - type = arm_compute::ActivationLayerInfo::ActivationFunction::RELU; - - arm_compute::ActivationLayerInfo act_info(type); - - if (is_operator_init_done(input_shape)) return; - set_operator_init_done(); - this->force_bypass_acl_path_ = false; - - //[width, height, IFM] - new_tensor(input(), input_shape, args.input_data); - //[width, height, OFM] - new_tensor(output(), output_shape, args.output_data); - - acl_configure(activation, this, act_info); - } - - void RunAcl(void* input, void* output) { - acl::ACLOperator::acl_run(input, output); - } - bool Bypass_acl(const ReluParam& param) { - bool bypass_acl = false; - AclParametersByContext(param); - InitAclLayer(param); - // for performance, more groups impact GPU performance - if (this->force_bypass_acl_path_) { - bypass_acl = true; - } - return bypass_acl; - } - - private: - void AclParametersByContext(const ReluParam& param) { - const auto* input_x = param.InputX(); - auto* out = param.Out(); - - const T* input_data = input_x->data(); - T* output_data = out->mutable_data(); - - args.input_data = (void*)input_data; - args.output_data = (void*)output_data; - - args.batch = input_x->dims()[0]; - args.in_depth = input_x->dims()[1]; - args.in_rows = input_x->dims()[2]; - args.in_cols = input_x->dims()[3]; - args.out_num = out->dims()[0]; - } - acl::AclParameters args; -}; - -template <> -bool ReluKernel::Init(ReluParam* param) { - AclReluOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - acl_op = new AclReluOp(); - this->SetAclOp((void*)acl_op, (void*)this); - } - if (acl_op->Bypass_acl(*param)) { - std::cout << "init acl failed" << std::endl; - return false; - } - return true; -} - -template <> -void ReluKernel::Compute(const ReluParam& param) { - std::cout << "init acl" << std::endl; - AclReluOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if 
(acl_op == nullptr) { - return; - } - acl::AclParameters& args = acl_op->getargs(); - acl_op->RunAcl(args.input_data, args.output_data); -} - -template class ReluKernel; -} // namespace operators -} // namespace paddle_mobile - -#endif -#endif diff --git a/src/operators/kernel/mali/reshape_kernel.cpp b/src/operators/kernel/mali/reshape_kernel.cpp deleted file mode 100644 index f98906c0a982c10896e75101eaa2732d75d6cdf4..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/reshape_kernel.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifdef RESHAPE_OP - -#pragma once - -#include "operators/kernel/reshape_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template <> -bool ReshapeKernel::Init(ReshapeParam *param) { - return true; -} - -template <> -void ReshapeKernel::Compute( - const ReshapeParam ¶m) { - const auto *input_x = param.InputX(); - const auto &input_x_dims = input_x->dims(); - auto *out = param.Out(); - framework::DDim out_dims = out->dims(); - const auto *input_shape = param.InputShape(); - - if (input_shape) { - auto *shape_data = input_shape->data(); - framework::Tensor cpu_shape_tensor; - auto shape = - std::vector(shape_data, shape_data + input_shape->numel()); - out_dims = ValidateShape(shape, input_x->dims()); - } - - bool inplace = param.Inplace(); - out->Resize(out_dims); - if (!inplace) { - out->mutable_data(); - framework::TensorCopy(*input_x, out); - out->Resize(out_dims); - } else { - out->ShareDataWith(*input_x); - out->Resize(out_dims); - } -} - -} // namespace operators -} // namespace paddle_mobile - -#endif diff --git a/src/operators/kernel/mali/softmax_kernel.cpp b/src/operators/kernel/mali/softmax_kernel.cpp deleted file mode 100644 index d6ce1ecb61c2790c68883231eb6b90dcde43a956..0000000000000000000000000000000000000000 --- a/src/operators/kernel/mali/softmax_kernel.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#ifdef SOFTMAX_OP - -#pragma once - -#include "operators/kernel/softmax_kernel.h" -#ifdef PADDLE_MOBILE_MALI_GPU -#include "acl_operator.h" -#include "framework/operator.h" -#include "operators/op_param.h" - -namespace paddle_mobile { -namespace operators { - -template -class AclSoftmaxOp : public acl::ACLOperator { - public: - AclSoftmaxOp() { - this->force_bypass_acl_path_ = - bypass_acl_class_layer & FLAGS_ENABLE_ACL_SOFTMAX; - } - ~AclSoftmaxOp() = default; - AclSoftmaxOp(const AclSoftmaxOp&) = delete; - AclSoftmaxOp& operator=(const AclSoftmaxOp&) = delete; - AclSoftmaxOp(AclSoftmaxOp&&) = delete; - AclSoftmaxOp& operator=(AclSoftmaxOp&&) = delete; - - acl::AclParameters& getargs() { return args; } - void InitAclLayer(const SoftmaxParam& param) { - setTargetHint(acl::TargetHint::OPENCL); - arm_compute::TensorShape shape(args.in_depth, args.batch); - - if (is_operator_init_done(shape)) return; - set_operator_init_done(); - this->force_bypass_acl_path_ = false; - - //[width, height, IFM] - new_tensor(input(), shape, args.input_data); - //[width, height, OFM] - new_tensor(output(), shape, args.output_data); - - acl_configure(softmax, this, NULL); - } - - void RunAcl(void* input, void* output) { - acl::ACLOperator::acl_run(input, output); - } - bool Bypass_acl(const SoftmaxParam& param) { - bool bypass_acl = false; - AclParametersByContext(param); - InitAclLayer(param); - // for performance, more groups impact GPU performance - if (this->force_bypass_acl_path_) { - bypass_acl = true; - } - - return bypass_acl; - } - - private: - void AclParametersByContext(const SoftmaxParam& param) { - const framework::Tensor* in_x = param.InputX(); - framework::Tensor* out = param.Out(); - auto x_dims = in_x->dims(); - out->Resize(x_dims); - - const T* input_data = in_x->data(); - T* output_data = out->data(); - - args.input_data = (void*)input_data; - args.output_data = (void*)output_data; - - // NCHW - args.batch = in_x->dims()[0]; - args.in_depth = in_x->dims()[1]; - - 
args.out_num = out->dims()[0]; - - // std::cout - // << "Out C: " << args.out_depth - // << " H: " << args.out_rows << " W: " << args.out_cols << "\n"; - } - acl::AclParameters args; -}; - -template <> -bool SoftmaxKernel::Init(SoftmaxParam* param) { - AclSoftmaxOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - acl_op = new AclSoftmaxOp(); - this->SetAclOp((void*)acl_op, (void*)this); - } - if (acl_op->Bypass_acl(*param)) { - std::cout << "init acl failed" << std::endl; - return false; - } - return true; -} - -template <> -void SoftmaxKernel::Compute( - const SoftmaxParam& param) { - std::cout << "init acl" << std::endl; - AclSoftmaxOp* acl_op = - reinterpret_cast*>(this->GetAclOp()); - if (acl_op == nullptr) { - return; - } - acl::AclParameters& args = acl_op->getargs(); - const float* input_data = (const float*)args.input_data; - const float* output_data = (const float*)args.output_data; - - for (int n = 0; n < args.out_num; ++n) { - acl_op->RunAcl((void*)input_data, (void*)output_data); - input_data += args.in_depth; - output_data += args.in_depth; - } -} - -template class SoftmaxKernel; -} // namespace operators -} // namespace paddle_mobile - -#endif -#endif diff --git a/src/operators/lookup_op.cpp b/src/operators/lookup_op.cpp index 33f2b434adaec19acd36aab0d5157138ebd3e91e..682e71221e7bc7d207294fffcf4b289369b90565 100644 --- a/src/operators/lookup_op.cpp +++ b/src/operators/lookup_op.cpp @@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(lookup_table, ops::LookupOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/lrn_op.cpp b/src/operators/lrn_op.cpp index b63d2f2fbe594fc35cd580ea772562a263c97bd5..9b0745b113d3d362a0a5dc421862d82e3f611c9a 100644 --- a/src/operators/lrn_op.cpp +++ b/src/operators/lrn_op.cpp @@ -35,8 +35,5 @@ REGISTER_OPERATOR_CPU(lrn, ops::LrnOp); #ifdef PADDLE_MOBILE_CL REGISTER_OPERATOR_CL(lrn, 
ops::LrnOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(lrn, ops::LrnOp); -#endif #endif diff --git a/src/operators/mul_op.cpp b/src/operators/mul_op.cpp index ec9c8e225422bc9c0cda0550775e67c962426490..b11f8f95f10db2a8a446edc5991209c0d9fe2d3a 100644 --- a/src/operators/mul_op.cpp +++ b/src/operators/mul_op.cpp @@ -61,9 +61,6 @@ REGISTER_OPERATOR_CPU(mul, ops::MulOp); #ifdef PADDLE_MOBILE_CL REGISTER_OPERATOR_CL(mul, ops::MulOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(mul, ops::MulOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(mul, ops::MulOp); #endif diff --git a/src/operators/norm_op.cpp b/src/operators/norm_op.cpp index deed9f69d1cf40ee70a211b0c9a84e4afeef6623..5541755eb03799779bc4b8f2df82ea7dc42fc203 100644 --- a/src/operators/norm_op.cpp +++ b/src/operators/norm_op.cpp @@ -41,8 +41,7 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(norm, ops::NormOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif + #ifdef PADDLE_MOBILE_FPGA #endif diff --git a/src/operators/pool_op.cpp b/src/operators/pool_op.cpp index 241f278ec0c5dd10e103b3ab1aa6f296323eebce..f73fe01cc7f8df737b19986b81a4dcf09ba8af4b 100644 --- a/src/operators/pool_op.cpp +++ b/src/operators/pool_op.cpp @@ -63,9 +63,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(pool2d, ops::PoolOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp); #endif diff --git a/src/operators/prelu_op.cpp b/src/operators/prelu_op.cpp index 2e79c2acd20fd00a8c17627196a385e69cc3c94d..0c373ca7112b6919b3476202f4919f71847f0a6c 100644 --- a/src/operators/prelu_op.cpp +++ b/src/operators/prelu_op.cpp @@ -36,8 +36,5 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(prelu, ops::PReluOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU 
-REGISTER_OPERATOR_MALI_GPU(prelu, ops::PReluOp); -#endif #endif diff --git a/src/operators/prior_box_op.cpp b/src/operators/prior_box_op.cpp index 8647db41d78b3dab0c87a692ebf207b03386761b..b2b43f6418e08e56f6b1af0023bc18fc342fb11d 100644 --- a/src/operators/prior_box_op.cpp +++ b/src/operators/prior_box_op.cpp @@ -52,8 +52,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(prior_box, ops::PriorBoxOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif #ifdef PADDLE_MOBILE_CL REGISTER_OPERATOR_CL(prior_box, ops::PriorBoxOp); #endif diff --git a/src/operators/reshape2_op.cpp b/src/operators/reshape2_op.cpp index c0f2a2450d29b2f95edb2ff049cea8280913afc8..b43f2996623f31160827054802195152d8d2d873 100644 --- a/src/operators/reshape2_op.cpp +++ b/src/operators/reshape2_op.cpp @@ -40,9 +40,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(reshape2, ops::Reshape2Op); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(reshape2, ops::Reshape2Op); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(reshape2, ops::Reshape2Op); #endif diff --git a/src/operators/reshape_op.cpp b/src/operators/reshape_op.cpp index 28351051098a57a59a11f53a268bf4b8ceac018e..a58a607207c4fcb2b46868131f2257c1719befbe 100644 --- a/src/operators/reshape_op.cpp +++ b/src/operators/reshape_op.cpp @@ -35,9 +35,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(reshape, ops::ReshapeOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(reshape, ops::ReshapeOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(reshape, ops::ReshapeOp); #endif diff --git a/src/operators/resize_op.cpp b/src/operators/resize_op.cpp index dc7a532e7912416738679f5c06eca253be4c3eff..fcdf59b4730d72236f19b3105cadea07f87d58b7 100644 --- a/src/operators/resize_op.cpp +++ b/src/operators/resize_op.cpp @@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators; #ifdef 
PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(resize, ops::ResizeOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(resize, ops::ResizeOp); -#endif #endif diff --git a/src/operators/scale_op.cpp b/src/operators/scale_op.cpp index ceabbaf7a4a94d49c34cbd7e6a38fda8292b8828..d778e8ab6915db12f7f723148535d1f8209df2dd 100644 --- a/src/operators/scale_op.cpp +++ b/src/operators/scale_op.cpp @@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(scale, ops::ScaleOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(scale, ops::ScaleOp); -#endif #endif diff --git a/src/operators/shape_op.cpp b/src/operators/shape_op.cpp index 6b7754f93c238b0687395194f17bf1df8737dc52..f3ef72c16f049df4b0f3ea346dfb2dd8c7d39d3e 100644 --- a/src/operators/shape_op.cpp +++ b/src/operators/shape_op.cpp @@ -34,7 +34,5 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(shape, ops::ShapeOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -#endif #endif diff --git a/src/operators/slice_op.cpp b/src/operators/slice_op.cpp index 5704737902c03c476907ab527495b46c52567ed5..85b2fea07f8bbe1ce3452566abb0c41111a03d88 100644 --- a/src/operators/slice_op.cpp +++ b/src/operators/slice_op.cpp @@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(slice, ops::SliceOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(slice, ops::SliceOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(slice, ops::SliceOp); #endif diff --git a/src/operators/softmax_op.cpp b/src/operators/softmax_op.cpp index e4e6a8cf30ce946a2bf9f84ee66f06c651bfac73..caa967bb9420989d33708baf08b5fb4aa08708e0 100644 --- a/src/operators/softmax_op.cpp +++ b/src/operators/softmax_op.cpp @@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(softmax, ops::SoftmaxOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU 
-REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp); -#endif #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp); #endif diff --git a/src/operators/sum_op.cpp b/src/operators/sum_op.cpp index 2e10363b07498128b5573e27a3d63b59c454d8b6..1049edcbd5aa8878c586f9faa4dc5fc7b0999669 100644 --- a/src/operators/sum_op.cpp +++ b/src/operators/sum_op.cpp @@ -61,9 +61,6 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(sum, ops::SumOp); #endif -#ifdef PADDLE_MOBILE_MALI_GPU -REGISTER_OPERATOR_MALI_GPU(sum, ops::ConcatOp); -#endif #ifdef PADDLE_MOBILE_FPGA #endif