提交 a29db744 编写于 作者: xiebaiyuan's avatar xiebaiyuan

remove mali

上级 ad3844d6
......@@ -9,7 +9,6 @@ option(WITH_TEST "build with unit tests" ON)
# select the platform to build
option(CPU "build with arm CPU support" ON)
option(GPU_MALI "build with arm mali GPU support" OFF)
option(GPU_CL "build with OpenCL support" OFF)
option(FPGA "build with FPGA support" OFF)
if(FPGA)
......@@ -97,31 +96,6 @@ else()
endforeach()
endif()
if (GPU_MALI)
add_definitions(-DPADDLE_MOBILE_MALI_GPU)
add_definitions(-DUSE_ACL=1)
add_definitions(-DUSE_OPENCL)
set(ACL_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/kernel/mali/ACL_Android)
include_directories(${ACL_ROOT} ${ACL_ROOT}/include)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -L${ACL_ROOT}/build")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute_core")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -larm_compute_graph")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -L${ACL_ROOT}/build/opencl-1.2-stubs")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lOpenCL")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_ACL=1")
else()
file(GLOB_RECURSE _tmp_list src/operators/kernel/mali/*.cpp src/operators/kernel/mali/*.cc)
foreach(f ${_tmp_list})
list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
endforeach()
file(GLOB_RECURSE _tmp_list_h src/operators/kernel/mali/*.h)
foreach(f ${_tmp_list_h})
list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
endforeach()
endif()
if(FPGA)
add_definitions(-DPADDLE_MOBILE_FPGA)
file(GLOB_RECURSE _tmp_list src/operators/math/*.cpp src/operators/kernel/fpga/*.cc)
......
......@@ -46,7 +46,6 @@ root@5affd29d4fc5:/ # ccmake .
DEBUGING ON
FPGA OFF
LOG_PROFILE ON
MALI_GPU OFF
NET googlenet
USE_EXCEPTION ON
USE_OPENMP OFF
......
......@@ -109,11 +109,6 @@ USE_OP_CPU(conv2d);
REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
USE_OP_MALI_GPU(conv2d);
REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(conv2d);
REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp);
......
......@@ -23,15 +23,6 @@ limitations under the License. */
#define LOAD_CPU_OP(op_type)
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#define LOAD_MALI_GPU_OP(op_type) \
extern int TouchOpRegistrar_##op_type##_##mali_gpu(); \
static int use_op_itself_##op_type##_##mali_gpu __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_##mali_gpu()
#else
#define LOAD_MALI_GPU_OP(op_type)
#endif
#ifdef PADDLE_MOBILE_FPGA
#define LOAD_FPGA_OP(op_type) \
extern int TouchOpRegistrar_##op_type##_##fpga(); \
......@@ -46,9 +37,8 @@ limitations under the License. */
static int use_fusion_matcher_itself_##op_type __attribute__((unused)) = \
TouchFusionMatcherRegistrar_##op_type();
#define LOAD_OP(op_type) \
LOAD_CPU_OP(op_type); \
LOAD_MALI_GPU_OP(op_type); \
#define LOAD_OP(op_type) \
LOAD_CPU_OP(op_type); \
LOAD_FPGA_OP(op_type);
#define LOAD_OP1(op_type, device_type) LOAD_##device_type##_OP(op_type);
......@@ -68,7 +58,7 @@ LOAD_OP(fetch)
LOAD_OP(fill_constant)
#endif
#ifdef BATCHNORM_OP
LOAD_OP2(batch_norm, CPU, MALI_GPU);
LOAD_OP1(batch_norm, CPU);
#endif
#ifdef BILINEAR_INTERP_OP
LOAD_OP1(bilinear_interp, CPU);
......@@ -77,40 +67,40 @@ LOAD_OP1(bilinear_interp, CPU);
LOAD_OP1(box_coder, CPU);
#endif
#ifdef CONCAT_OP
LOAD_OP3(concat, CPU, MALI_GPU, FPGA);
LOAD_OP2(concat, CPU, FPGA);
#endif
#ifdef CONV_OP
LOAD_OP3(conv2d, CPU, MALI_GPU, FPGA);
LOAD_OP2(conv2d, CPU, FPGA);
#endif
#ifdef LRN_OP
LOAD_OP2(lrn, CPU, MALI_GPU);
LOAD_OP1(lrn, CPU);
#endif
#ifdef SIGMOID_OP
LOAD_OP1(sigmoid, CPU);
#endif
#ifdef FUSION_FC_RELU_OP
LOAD_OP3(fusion_fc_relu, CPU, MALI_GPU, FPGA);
LOAD_OP3(fusion_fc_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_fc_relu);
#endif
#ifdef FUSION_ELEMENTWISEADDRELU_OP
LOAD_OP3(fusion_elementwise_add_relu, CPU, MALI_GPU, FPGA);
LOAD_OP3(fusion_elementwise_add_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_elementwise_add_relu);
#endif
#ifdef SPLIT_OP
LOAD_OP1(split, CPU);
#endif
#ifdef RESIZE_OP
LOAD_OP2(resize, CPU, MALI_GPU);
LOAD_OP1(resize, CPU);
#endif
#ifdef FUSION_CONVADDBNRELU_OP
LOAD_OP2(fusion_conv_add_bn_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu);
#endif
#ifdef RESHAPE_OP
LOAD_OP2(reshape, CPU, MALI_GPU);
LOAD_OP1(reshape, CPU);
#endif
#ifdef RESHAPE2_OP
LOAD_OP2(reshape2, CPU, MALI_GPU);
LOAD_OP1(reshape2, CPU);
#endif
#ifdef TRANSPOSE_OP
LOAD_OP1(transpose, CPU);
......@@ -126,11 +116,11 @@ LOAD_OP2(fusion_conv_add_relu, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_add_relu);
#endif
#ifdef FUSION_CONVADD_OP
LOAD_OP2(fusion_conv_add, CPU, MALI_GPU);
LOAD_OP1(fusion_conv_add, CPU);
LOAD_FUSION_MATCHER(fusion_conv_add);
#endif
#ifdef SOFTMAX_OP
LOAD_OP2(softmax, CPU, MALI_GPU);
LOAD_OP1(softmax, CPU);
#endif
#ifdef SHAPE_OP
LOAD_OP1(shape, CPU);
......@@ -142,13 +132,13 @@ LOAD_OP1(depthwise_conv2d, CPU);
LOAD_OP1(conv2d_transpose, CPU);
#endif
#ifdef SCALE_OP
LOAD_OP2(scale, CPU, MALI_GPU);
LOAD_OP1(scale, CPU);
#endif
#ifdef ELEMENTWISEADD_OP
LOAD_OP2(elementwise_add, CPU, MALI_GPU);
LOAD_OP1(elementwise_add, CPU);
#endif
#ifdef PRELU_OP
LOAD_OP2(prelu, CPU, MALI_GPU);
LOAD_OP1(prelu, CPU);
#endif
#ifdef FLATTEN_OP
LOAD_OP1(flatten, CPU);
......@@ -182,13 +172,13 @@ LOAD_FUSION_MATCHER(fusion_dwconv_bn_relu);
LOAD_OP1(crf_decoding, CPU);
#endif
#ifdef MUL_OP
LOAD_OP2(mul, CPU, MALI_GPU);
LOAD_OP1(mul, CPU);
#endif
#ifdef NORM_OP
LOAD_OP1(norm, CPU);
#endif
#ifdef RELU_OP
LOAD_OP2(relu, CPU, MALI_GPU);
LOAD_OP1(relu, CPU);
LOAD_OP1(relu6, CPU);
#endif
#ifdef IM2SEQUENCE_OP
......@@ -198,11 +188,11 @@ LOAD_OP1(im2sequence, CPU);
LOAD_OP1(lookup_table, CPU);
#endif
#ifdef FUSION_FC_OP
LOAD_OP3(fusion_fc, CPU, MALI_GPU, FPGA);
LOAD_OP2(fusion_fc, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_fc);
#endif
#ifdef POOL_OP
LOAD_OP3(pool2d, CPU, MALI_GPU, FPGA);
LOAD_OP2(pool2d, CPU, FPGA);
#endif
#ifdef MULTICLASSNMS_OP
LOAD_OP1(multiclass_nms, CPU);
......@@ -217,7 +207,7 @@ LOAD_OP1(sum, CPU);
LOAD_OP1(elementwise_mul, CPU);
#endif
#ifdef SLICE_OP
LOAD_OP2(slice, CPU, MALI_GPU);
LOAD_OP1(slice, CPU);
#endif
#ifdef FUSION_CONVBN_OP
LOAD_OP2(fusion_conv_bn, CPU, FPGA);
......
......@@ -115,9 +115,6 @@ class OpRegistry {
#define REGISTER_OPERATOR_CPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU);
#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI);
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA);
......
......@@ -130,13 +130,6 @@ class OpKernelBase {
}
#endif
#ifdef PADDLE_McOBILE_MALI_GPU
OpKernelBase() { acl_op_ = nullptr; }
void *GetAclOp() const { return acl_op_; }
void SetAclOp(void *op, void *ob) const {
reinterpret_cast<OpKernelBase<Dtype, P> *>(ob)->acl_op_ = op;
}
#endif
virtual void Compute(const P &para) = 0;
virtual bool Init(P *para) { return true; }
virtual ~OpKernelBase() = default;
......@@ -147,9 +140,6 @@ class OpKernelBase {
#endif
private:
#ifdef PADDLE_MOBILE_MALI_GPU
void *acl_op_;
#endif
};
class FusionOpMatcher {
......
......@@ -55,9 +55,6 @@ namespace ops = paddle_mobile::operators;
REGISTER_OPERATOR_CPU(relu, ops::ReluOp);
REGISTER_OPERATOR_CPU(relu6, ops::Relu6Op);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(relu, ops::ReluOp);
#endif
......
......@@ -34,9 +34,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(batch_norm, ops::BatchNormOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(bilinear_interp, ops::BilinearOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -58,8 +58,6 @@ REGISTER_OPERATOR_CPU(box_coder, ops::BoxCoderOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(box_coder, ops::BoxCoderOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -69,9 +69,7 @@ REGISTER_OPERATOR_CPU(concat, ops::ConcatOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(concat, ops::ConcatOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(concat, ops::ConcatOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(concat, ops::ConcatOp);
#endif
......
......@@ -55,9 +55,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp);
#endif
......
......@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(conv2d_transpose, ops::ConvOpTranspose);
#endif
......
......@@ -48,8 +48,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(crf_decoding, ops::CrfOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -33,9 +33,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_add, ops::ElementwiseAddOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_add, ops::ElementwiseAddOp);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(elementwise_add, ops::ElementwiseAddOp);
......
......@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_mul, ops::ElementwiseMulOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_mul, ops::ElementwiseMulOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(elementwise_mul, ops::ElementwiseMulOp);
#endif
......
......@@ -32,9 +32,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(elementwise_sub, ops::ElementwiseSubOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -38,9 +38,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(feed, ops::FeedOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(feed, ops::FeedOp);
#endif
......
......@@ -30,9 +30,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fetch, ops::FetchOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
#endif
......
......@@ -54,9 +54,6 @@ REGISTER_FUSION_MATCHER(fusion_conv_add, ops::FusionConvAddMatcher);
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add, ops::FusionConvAddOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp);
......
......@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_deconv_add_bn, ops::FusionDeconvAddBNMatcher);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn, ops::FusionDeconvAddBNOp);
#endif
......
......@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu,
ops::FusionDeconvAddBNReluMatcher);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn_relu, ops::FusionDeconvAddBNReluOp);
#endif
......
......@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_deconv_add, ops::FusionDeconvAddMatcher);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add, ops::FusionDeconvAddOp);
#endif
......
......@@ -25,8 +25,7 @@ REGISTER_FUSION_MATCHER(fusion_deconv_add_relu,
ops::FusionDeconvAddReluMatcher);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_add_relu, ops::FusionDeconvAddReluOp);
#endif
......
......@@ -24,8 +24,7 @@ namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_deconv_bn_relu, ops::FusionDeconvBNReluMatcher);
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_bn_relu, ops::FusionDeconvBNReluOp);
#endif
......
......@@ -23,8 +23,7 @@ namespace operators {}
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_deconv_relu, ops::FusionDeconvReluOp);
#endif
......
......@@ -36,10 +36,6 @@ REGISTER_FUSION_MATCHER(fusion_elementwise_add_relu,
// REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
// ops::FusionElementwiseAddReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
// REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu,
// ops::FusionElementwiseAddReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_elementwise_add_relu,
ops::FusionElementwiseAddReluOp);
......
......@@ -63,9 +63,6 @@ REGISTER_OPERATOR_CPU(fusion_fc, ops::FusionFcOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_fc, ops::FusionFcOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(fusion_fc, ops::FusionFcOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_fc, ops::FusionFcOp);
#endif
......
......@@ -60,9 +60,6 @@ REGISTER_FUSION_MATCHER(fusion_fc_relu, ops::FusionFcReluMatcher);
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_fc_relu, ops::FusionFcReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(fusion_fc_relu, ops::FusionFcReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_fc_relu, ops::FusionFcReluOp);
#endif
......
......@@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(gru_unit, ops::GruUnitOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -37,8 +37,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(increment, ops::IncrementOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -34,8 +34,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(is_empty, ops::IsEmptyOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
Subproject commit 591027fcffea084100c756e48356e0f8a48e35e5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#if USE_ACL == 1
#include "acl_operator.h"
unsigned int bypass_acl_class_layer =
(0 | FLAGS_ENABLE_ACL_CONCAT |
/*0xffffffff |*/ /*FLAGS_ENABLE_ACL_FC |*/ /*FLAGS_ENABLE_ACL_LRN
|*/
0);
int enable_schedule = 0;
#ifdef USE_PROFILING
#include "arm_neon.h"
unsigned int acl_log_flags =
(0 | MASK_LOG_APP_TIME | /*MASK_LOG_ALLOCATE | */ /*MASK_LOG_ALLOCATE | */
/*MASK_LOG_RUN | */ /*MASK_LOG_CONFIG | */ /*MASK_LOG_COPY | */
MASK_LOG_ABSVAL | MASK_LOG_BNLL | MASK_LOG_CONV | MASK_LOG_FC |
MASK_LOG_LRN | MASK_LOG_POOLING | MASK_LOG_RELU | MASK_LOG_SIGMOID |
MASK_LOG_SOFTMAX | MASK_LOG_TANH | MASK_LOG_LC | MASK_LOG_BN |
MASK_LOG_CONCAT | 0);
#include <stdio.h> /* printf */
#include <stdlib.h> /* getenv */
#endif // USE_PROFILING
static bool force_enable_gpu = false;
bool AclEnableSchedule(int enable) {
enable_schedule = enable;
if (enable) {
force_enable_gpu = true;
}
return true;
}
int isScheduleEnable() { return enable_schedule; }
namespace paddle_mobile {
namespace operators {
namespace acl {
bool ACLOperator::init_gpu_env = true;
#ifdef USE_OPENCL
bool ACLOperator::support_opencl_ = false;
bool opencl_is_available() { return arm_compute::opencl_is_available(); }
#elif defined(USE_OPENGLES)
bool ACLOperator::support_opengles_ = false;
#endif
ACLOperator::ACLOperator(bool is_gpu)
: operator_state_(operator_not_init),
force_bypass_acl_path_(false),
target_hint_(TargetHint::DONT_CARE),
convolution_method_hint_(ConvolutionMethodHint::GEMM),
_group(1),
name_(""),
input_idx_(0),
output_idx_(0),
is_gpu_(is_gpu) {
const char* pBypassACL;
if (init_gpu_env) {
#ifdef USE_OPENCL
try {
if (opencl_is_available()) {
arm_compute::CLScheduler::get().default_init();
support_opencl_ = true;
}
} catch (std::exception& e) {
support_opencl_ = false;
}
#elif defined(USE_OPENGLES)
try {
arm_compute::GCScheduler::get().default_init();
support_opengles_ = true;
} catch (std::exception& e) {
support_opengles_ = false;
}
#endif
init_gpu_env = false;
}
if (force_enable_gpu) is_gpu_ = true;
pBypassACL = getenv("BYPASSACL");
if (pBypassACL) {
unsigned int bacl;
sscanf(pBypassACL, "%i", &bacl);
if (bacl != bypass_acl_class_layer) {
bypass_acl_class_layer = bacl;
printf("BYPASSACL<%s>\n", pBypassACL);
printf("BYPASSACL: %x\n", bypass_acl_class_layer);
}
}
#ifdef USE_PROFILING
const char* pLogACL;
pLogACL = getenv("LOGACL");
if (pLogACL) {
unsigned int alf;
sscanf(pLogACL, "%i", &alf);
if (alf != acl_log_flags) {
acl_log_flags = alf;
printf("LOGACL<%s>\n", pLogACL);
printf("LOGACL: %x\n", acl_log_flags);
}
}
#endif // USE_PROFILING
const char* pEnableSchedule;
pEnableSchedule = getenv("ENABLESCHEDULE");
if (pEnableSchedule) {
int bshedule;
sscanf(pEnableSchedule, "%i", &bshedule);
if (bshedule != enable_schedule) {
enable_schedule = bshedule;
printf("ENABLESCHEDULE<%s>\n", pEnableSchedule);
printf("ENABLESCHEDULE: %x\n", enable_schedule);
}
if (enable_schedule) {
AclEnableSchedule(1);
}
}
}
ACLOperator::~ACLOperator() {}
bool ACLOperator::new_tensor(std::unique_ptr<ACLTensor>& tensor,
arm_compute::TensorShape& shape, void* mem,
bool commit) {
auto acl_tensor =
new ACLTensor(arm_compute::TensorInfo(shape, arm_compute::Format::F32));
acl_tensor->set_target(getTargetHint());
acl_tensor->bindmem(mem);
if (commit) acl_tensor->commit();
tensor = (std::unique_ptr<ACLTensor>)std::move(acl_tensor);
return true;
}
bool ACLOperator::new_tensor(std::unique_ptr<ACLSubTensor>& tensor,
std::unique_ptr<ACLTensor>& parent,
arm_compute::TensorShape& shape,
arm_compute::Coordinates& coord) {
auto acl_tensor = new ACLSubTensor(parent, shape, coord);
acl_tensor->set_target(getTargetHint());
tensor = (std::unique_ptr<ACLSubTensor>)std::move(acl_tensor);
return true;
}
void ACLTensor::commit(TensorType type) {
settensortype(type);
if (mem_) {
if (!allocate_) {
#ifdef USE_PROFILING
logtime_util log_time(ACL_ALLOCATE_INFO);
#endif // USE_PROFILING
allocate();
allocate_ = true;
}
if (type_ != tensor_output) {
tensor_copy(mem_);
}
mem_ = nullptr;
}
}
int BaseACLTensor::tensor_copy(arm_compute::ITensor* tensor, void* mem,
bool toTensor) {
#ifdef USE_PROFILING
logtime_util log_time(ACL_COPY_INFO);
#endif // USE_PROFILING
arm_compute::Window window;
// Iterate through the rows (not each element)
window.use_tensor_dimensions(tensor->info()->tensor_shape(),
/* first_dimension =*/arm_compute::Window::DimY);
int width = tensor->info()->tensor_shape()[0];
int height = tensor->info()->tensor_shape()[1];
int deepth = tensor->info()->tensor_shape()[2];
map();
// Create an iterator:
arm_compute::Iterator it(tensor, window);
// Except it works for an arbitrary number of dimensions
if (toTensor) { // mem->tensor
arm_compute::execute_window_loop(
window,
[&](const arm_compute::Coordinates& id) {
memcpy(it.ptr(),
((char*)mem) +
((id[3] * (width * height * deepth) +
id.z() * (width * height) + id.y() * width + id.x()) *
tensor->info()->element_size()),
width * tensor->info()->element_size());
},
it);
} else { // tensor-->mem
arm_compute::execute_window_loop(
window,
[&](const arm_compute::Coordinates& id) {
memcpy(((char*)mem) + ((id[3] * (width * height * deepth) +
id.z() * (width * height) + id.y() * width) *
tensor->info()->element_size()),
it.ptr(), width * tensor->info()->element_size());
},
it);
}
unmap();
return 0;
}
} // namespace acl
} // namespace operators
} // namespace paddle_mobile
#endif
此差异已折叠。
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "acl_tensor.h"
namespace paddle_mobile {
namespace operators {
namespace acl {
#ifdef USE_ACL
template <typename TensorType>
std::unique_ptr<arm_compute::ITensor> initialise_tensor(
arm_compute::TensorInfo &info) {
auto tensor = cpp14::make_unique<TensorType>();
tensor->allocator()->init(info);
return std::move(tensor);
}
template <typename TensorType>
void tensor_allocate(arm_compute::ITensor &tensor) {
auto itensor = dynamic_cast<TensorType *>(&tensor);
itensor->allocator()->allocate();
}
Tensor::Tensor(arm_compute::TensorInfo &info) noexcept
: _target(TargetHint::DONT_CARE), _info(info), _tensor(nullptr) {}
Tensor::Tensor(Tensor &&src) noexcept
: _target(src._target),
_info(std::move(src._info)),
_tensor(std::move(src._tensor)) {}
arm_compute::ITensor *Tensor::set_target(TargetHint target) {
switch (target) {
#ifdef USE_OPENCL
case TargetHint::OPENCL:
_tensor = initialise_tensor<arm_compute::CLTensor>(_info);
break;
#elif defined(USE_OPENGLES)
case TargetHint::OPENGLES:
_tensor = initialise_tensor<arm_compute::GCTensor>(_info);
break;
#endif
case TargetHint::NEON:
_tensor = initialise_tensor<arm_compute::Tensor>(_info);
break;
default:
break;
}
_target = target;
return _tensor.get();
}
void Tensor::allocate() {
switch (_target) {
#ifdef USE_OPENCL
case TargetHint::OPENCL:
tensor_allocate<arm_compute::CLTensor>(*_tensor);
break;
#elif defined(USE_OPENGLES)
case TargetHint::OPENGLES:
tensor_allocate<arm_compute::GCTensor>(*_tensor);
break;
#endif
case TargetHint::NEON:
tensor_allocate<arm_compute::Tensor>(*_tensor);
break;
default:
break;
}
}
void Tensor::map(bool blocking) {
#ifdef USE_OPENCL
if (_target == TargetHint::OPENCL)
dynamic_cast<arm_compute::CLTensor *>(tensor())->map(blocking);
#elif defined(USE_OPENGLES)
if (_target == TargetHint::OPENGLES)
dynamic_cast<arm_compute::GCTensor *>(tensor())->map(blocking);
#endif
}
void Tensor::unmap() {
#ifdef USE_OPENCL
if (_target == TargetHint::OPENCL)
dynamic_cast<arm_compute::CLTensor *>(tensor())->unmap();
#elif defined(USE_OPENGLES)
if (_target == TargetHint::OPENGLES)
dynamic_cast<arm_compute::GCTensor *>(tensor())->unmap();
#endif
}
template <typename SubTensorType, typename ParentTensorType>
std::unique_ptr<arm_compute::ITensor> initialise_subtensor(
arm_compute::ITensor *parent, arm_compute::TensorShape shape,
arm_compute::Coordinates coords) {
auto ptensor = dynamic_cast<ParentTensorType *>(parent);
auto subtensor = cpp14::make_unique<SubTensorType>(ptensor, shape, coords);
return std::move(subtensor);
}
SubTensor::SubTensor(Tensor *parent, arm_compute::TensorShape &tensor_shape,
arm_compute::Coordinates &coords) noexcept
: _target(TargetHint::DONT_CARE),
_tensor_shape(tensor_shape),
_coords(coords),
_parent(nullptr),
_subtensor(nullptr) {
_parent = parent->tensor();
_target = parent->target();
instantiate_subtensor();
}
arm_compute::ITensor *SubTensor::set_target(TargetHint target) {
return (target == _target) ? _subtensor.get() : nullptr;
}
arm_compute::ITensor *SubTensor::tensor() { return _subtensor.get(); }
const arm_compute::ITensor *SubTensor::tensor() const {
return _subtensor.get();
}
TargetHint SubTensor::target() const { return _target; }
void SubTensor::allocate() {
// NOP for sub-tensors
}
void SubTensor::instantiate_subtensor() {
switch (_target) {
#ifdef USE_OPENCL
case TargetHint::OPENCL:
_subtensor = initialise_subtensor<arm_compute::CLSubTensor,
arm_compute::ICLTensor>(
_parent, _tensor_shape, _coords);
break;
#endif
default:
case TargetHint::NEON:
_subtensor =
initialise_subtensor<arm_compute::SubTensor, arm_compute::ITensor>(
_parent, _tensor_shape, _coords);
break;
}
}
#endif
} // namespace acl
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef ACL_TENSOR_H_
#define ACL_TENSOR_H_
#ifdef USE_ACL
#ifdef USE_OPENCL
#include "arm_compute/runtime/CL/CLSubTensor.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#elif defined(USE_OPENGLES)
#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
#endif
#include "arm_compute/runtime/SubTensor.h"
#include "arm_compute/runtime/Tensor.h"
#include <memory>
namespace paddle_mobile {
namespace operators {
namespace acl {
enum class TargetHint {
DONT_CARE,
OPENCL,
OPENGLES,
NEON,
};
enum class ConvolutionMethodHint {
GEMM,
DIRECT,
};
namespace cpp14 {
template <class T>
struct _Unique_if {
typedef std::unique_ptr<T> _Single_object;
};
template <class T>
struct _Unique_if<T[]> {
typedef std::unique_ptr<T[]> _Unknown_bound;
};
template <class T, size_t N>
struct _Unique_if<T[N]> {
typedef void _Known_bound;
};
template <class T, class... Args>
typename _Unique_if<T>::_Single_object make_unique(Args &&... args) {
return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}
template <class T>
typename _Unique_if<T>::_Unknown_bound make_unique(size_t n) {
typedef typename std::remove_extent<T>::type U;
return std::unique_ptr<T>(new U[n]());
}
template <class T, class... Args>
typename _Unique_if<T>::_Known_bound make_unique(Args &&...);
} // namespace cpp14
class Tensor {
public:
explicit Tensor(arm_compute::TensorInfo &info) noexcept;
virtual ~Tensor() {}
Tensor(Tensor &&src) noexcept;
void set_info(arm_compute::TensorInfo &&info) { _info = info; }
arm_compute::ITensor *set_target(TargetHint target);
const arm_compute::TensorInfo &info() const { return _info; }
arm_compute::ITensor *tensor() { return _tensor.get(); }
void allocate();
void init() {}
TargetHint target() const { return _target; }
virtual void map(bool blocking = true);
virtual void unmap();
private:
TargetHint _target;
arm_compute::TensorInfo _info;
std::unique_ptr<arm_compute::ITensor> _tensor;
};
class SubTensor {
public:
SubTensor(Tensor *parent, arm_compute::TensorShape &tensor_shape,
arm_compute::Coordinates &coords) noexcept;
~SubTensor() {}
arm_compute::ITensor *tensor();
const arm_compute::ITensor *tensor() const;
TargetHint target() const;
void allocate();
arm_compute::ITensor *set_target(TargetHint target);
private:
/** Instantiates a sub-tensor */
void instantiate_subtensor();
private:
/**< Target that this tensor is pinned on */
TargetHint _target;
/**< SubTensor shape */
arm_compute::TensorShape _tensor_shape;
/**< SubTensor Coordinates */
arm_compute::Coordinates _coords;
/**< Parent tensor */
arm_compute::ITensor *_parent;
/**< SubTensor */
std::unique_ptr<arm_compute::ITensor> _subtensor;
};
} // namespace acl
} // namespace operators
} // namespace paddle_mobile
#endif
#endif // ACL_TENSOR_H_
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BATCHNORM_OP
#include "operators/kernel/batchnorm_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class AclBatchNormOp : public acl::ACLOperator {
public:
AclBatchNormOp() {
this->force_bypass_acl_path_ = bypass_acl_class_layer & FLAGS_ENABLE_ACL_BN;
}
~AclBatchNormOp() = default;
AclBatchNormOp(const AclBatchNormOp&) = delete;
AclBatchNormOp& operator=(const AclBatchNormOp&) = delete;
AclBatchNormOp(AclBatchNormOp&&) = delete;
AclBatchNormOp& operator=(AclBatchNormOp&&) = delete;
acl::AclParameters& getargs() { return args; }
void InitAclLayer(const BatchNormParam<DeviceType>& param) {
setTargetHint(acl::TargetHint::OPENCL);
arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
args.in_depth, args.batch);
arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
args.out_depth, args.out_num);
if (is_operator_init_done(input_shape)) return;
set_operator_init_done();
this->force_bypass_acl_path_ = false;
arm_compute::TensorShape mean_shape(args.in_depth);
arm_compute::TensorShape var_shape = mean_shape;
arm_compute::TensorShape beta_shape = mean_shape;
arm_compute::TensorShape gamma_shape = mean_shape;
//[width, height, IFM]
new_tensor(input(), input_shape, args.input_data);
//[width, height, OFM]
new_tensor(output(), output_shape, args.output_data);
new_tensor(mean(), mean_shape, args.mean_data);
new_tensor(var(), var_shape, args.var_data);
new_tensor(beta(), beta_shape, args.biases_data);
new_tensor(gamma(), gamma_shape, args.weight_data);
acl_configure(bn, this, args.epsilon);
}
void RunAcl(void* input, void* output) {
acl::ACLOperator::acl_run(input, output);
}
bool Bypass_acl(const BatchNormParam<DeviceType>& param) {
bool bypass_acl = false;
AclParametersByContext(param);
InitAclLayer(param);
// for performance, more groups impact GPU performance
if (this->force_bypass_acl_path_) {
bypass_acl = true;
}
return bypass_acl;
}
private:
void AclParametersByContext(const BatchNormParam<DeviceType>& param) {
const Tensor* in_x = param.InputX();
Tensor* out = param.OutputY();
const Tensor* scale = param.InputScale();
const Tensor* bias = param.InputBias();
const Tensor* saved_mean = param.InputMean();
const Tensor* saved_variance = param.InputVariance();
const T* input_data = in_x->data<T>();
T* output_data = out->mutable_data<T>();
const T* weight_data = scale->data<T>();
const T* bias_data = bias->data<T>();
const T* mean_data = saved_mean->data<T>();
const T* var_data = saved_variance->data<T>();
float epsilon = param.Epsilon();
args.input_data = (void*)input_data;
args.output_data = (void*)output_data;
// args.weight_data = (void*)weight_data;
// args.biases_data = (void*)bias_data;
args.mean_data = (void*)mean_data;
args.var_data = (void*)var_data;
args.epsilon = epsilon;
args.dim = in_x->dims().size();
args.batch = in_x->dims()[0];
args.in_depth = in_x->dims()[1];
args.in_rows = in_x->dims()[2];
args.in_cols = in_x->dims()[3];
args.out_num = out->dims()[0];
args.out_depth = out->dims()[1];
args.out_rows = out->dims()[2];
args.out_cols = out->dims()[3];
args.weight_data = (void*)weight_data;
args.biases_data = (void*)bias_data;
// std::cout
// << "Out C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
}
acl::AclParameters args;
};
template <>
bool BatchNormKernel<GPU_MALI, float>::Init(BatchNormParam<GPU_MALI>* param) {
AclBatchNormOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclBatchNormOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
acl_op = new AclBatchNormOp<GPU_MALI, float>();
this->SetAclOp((void*)acl_op, (void*)this);
}
if (acl_op->Bypass_acl(*param)) {
std::cout << "init acl failed" << std::endl;
return false;
}
return true;
}
template <>
void BatchNormKernel<GPU_MALI, float>::Compute(
const BatchNormParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclBatchNormOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclBatchNormOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
return;
}
acl::AclParameters& args = acl_op->getargs();
acl_op->RunAcl(args.input_data, args.output_data);
}
template class BatchNormKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONCAT_OP
#include "operators/kernel/concat_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class AclConcatOp : public acl::ACLOperator {
public:
AclConcatOp() {
this->force_bypass_acl_path_ =
bypass_acl_class_layer & FLAGS_ENABLE_ACL_CONCAT;
}
~AclConcatOp() = default;
AclConcatOp(const AclConcatOp&) = delete;
AclConcatOp& operator=(const AclConcatOp&) = delete;
AclConcatOp(AclConcatOp&&) = delete;
AclConcatOp& operator=(AclConcatOp&&) = delete;
acl::AclParameters& getargs() { return args; }
void InitAclLayer(const ConcatParam<DeviceType>& param) {
setTargetHint(acl::TargetHint::OPENCL);
const std::vector<framework::LoDTensor*>* input_data = &args.in_tensor;
arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
args.out_depth, args.batch);
if (is_operator_init_done(output_shape)) return;
set_operator_init_done();
this->force_bypass_acl_path_ = false;
T type;
for (int i = 0; i < input_data->size(); i++) {
int in_batch = (*input_data)[i]->dims()[0];
int in_channels = (*input_data)[i]->dims()[1];
int in_width = (*input_data)[i]->dims()[2];
int in_height = (*input_data)[i]->dims()[3];
arm_compute::TensorShape in_shape(in_width, in_height, in_channels);
new_tensor(cinput(i), in_shape,
acl::InputdataPtr(this, args.in_tensor, type, i));
}
//[width, height, OFM]
new_tensor(output(), output_shape, args.output_data);
acl_configure(concat, this, input_data->size());
}
void RunAcl(const std::vector<framework::LoDTensor*>& input, void* output) {
T type;
acl::acl_run(this, input, output, type);
}
bool Bypass_acl(const ConcatParam<DeviceType>& param) {
bool bypass_acl = false;
AclParametersByContext(param);
InitAclLayer(param);
// for performance, more groups impact GPU performance
if (this->force_bypass_acl_path_ || !args.is_channel_concat) {
bypass_acl = true;
}
return bypass_acl;
}
private:
void AclParametersByContext(const ConcatParam<DeviceType>& param) {
auto inputs = param.Inputs();
auto* output = param.Out();
int64_t axis = param.Axis();
T* output_data = output->mutable_data<T>();
args.is_channel_concat = (axis == 1);
args.in_tensor = inputs;
args.output_data = (void*)output_data;
args.batch = output->dims()[0];
args.out_depth = output->dims()[1];
args.out_rows = output->dims()[2];
args.out_cols = output->dims()[3];
}
acl::AclParameters args;
};
template <>
bool ConcatKernel<GPU_MALI, float>::Init(ConcatParam<GPU_MALI>* param) {
AclConcatOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConcatOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
acl_op = new AclConcatOp<GPU_MALI, float>();
this->SetAclOp((void*)acl_op, (void*)this);
}
if (acl_op->Bypass_acl(*param)) {
std::cout << "init acl failed" << std::endl;
return false;
}
return true;
}
template <>
void ConcatKernel<GPU_MALI, float>::Compute(
const ConcatParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclConcatOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConcatOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
return;
}
acl::AclParameters& args = acl_op->getargs();
acl_op->RunAcl(args.in_tensor, args.output_data);
}
template class ConcatKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class AclConvAddOp : public acl::ACLOperator {
public:
AclConvAddOp() {
this->force_bypass_acl_path_ =
bypass_acl_class_layer & FLAGS_ENABLE_ACL_CONV;
}
~AclConvAddOp() = default;
AclConvAddOp(const AclConvAddOp&) = delete;
AclConvAddOp& operator=(const AclConvAddOp&) = delete;
AclConvAddOp(AclConvAddOp&&) = delete;
AclConvAddOp& operator=(AclConvAddOp&&) = delete;
acl::AclParameters& getargs() { return args; }
void InitAclLayer(const FusionConvAddParam<DeviceType>& param) {
setTargetHint(acl::TargetHint::OPENCL);
arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
args.in_depth, args.batch);
arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
args.out_depth, args.out_num);
arm_compute::TensorShape weights_shape(args.filter_cols, args.filter_rows,
args.in_depth / args.num_group,
args.out_depth);
arm_compute::TensorShape biases_shape(args.out_depth);
arm_compute::PadStrideInfo conv_info(
args.stride_cols, args.stride_rows, args.pad_cols, args.pad_rows,
arm_compute::DimensionRoundingType::FLOOR);
if (is_operator_init_done(input_shape)) return;
set_operator_init_done();
this->force_bypass_acl_path_ = false;
// check_direct_conv();
group() = args.num_group;
//[kernel_x, kernel_y, IFM, OFM]
new_tensor(weights(), weights_shape, args.weight_data);
//[OFM]
if (args.biases_data) {
new_tensor(biases(), biases_shape, args.biases_data);
}
//[width, height, IFM]
new_tensor(input(), input_shape, args.input_data);
//[width, height, OFM]
new_tensor(output(), output_shape, args.output_data);
acl_configure(conv, this, conv_info);
}
void RunAcl(void* input, void* output) {
acl::ACLOperator::acl_run(input, output);
}
bool Bypass_acl(const FusionConvAddParam<DeviceType>& param) {
bool bypass_acl = false;
AclParametersByContext(param);
InitAclLayer(param);
// for performance, more groups impact GPU performance
if (this->force_bypass_acl_path_ || args.num_group >= 5) {
bypass_acl = true;
}
if (args.dim > 2) {
bypass_acl = true;
}
if (args.dilated) {
bypass_acl = true;
}
return bypass_acl;
}
private:
void check_direct_conv() {
bool use_direct_conv = false;
const char* pDirectConv;
pDirectConv = getenv("DIRECTCONV");
if (pDirectConv) {
unsigned int bdirectconv;
sscanf(pDirectConv, "%i", &bdirectconv);
if (bdirectconv != use_direct_conv) {
use_direct_conv = bdirectconv;
printf("DIRECTCONV<%s>\n", pDirectConv);
printf("DIRECTCONV: %x\n", use_direct_conv);
}
}
int pad_data[2], kernel[2];
pad_data[1] = args.pad_rows;
pad_data[0] = args.pad_cols;
kernel[1] = args.filter_rows;
kernel[0] = args.filter_cols;
if (use_direct_conv && ((kernel[0] == 1 && kernel[1] == 1 &&
pad_data[0] == 0 && pad_data[1] == 0) ||
(kernel[0] == 3 && kernel[1] == 3 &&
pad_data[0] <= 1 && pad_data[1] <= 1))) {
setConvMethod(); // NEDirectConvolutionLayer only for 1x1 and 3x3
}
}
void AclParametersByContext(const FusionConvAddParam<DeviceType>& param) {
const Tensor* input = param.Input();
Tensor filter = *param.Filter();
Tensor* output = param.Output();
Tensor* bias;
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>();
const T* weight_data = filter.data<T>();
args.input_data = (void*)input_data;
args.output_data = (void*)output_data;
args.weight_data = (void*)weight_data;
args.biases_data = nullptr;
try {
bias = param.Bias();
} catch (const std::exception& e) {
}
if (bias) {
const T* biases_data = bias->data<T>();
args.biases_data = (void*)biases_data;
}
args.num_group = groups;
args.dilation_rows = dilations[0];
args.dilation_cols = dilations[1];
if (dilations[0] != 1 || dilations[1] != 1) {
args.dilated = true;
}
// NCHW
// std::cout << "In dims: " << (input->dims()).size() << std::endl;
args.batch = input->dims()[0];
args.in_depth = input->dims()[1];
args.in_rows = input->dims()[2];
args.in_cols = input->dims()[3];
// std::cout <<"In N: " << args.batch << " C: " << args.in_depth
// << " H: " << args.in_rows << " W: " << args.in_cols << "\n";
// NCHW
// std::cout << "Out dims: " << (output->dims()).size() << std::endl;
args.out_num = output->dims()[0];
args.out_depth = output->dims()[1];
args.out_rows = output->dims()[2];
args.out_cols = output->dims()[3];
// std::cout <<"Out N: " << static_cast<int>(output->dims()[0])
// << " C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
// MCHW = OIHW
args.filter_rows = filter.dims()[2];
args.filter_cols = filter.dims()[3];
// std::cout <<"Filter O: " << static_cast<int>(filter.dims()[0])
// << " I: " << static_cast<int>(filter.dims()[1])
// << " H: " << args.filter_rows << " W: " << args.filter_cols << "\n";
// strides(h_stride, w_stride)
args.stride_rows = strides[0];
args.stride_cols = strides[1];
// std::cout <<"Stride H: " << args.stride_rows << " W: " <<
// args.stride_cols << "\n";
// paddings(h_pad, w_pad)
args.pad_rows = paddings[0];
args.pad_cols = paddings[1];
// std::cout <<"Pad H: " << args.pad_rows << " W: " << args.pad_cols <<
// "\n";
}
acl::AclParameters args;
};
template <>
bool ConvAddKernel<GPU_MALI, float>::Init(FusionConvAddParam<GPU_MALI>* param) {
AclConvAddOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConvAddOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
acl_op = new AclConvAddOp<GPU_MALI, float>();
this->SetAclOp((void*)acl_op, (void*)this);
}
if (acl_op->Bypass_acl(*param)) {
std::cout << "init acl failed" << std::endl;
return false;
}
return true;
}
template <>
void ConvAddKernel<GPU_MALI, float>::Compute(
const FusionConvAddParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclConvAddOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConvAddOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
return;
}
acl::AclParameters& args = acl_op->getargs();
acl_op->RunAcl(args.input_data, args.output_data);
}
template class ConvAddKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class AclConvOp : public acl::ACLOperator {
public:
AclConvOp() {
this->force_bypass_acl_path_ =
bypass_acl_class_layer & FLAGS_ENABLE_ACL_CONV;
}
~AclConvOp() = default;
AclConvOp(const AclConvOp&) = delete;
AclConvOp& operator=(const AclConvOp&) = delete;
AclConvOp(AclConvOp&&) = delete;
AclConvOp& operator=(AclConvOp&&) = delete;
acl::AclParameters& getargs() { return args; }
void InitAclLayer(const ConvParam<DeviceType>& param) {
setTargetHint(acl::TargetHint::OPENCL);
arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
args.in_depth, args.batch);
arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
args.out_depth, args.out_num);
arm_compute::TensorShape weights_shape(args.filter_cols, args.filter_rows,
args.in_depth / args.num_group,
args.out_depth);
// arm_compute::TensorShape biases_shape(args.out_depth);
arm_compute::PadStrideInfo conv_info(
args.stride_cols, args.stride_rows, args.pad_cols, args.pad_rows,
arm_compute::DimensionRoundingType::FLOOR);
if (is_operator_init_done(input_shape)) return;
set_operator_init_done();
this->force_bypass_acl_path_ = false;
check_direct_conv();
//[kernel_x, kernel_y, IFM, OFM]
new_tensor(weights(), weights_shape, args.weight_data);
//[OFM]
// if (args.biases_data) {
// new_tensor(biases(),biases_shape,args.biases_data);
//}
group() = args.num_group;
//[width, height, IFM]
new_tensor(input(), input_shape, args.input_data);
//[width, height, OFM]
new_tensor(output(), output_shape, args.output_data);
acl_configure(conv, this, conv_info);
}
void RunAcl(void* input, void* output) {
acl::ACLOperator::acl_run(input, output);
}
bool Bypass_acl(const ConvParam<DeviceType>& param) {
bool bypass_acl = false;
AclParametersByContext(param);
InitAclLayer(param);
// for performance, more groups impact GPU performance
if (this->force_bypass_acl_path_ || args.num_group >= 5) {
bypass_acl = true;
}
if (args.dim > 2) {
bypass_acl = true;
}
if (args.dilated) {
bypass_acl = true;
}
return bypass_acl;
}
private:
void check_direct_conv() {
bool use_direct_conv = false;
const char* pDirectConv;
pDirectConv = getenv("DIRECTCONV");
if (pDirectConv) {
unsigned int bdirectconv;
sscanf(pDirectConv, "%i", &bdirectconv);
if (bdirectconv != use_direct_conv) {
use_direct_conv = bdirectconv;
printf("DIRECTCONV<%s>\n", pDirectConv);
printf("DIRECTCONV: %x\n", use_direct_conv);
}
}
int pad_data[2], kernel[2];
pad_data[1] = args.pad_rows;
pad_data[0] = args.pad_cols;
kernel[1] = args.filter_rows;
kernel[0] = args.filter_cols;
if (use_direct_conv && ((kernel[0] == 1 && kernel[1] == 1 &&
pad_data[0] == 0 && pad_data[1] == 0) ||
(kernel[0] == 3 && kernel[1] == 3 &&
pad_data[0] <= 1 && pad_data[1] <= 1))) {
setConvMethod(); // NEDirectConvolutionLayer only for 1x1 and 3x3
}
}
void AclParametersByContext(const ConvParam<DeviceType>& param) {
const Tensor* input = param.Input();
Tensor filter = *param.Filter();
Tensor* output = param.Output();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>();
const T* weight_data = filter.data<T>();
args.input_data = (void*)input_data;
args.output_data = (void*)output_data;
args.weight_data = (void*)weight_data;
args.biases_data = nullptr;
// try {
// bias = context.Input<framework::Tensor>("Bias");
// } catch (const std::exception& e) {
// }
// if (bias) {
// const T* biases_data = bias->data<T>();
// args.biases_data = (void*)biases_data;
// }
args.num_group = groups;
args.dilation_rows = dilations[0];
args.dilation_cols = dilations[1];
if (dilations[0] != 1 || dilations[1] != 1) {
args.dilated = true;
}
// NCHW
// std::cout << "In dims: " << (input->dims()).size() << std::endl;
args.batch = input->dims()[0];
args.in_depth = input->dims()[1];
args.in_rows = input->dims()[2];
args.in_cols = input->dims()[3];
std::cout << "In N: " << args.batch << " C: " << args.in_depth
<< " H: " << args.in_rows << " W: " << args.in_cols << "\n";
// NCHW
// std::cout << "Out dims: " << (output->dims()).size() << std::endl;
args.out_num = output->dims()[0];
args.out_depth = output->dims()[1];
args.out_rows = output->dims()[2];
args.out_cols = output->dims()[3];
// std::cout <<"Out N: " << static_cast<int>(output->dims()[0])
// << " C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
// MCHW = OIHW
args.filter_rows = filter.dims()[2];
args.filter_cols = filter.dims()[3];
// std::cout <<"Filter O: " << static_cast<int>(filter.dims()[0])
// << " I: " << static_cast<int>(filter.dims()[1])
// << " H: " << args.filter_rows << " W: " << args.filter_cols << "\n";
// strides(h_stride, w_stride)
args.stride_rows = strides[0];
args.stride_cols = strides[1];
// std::cout <<"Stride H: " << args.stride_rows << " W: " <<
// args.stride_cols << "\n";
// paddings(h_pad, w_pad)
args.pad_rows = paddings[0];
args.pad_cols = paddings[1];
// std::cout <<"Pad H: " << args.pad_rows << " W: " << args.pad_cols <<
// "\n";
}
acl::AclParameters args;
};
template <>
bool ConvKernel<GPU_MALI, float>::Init(ConvParam<GPU_MALI>* param) {
AclConvOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConvOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
acl_op = new AclConvOp<GPU_MALI, float>();
this->SetAclOp((void*)acl_op, (void*)this);
}
if (acl_op->Bypass_acl(*param)) {
std::cout << "init acl failed" << std::endl;
return false;
}
return true;
}
template <>
void ConvKernel<GPU_MALI, float>::Compute(const ConvParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclConvOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConvOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
return;
}
acl::AclParameters& args = acl_op->getargs();
acl_op->RunAcl(args.input_data, args.output_data);
}
template class ConvKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#pragma once
#include "operators/kernel/elementwise_add_kernel.h"
namespace paddle_mobile {
namespace operators {
template <typename T>
struct AddFunctor {
inline T operator()(T a, T b) const { return a + b; }
};
template <>
bool ElementwiseAddKernel<GPU_MALI, float>::Init(
ElementwiseAddParam<GPU_MALI> *param) {
return true;
}
template <>
void ElementwiseAddKernel<GPU_MALI, float>::Compute(
const ElementwiseAddParam<GPU_MALI> &param) {
const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY();
Tensor *Out = param.Out();
Out->mutable_data<float>();
int axis = param.Axis();
ElementwiseComputeEx<AddFunctor<float>, float>(input_x, input_y, axis,
AddFunctor<float>(), Out);
}
template class ElementwiseAddKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FeedKernel<GPU_MALI, float>::Init(FeedParam<GPU_MALI> *param) {
return true;
}
template <>
void FeedKernel<GPU_MALI, float>::Compute(const FeedParam<GPU_MALI> &param) {
param.Out()->ShareDataWith(*(param.InputX()));
param.Out()->set_lod(param.InputX()->lod());
}
template class FeedKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/fetch_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FetchKernel<GPU_MALI, float>::Init(FetchParam<GPU_MALI> *param) {
return true;
}
template <>
void FetchKernel<GPU_MALI, float>::Compute(const FetchParam<GPU_MALI> &param) {
param.Out()->ShareDataWith(*(param.InputX()));
}
template class FetchKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_FC_OP
#include "operators/kernel/fusion_fc_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FusionFcKernel<GPU_MALI, float>::Init(FusionFcParam<GPU_MALI> *param) {
return true;
}
template <>
void FusionFcKernel<GPU_MALI, float>::Compute(
const FusionFcParam<GPU_MALI> &param) {
const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY();
const Tensor *input_z = param.InputZ();
auto *input_z_data = input_z->data<float>();
int axis = param.Axis();
Tensor *out = param.Out();
auto *out_data = out->mutable_data<float>();
const Tensor x_matrix =
input_x->dims().size() > 2
? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
: *input_x;
const Tensor y_matrix =
input_y->dims().size() > 2
? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
: *input_y;
auto out_dim = out->dims();
if (out_dim.size() != 2) {
out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
}
PADDLE_MOBILE_ENFORCE(out_dim.size() == 2, " out_dim.size must be 2.");
PADDLE_MOBILE_ENFORCE(input_z->dims().size() == 1, "inpu_z size must be 1");
PADDLE_MOBILE_ENFORCE(out_dim[1] == input_z->dims()[0],
" out_dim.size must be 2.");
axis = (axis == -1 ? out_dim.size() - input_z->dims().size() : axis);
PADDLE_MOBILE_ENFORCE(axis == 1, " to fit broadcast, axis = 1. ")
int64_t classes = input_z->numel();
for (int i = 0; i < out_dim[0]; i++) {
memory::Copy(out_data + i * classes, input_z_data, sizeof(float) * classes);
}
for (int i = 0; i < out->numel(); i++) {
DLOG << out_data[i];
}
math::MatMul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
out, static_cast<float>(1));
PADDLE_MOBILE_ENFORCE(out_dim.size() == 2, " out_dim.size must be 2.");
// if (out_dim.size() != 2) {
// out->Resize(out_dim);
// }
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef LRN_OP
#pragma once
#include "operators/kernel/lrn_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/kernel/central-arm-func/lrn_arm_func.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class AclLrnOp : public acl::ACLOperator {
public:
AclLrnOp() {
this->force_bypass_acl_path_ =
bypass_acl_class_layer & FLAGS_ENABLE_ACL_LRN;
}
~AclLrnOp() = default;
AclLrnOp(const AclLrnOp&) = delete;
AclLrnOp& operator=(const AclLrnOp&) = delete;
AclLrnOp(AclLrnOp&&) = delete;
AclLrnOp& operator=(AclLrnOp&&) = delete;
acl::AclParameters& getargs() { return args; }
void InitAclLayer(const LrnParam<DeviceType>& param) {
setTargetHint(acl::TargetHint::OPENCL);
arm_compute::TensorShape shape(args.in_cols, args.in_rows, args.in_depth);
if (is_operator_init_done(shape)) return;
set_operator_init_done();
this->force_bypass_acl_path_ = false;
arm_compute::NormalizationLayerInfo norm_info(
arm_compute::NormType::CROSS_MAP, args.nsize, args.alpha, args.beta,
args.knorm);
//[width, height, IFM]
new_tensor(input(), shape, args.input_data);
//[width, height, OFM]
new_tensor(output(), shape, args.output_data);
acl_configure(lrn, this, norm_info);
}
void Set_bypass(bool bypass) { args.is_bypass = bypass; }
void RunAcl(void* input, void* output) {
acl::ACLOperator::acl_run(input, output);
}
bool Bypass_acl(const LrnParam<DeviceType>& param) {
bool bypass_acl = false;
AclParametersByContext(param);
InitAclLayer(param);
// for performance, more groups impact GPU performance
if (this->force_bypass_acl_path_) {
bypass_acl = true;
}
return bypass_acl;
}
private:
void AclParametersByContext(const LrnParam<DeviceType>& param) {
const Tensor* in_x = param.InputX();
Tensor* out = param.Out();
int n = param.N();
T alpha = param.Alpha();
T beta = param.Beta();
T k = param.K();
const T* input_data = in_x->data<T>();
T* output_data = out->mutable_data<T>();
args.input_data = (void*)input_data;
args.output_data = (void*)output_data;
args.nsize = n;
args.alpha = alpha;
args.beta = beta;
args.knorm = k;
// NCHW
args.batch = in_x->dims()[0];
args.in_depth = in_x->dims()[1];
args.in_rows = in_x->dims()[2];
args.in_cols = in_x->dims()[3];
// std::cout
// << "Out C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
}
acl::AclParameters args;
};
template <>
bool LrnKernel<GPU_MALI, float>::Init(LrnParam<GPU_MALI>* param) {
AclLrnOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclLrnOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
acl_op = new AclLrnOp<GPU_MALI, float>();
this->SetAclOp((void*)acl_op, (void*)this);
}
if (acl_op->Bypass_acl(*param)) {
acl_op->Set_bypass(true);
std::cout << "init acl failed" << std::endl;
return true;
}
return true;
}
template <>
void LrnKernel<GPU_MALI, float>::Compute(const LrnParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclLrnOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclLrnOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
return;
}
acl::AclParameters& args = acl_op->getargs();
if (args.is_bypass) {
std::cout << "bypass op" << std::endl;
LrnCompute<float>(param);
return;
}
const float* input_data = (const float*)args.input_data;
const float* output_data = (const float*)args.output_data;
for (int n = 0; n < args.batch; ++n) {
acl_op->RunAcl((void*)input_data, (void*)output_data);
input_data += args.in_depth * args.in_cols * args.in_rows;
output_data += args.in_depth * args.in_cols * args.in_rows;
}
}
template class LrnKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef MUL_OP
#pragma once
#include "operators/kernel/mul_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool MulKernel<GPU_MALI, float>::Init(MulParam<GPU_MALI> *param) {
return true;
}
template <>
void MulKernel<GPU_MALI, float>::Compute(const MulParam<GPU_MALI> &param) {
const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY();
Tensor *out = param.Out();
out->mutable_data<float>();
const Tensor x_matrix =
input_x->dims().size() > 2
? framework::ReshapeToMatrix(*input_x, param.XNumColDims())
: *input_x;
const Tensor y_matrix =
input_y->dims().size() > 2
? framework::ReshapeToMatrix(*input_y, param.YNumColDims())
: *input_y;
auto out_dim = out->dims();
if (out_dim.size() != 2) {
out->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
}
math::MatMul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
out, static_cast<float>(0));
if (out_dim.size() != 2) {
out->Resize(out_dim);
}
}
template class MulKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#pragma once
#include "operators/kernel/pool_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class AclPoolOp : public acl::ACLOperator {
public:
AclPoolOp() {
this->force_bypass_acl_path_ =
bypass_acl_class_layer & FLAGS_ENABLE_ACL_POOLING;
}
~AclPoolOp() = default;
AclPoolOp(const AclPoolOp&) = delete;
AclPoolOp& operator=(const AclPoolOp&) = delete;
AclPoolOp(AclPoolOp&&) = delete;
AclPoolOp& operator=(AclPoolOp&&) = delete;
acl::AclParameters& getargs() { return args; }
void InitAclLayer(const PoolParam<DeviceType>& param) {
setTargetHint(acl::TargetHint::OPENCL);
arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
args.in_depth);
arm_compute::TensorShape output_shape(args.out_cols, args.out_rows,
args.out_depth);
// arm_compute::TensorShape weights_shape(
// args.filter_cols, args.filter_rows, args.in_depth, args.out_depth);
// arm_compute::TensorShape biases_shape(args.out_depth);
arm_compute::PoolingLayerInfo pool_info;
if (args.pool_type == "max") {
pool_info = arm_compute::PoolingLayerInfo(
arm_compute::PoolingType::MAX, args.filter_rows,
arm_compute::PadStrideInfo(args.stride_cols, args.stride_rows,
args.pad_cols, args.pad_rows,
arm_compute::DimensionRoundingType::CEIL));
} else {
pool_info = arm_compute::PoolingLayerInfo(
arm_compute::PoolingType::AVG, args.filter_rows,
arm_compute::PadStrideInfo(args.stride_cols, args.stride_rows,
args.pad_cols, args.pad_rows,
arm_compute::DimensionRoundingType::CEIL));
}
if (is_operator_init_done(input_shape)) return;
set_operator_init_done();
this->force_bypass_acl_path_ = false;
//[width, height, IFM]
new_tensor(input(), input_shape, args.input_data);
//[width, height, OFM]
new_tensor(output(), output_shape, args.output_data);
acl_configure(pooling, this, pool_info);
}
void RunAcl(void* input, void* output) {
acl::ACLOperator::acl_run(input, output);
}
bool Bypass_acl(const PoolParam<DeviceType>& param) {
bool bypass_acl = false;
AclParametersByContext(param);
InitAclLayer(param);
// for performance, more groups impact GPU performance
if (this->force_bypass_acl_path_) {
bypass_acl = true;
}
if (args.pool_type != "max" && args.pool_type != "avg") {
bypass_acl = true;
}
if (args.filter_rows != args.filter_cols) {
bypass_acl = true;
}
// if (args.filter_rows!=2 && args.filter_rows!=3) {
// bypass_acl = true;
// }
return bypass_acl;
}
private:
void AclParametersByContext(const PoolParam<DeviceType>& param) {
const Tensor* in_x = param.Input();
Tensor* out = param.Output();
std::string pooling_type = param.PoolingType();
std::vector<int> ksize = param.Ksize();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
bool is_global_pooling = param.isGlobalPooling();
const T* input_data = in_x->data<T>();
T* output_data = out->mutable_data<T>();
args.input_data = (void*)input_data;
args.output_data = (void*)output_data;
args.is_global_pool = is_global_pooling;
args.pool_type = pooling_type;
args.filter_rows = ksize[0];
args.filter_cols = ksize[1];
args.dim = ksize.size();
// NCHW
args.batch = in_x->dims()[0];
args.in_depth = in_x->dims()[1];
args.in_rows = in_x->dims()[2];
args.in_cols = in_x->dims()[3];
// std::cout <<"In N: " << args.batch << " C: " << args.in_depth
// << " H: " << args.in_rows << " W: " << args.in_cols << "\n";
// NCHW
// std::cout <<"Out N: " << static_cast<int>(output->dims()[0])
// << " C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
// MCHW = OIHW
// std::cout <<"Filter O: " << static_cast<int>(filter->dims()[0])
// << " I: " << static_cast<int>(filter->dims()[1])
// << " H: " << args.filter_rows << " W: " << args.filter_cols << "\n";
// strides(h_stride, w_stride)
args.stride_rows = strides[0];
args.stride_cols = strides[1];
// std::cout <<"PoolingType: " << args.pool_type << "\n";
// std::cout <<"Stride H: " << args.stride_rows << " W: " <<
// args.stride_cols << "\n";
// paddings(h_pad, w_pad)
args.pad_rows = paddings[0];
args.pad_cols = paddings[1];
// std::cout <<"Pad H: " << args.pad_rows << " W: " << args.pad_cols <<
// "\n";
args.out_depth = args.in_depth;
// args.out_rows = out->dims()[2];
// args.out_cols = out->dims()[3];
args.out_rows = static_cast<int>(ceil(static_cast<float>(args.in_rows +
2 * args.pad_rows -
args.filter_rows) /
args.stride_rows)) +
1;
args.out_cols = static_cast<int>(ceil(static_cast<float>(args.in_cols +
2 * args.pad_cols -
args.filter_cols) /
args.stride_cols)) +
1;
if (is_global_pooling) {
args.filter_rows = args.in_rows;
args.filter_cols = args.in_cols;
args.pad_rows = 0;
args.pad_cols = 0;
}
}
acl::AclParameters args;
};
template <>
bool PoolKernel<GPU_MALI, float>::Init(PoolParam<GPU_MALI>* param) {
AclPoolOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclPoolOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
acl_op = new AclPoolOp<GPU_MALI, float>();
this->SetAclOp((void*)acl_op, (void*)this);
}
if (acl_op->Bypass_acl(*param)) {
std::cout << "init acl failed" << std::endl;
return false;
}
return true;
}
template <>
void PoolKernel<GPU_MALI, float>::Compute(const PoolParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclPoolOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclPoolOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
return;
}
acl::AclParameters& args = acl_op->getargs();
const float* input_data = (const float*)args.input_data;
const float* output_data = (const float*)args.output_data;
for (int n = 0; n < args.batch; ++n) {
acl_op->RunAcl((void*)input_data, (void*)output_data);
input_data += args.in_depth * args.in_cols * args.in_rows;
output_data += args.in_depth * args.out_cols * args.out_rows;
}
}
template class PoolKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#pragma once
#include "operators/kernel/relu_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class AclReluOp : public acl::ACLOperator {
public:
AclReluOp() {
this->force_bypass_acl_path_ =
bypass_acl_class_layer & FLAGS_ENABLE_ACL_RELU;
}
~AclReluOp() = default;
AclReluOp(const AclReluOp&) = delete;
AclReluOp& operator=(const AclReluOp&) = delete;
AclReluOp(AclReluOp&&) = delete;
AclReluOp& operator=(AclReluOp&&) = delete;
acl::AclParameters& getargs() { return args; }
void InitAclLayer(const ReluParam<DeviceType>& param) {
setTargetHint(acl::TargetHint::OPENCL);
arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
args.in_depth, args.batch);
arm_compute::TensorShape output_shape(args.in_cols, args.in_rows,
args.in_depth, args.out_num);
// arm_compute::TensorShape weights_shape(
// args.filter_cols, args.filter_rows, args.in_depth, args.out_depth);
// arm_compute::TensorShape biases_shape(args.out_depth);
arm_compute::ActivationLayerInfo::ActivationFunction type;
type = arm_compute::ActivationLayerInfo::ActivationFunction::RELU;
arm_compute::ActivationLayerInfo act_info(type);
if (is_operator_init_done(input_shape)) return;
set_operator_init_done();
this->force_bypass_acl_path_ = false;
//[width, height, IFM]
new_tensor(input(), input_shape, args.input_data);
//[width, height, OFM]
new_tensor(output(), output_shape, args.output_data);
acl_configure(activation, this, act_info);
}
void RunAcl(void* input, void* output) {
acl::ACLOperator::acl_run(input, output);
}
bool Bypass_acl(const ReluParam<DeviceType>& param) {
bool bypass_acl = false;
AclParametersByContext(param);
InitAclLayer(param);
// for performance, more groups impact GPU performance
if (this->force_bypass_acl_path_) {
bypass_acl = true;
}
return bypass_acl;
}
private:
void AclParametersByContext(const ReluParam<DeviceType>& param) {
const auto* input_x = param.InputX();
auto* out = param.Out();
const T* input_data = input_x->data<T>();
T* output_data = out->mutable_data<T>();
args.input_data = (void*)input_data;
args.output_data = (void*)output_data;
args.batch = input_x->dims()[0];
args.in_depth = input_x->dims()[1];
args.in_rows = input_x->dims()[2];
args.in_cols = input_x->dims()[3];
args.out_num = out->dims()[0];
}
acl::AclParameters args;
};
template <>
bool ReluKernel<GPU_MALI, float>::Init(ReluParam<GPU_MALI>* param) {
AclReluOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclReluOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
acl_op = new AclReluOp<GPU_MALI, float>();
this->SetAclOp((void*)acl_op, (void*)this);
}
if (acl_op->Bypass_acl(*param)) {
std::cout << "init acl failed" << std::endl;
return false;
}
return true;
}
template <>
void ReluKernel<GPU_MALI, float>::Compute(const ReluParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclReluOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclReluOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
return;
}
acl::AclParameters& args = acl_op->getargs();
acl_op->RunAcl(args.input_data, args.output_data);
}
template class ReluKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE_OP
#pragma once
#include "operators/kernel/reshape_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ReshapeKernel<GPU_MALI, float>::Init(ReshapeParam<GPU_MALI> *param) {
return true;
}
template <>
void ReshapeKernel<GPU_MALI, float>::Compute(
const ReshapeParam<GPU_MALI> &param) {
const auto *input_x = param.InputX();
const auto &input_x_dims = input_x->dims();
auto *out = param.Out();
framework::DDim out_dims = out->dims();
const auto *input_shape = param.InputShape();
if (input_shape) {
auto *shape_data = input_shape->data<int>();
framework::Tensor cpu_shape_tensor;
auto shape =
std::vector<int>(shape_data, shape_data + input_shape->numel());
out_dims = ValidateShape(shape, input_x->dims());
}
bool inplace = param.Inplace();
out->Resize(out_dims);
if (!inplace) {
out->mutable_data<float>();
framework::TensorCopy(*input_x, out);
out->Resize(out_dims);
} else {
out->ShareDataWith(*input_x);
out->Resize(out_dims);
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#pragma once
#include "operators/kernel/softmax_kernel.h"
#ifdef PADDLE_MOBILE_MALI_GPU
#include "acl_operator.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class AclSoftmaxOp : public acl::ACLOperator {
public:
AclSoftmaxOp() {
this->force_bypass_acl_path_ =
bypass_acl_class_layer & FLAGS_ENABLE_ACL_SOFTMAX;
}
~AclSoftmaxOp() = default;
AclSoftmaxOp(const AclSoftmaxOp&) = delete;
AclSoftmaxOp& operator=(const AclSoftmaxOp&) = delete;
AclSoftmaxOp(AclSoftmaxOp&&) = delete;
AclSoftmaxOp& operator=(AclSoftmaxOp&&) = delete;
acl::AclParameters& getargs() { return args; }
void InitAclLayer(const SoftmaxParam<DeviceType>& param) {
setTargetHint(acl::TargetHint::OPENCL);
arm_compute::TensorShape shape(args.in_depth, args.batch);
if (is_operator_init_done(shape)) return;
set_operator_init_done();
this->force_bypass_acl_path_ = false;
//[width, height, IFM]
new_tensor(input(), shape, args.input_data);
//[width, height, OFM]
new_tensor(output(), shape, args.output_data);
acl_configure(softmax, this, NULL);
}
void RunAcl(void* input, void* output) {
acl::ACLOperator::acl_run(input, output);
}
bool Bypass_acl(const SoftmaxParam<DeviceType>& param) {
bool bypass_acl = false;
AclParametersByContext(param);
InitAclLayer(param);
// for performance, more groups impact GPU performance
if (this->force_bypass_acl_path_) {
bypass_acl = true;
}
return bypass_acl;
}
private:
void AclParametersByContext(const SoftmaxParam<DeviceType>& param) {
const framework::Tensor* in_x = param.InputX();
framework::Tensor* out = param.Out();
auto x_dims = in_x->dims();
out->Resize(x_dims);
const T* input_data = in_x->data<T>();
T* output_data = out->data<T>();
args.input_data = (void*)input_data;
args.output_data = (void*)output_data;
// NCHW
args.batch = in_x->dims()[0];
args.in_depth = in_x->dims()[1];
args.out_num = out->dims()[0];
// std::cout
// << "Out C: " << args.out_depth
// << " H: " << args.out_rows << " W: " << args.out_cols << "\n";
}
acl::AclParameters args;
};
template <>
bool SoftmaxKernel<GPU_MALI, float>::Init(SoftmaxParam<GPU_MALI>* param) {
AclSoftmaxOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclSoftmaxOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
acl_op = new AclSoftmaxOp<GPU_MALI, float>();
this->SetAclOp((void*)acl_op, (void*)this);
}
if (acl_op->Bypass_acl(*param)) {
std::cout << "init acl failed" << std::endl;
return false;
}
return true;
}
template <>
void SoftmaxKernel<GPU_MALI, float>::Compute(
const SoftmaxParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclSoftmaxOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclSoftmaxOp<GPU_MALI, float>*>(this->GetAclOp());
if (acl_op == nullptr) {
return;
}
acl::AclParameters& args = acl_op->getargs();
const float* input_data = (const float*)args.input_data;
const float* output_data = (const float*)args.output_data;
for (int n = 0; n < args.out_num; ++n) {
acl_op->RunAcl((void*)input_data, (void*)output_data);
input_data += args.in_depth;
output_data += args.in_depth;
}
}
template class SoftmaxKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
#endif
......@@ -59,8 +59,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(lookup_table, ops::LookupOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -35,8 +35,5 @@ REGISTER_OPERATOR_CPU(lrn, ops::LrnOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(lrn, ops::LrnOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(lrn, ops::LrnOp);
#endif
#endif
......@@ -61,9 +61,6 @@ REGISTER_OPERATOR_CPU(mul, ops::MulOp);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(mul, ops::MulOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(mul, ops::MulOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(mul, ops::MulOp);
#endif
......
......@@ -41,8 +41,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(norm, ops::NormOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
......@@ -63,9 +63,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(pool2d, ops::PoolOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp);
#endif
......
......@@ -36,8 +36,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(prelu, ops::PReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(prelu, ops::PReluOp);
#endif
#endif
......@@ -52,8 +52,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(prior_box, ops::PriorBoxOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(prior_box, ops::PriorBoxOp);
#endif
......
......@@ -40,9 +40,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(reshape2, ops::Reshape2Op);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(reshape2, ops::Reshape2Op);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(reshape2, ops::Reshape2Op);
#endif
......
......@@ -35,9 +35,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(reshape, ops::ReshapeOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(reshape, ops::ReshapeOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(reshape, ops::ReshapeOp);
#endif
......
......@@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(resize, ops::ResizeOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(resize, ops::ResizeOp);
#endif
#endif
......@@ -32,8 +32,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(scale, ops::ScaleOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(scale, ops::ScaleOp);
#endif
#endif
......@@ -34,7 +34,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(shape, ops::ShapeOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#endif
......@@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(slice, ops::SliceOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(slice, ops::SliceOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(slice, ops::SliceOp);
#endif
......
......@@ -31,9 +31,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(softmax, ops::SoftmaxOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp);
#endif
......
......@@ -61,9 +61,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(sum, ops::SumOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(sum, ops::ConcatOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册