From d152ec5f12b48418ae25459c1bf4eb60e537ccb9 Mon Sep 17 00:00:00 2001
From: zhaojiaying01
Date: Mon, 27 May 2019 11:56:02 +0800
Subject: [PATCH] add fusion_conv_relu op for CPU and GPU_CL

---
 src/common/types.cpp                            |   2 +
 src/common/types.h                              |   1 +
 src/framework/load_ops.h                        |   4 +
 src/operators/fusion_conv_relu_op.cpp           |  64 ++++++++++
 src/operators/fusion_conv_relu_op.h             |  66 ++++++++++
 .../kernel/arm/activation_kernel.cpp            |  81 +-----------
 .../arm/convolution/conv_relu_kernel.cpp        |  63 ++++++++++
 .../central-arm-func/activation_arm_func.h      | 107 ++++++++++++++++
 src/operators/kernel/cl/conv_relu_kernel.cpp    | 117 ++++++++++++++++++
 src/operators/kernel/conv_relu_kernel.h         |  42 +++++++
 src/operators/op_param.h                        |  16 +++
 tools/op.cmake                                  |   5 +
 12 files changed, 488 insertions(+), 80 deletions(-)
 create mode 100644 src/operators/fusion_conv_relu_op.cpp
 create mode 100644 src/operators/fusion_conv_relu_op.h
 create mode 100644 src/operators/kernel/arm/convolution/conv_relu_kernel.cpp
 create mode 100644 src/operators/kernel/central-arm-func/activation_arm_func.h
 create mode 100644 src/operators/kernel/cl/conv_relu_kernel.cpp
 create mode 100644 src/operators/kernel/conv_relu_kernel.h

diff --git a/src/common/types.cpp b/src/common/types.cpp
index 20462ece92..d71befca20 100644
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -31,6 +31,7 @@ const char *G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU = "fusion_conv_add_add_prelu";
 const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
 const char *G_OP_TYPE_FUSION_CONV_BN_ADD_RELU = "fusion_conv_bn_add_relu";
 const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
+const char *G_OP_TYPE_FUSION_CONV_RELU = "fusion_conv_relu";
 const char *G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu";
 const char *G_OP_TYPE_FC = "fusion_fc";
 const char *G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
@@ -125,6 +126,7 @@ std::unordered_map<
     op_input_output_key = {
         {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
         {G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_RELU, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_PRELU, {{"X", "Alpha"}, {"Out"}}},
         {G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
diff --git a/src/common/types.h b/src/common/types.h
index b59a8df2df..9f0e792299 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -151,6 +151,7 @@ extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU;
 extern const char *G_OP_TYPE_FUSION_CONV_BN_ADD_RELU;
 extern const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU;
 extern const char *G_OP_TYPE_FUSION_CONV_BN_RELU;
+extern const char *G_OP_TYPE_FUSION_CONV_RELU;
 
 extern const char *G_OP_TYPE_GRU;
 extern const char *G_OP_TYPE_GRU_UNIT;
diff --git a/src/framework/load_ops.h b/src/framework/load_ops.h
index 44f4f650b0..741b1402b1 100644
--- a/src/framework/load_ops.h
+++ b/src/framework/load_ops.h
@@ -168,6 +168,10 @@ LOAD_FUSION_MATCHER(fusion_conv_bn_add_relu);
 LOAD_OP3(fusion_conv_bn_relu, CPU, GPU_CL, FPGA);
 LOAD_FUSION_MATCHER(fusion_conv_bn_relu);
 #endif
+#ifdef FUSION_CONVRELU_OP
+LOAD_OP2(fusion_conv_relu, CPU, GPU_CL);
+LOAD_FUSION_MATCHER(fusion_conv_relu);
+#endif
 #ifdef GRU_OP
 LOAD_OP1(gru, CPU);
 #endif
diff --git a/src/operators/fusion_conv_relu_op.cpp b/src/operators/fusion_conv_relu_op.cpp
new file mode 100644
index 0000000000..d403ceae2f
--- /dev/null
+++ b/src/operators/fusion_conv_relu_op.cpp
@@ -0,0 +1,64 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVRELU_OP
+
+#include "operators/fusion_conv_relu_op.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename Dtype, typename T>
+void FusionConvReluOp<Dtype, T>::InferShape() const {
+  auto in_dims = this->param_.Input()->dims();
+  auto filter_dims = this->param_.Filter()->dims();
+  const std::vector<int> &strides = this->param_.Strides();
+  std::vector<int> paddings = this->param_.Paddings();
+  int groups = this->param_.Groups();
+  std::vector<int> dilations = this->param_.Dilations();
+
+  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
+                         dilations.size() == paddings.size() &&
+                         paddings.size() == strides.size()),
+                        "ConvParam is not suitable");
+
+  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
+  for (size_t i = 0; i < strides.size(); ++i) {
+    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                                          dilations[i], paddings[i],
+                                          strides[i]));
+  }
+
+  framework::DDim ddim = framework::make_ddim(output_shape);
+  this->param_.Output()->Resize(ddim);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+REGISTER_FUSION_MATCHER(fusion_conv_relu, ops::FusionConvReluMatcher);
+
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(fusion_conv_relu, ops::FusionConvReluOp);
+#endif
+#ifdef PADDLE_MOBILE_CL
+REGISTER_OPERATOR_CL(fusion_conv_relu, ops::FusionConvReluOp);
+#endif
+#if defined(PADDLE_MOBILE_FPGA) || defined(PADDLE_MOBILE_FPGA_KD)
+REGISTER_OPERATOR_FPGA(fusion_conv_relu, ops::FusionConvReluOp);
+#endif
+
+#endif
diff --git a/src/operators/fusion_conv_relu_op.h b/src/operators/fusion_conv_relu_op.h
new file mode 100644
index 0000000000..6444b6b739
--- /dev/null
+++ b/src/operators/fusion_conv_relu_op.h
@@ -0,0 +1,66 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVRELU_OP
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/conv_relu_kernel.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+class FusionConvReluMatcher : public framework::FusionOpMatcher {
+ public:
+  FusionConvReluMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
+  }
+
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(), {}, removed_nodes);
+  }
+  std::string Type() { return G_OP_TYPE_FUSION_CONV_RELU; }
+};
+
+template <typename DeviceType, typename T>
+class FusionConvReluOp : public framework::OperatorWithKernel<
+                             DeviceType, FusionConvReluParam<DeviceType>,
+                             operators::ConvReluKernel<DeviceType, T>> {
+ public:
+  FusionConvReluOp(const string &type, const VariableNameMap &inputs,
+                   const VariableNameMap &outputs,
+                   const framework::AttributeMap &attrs,
+                   framework::Scope *scope)
+      : framework::OperatorWithKernel<
+            DeviceType, FusionConvReluParam<DeviceType>,
+            operators::ConvReluKernel<DeviceType, T>>(type, inputs, outputs,
+                                                      attrs, scope) {}
+
+  void InferShape() const override;
+
+ protected:
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/arm/activation_kernel.cpp b/src/operators/kernel/arm/activation_kernel.cpp
index 37c31f6ac0..d5343e5a04 100644
--- a/src/operators/kernel/arm/activation_kernel.cpp
+++ b/src/operators/kernel/arm/activation_kernel.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "operators/kernel/activation_kernel.h"
 #include "common/types.h"
+#include "operators/kernel/central-arm-func/activation_arm_func.h"
 #include "operators/math/activation.h"
 #if defined(__ARM_NEON__) || defined(__ARM_NEON)
 #include <arm_neon.h>
@@ -22,86 +23,6 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 
-template <typename Dtype, ActivationType Act>
-struct ActivationCompute {
-  void operator()(const Tensor *input, Tensor *output) {}
-  void operator()(const Tensor *input, Tensor *output, float alpha) {}
-};
-
-template <ActivationType Act>
-struct ActivationCompute<float, Act> {
-  void operator()(const Tensor *input, Tensor *output) {
-    const float *x = input->data<float>();
-    float *y = output->mutable_data<float>();
-    size_t remain = input->numel();
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-    size_t loop = remain >> 4;
-    remain = remain & 0xF;
-
-#pragma omp parallel for
-    for (size_t i = 0; i < loop; ++i) {
-      const float *local_x = x + (i << 4);
-      float *local_y = y + (i << 4);
-      float32x4_t r0 = vld1q_f32(local_x);
-      float32x4_t r1 = vld1q_f32(local_x + 4);
-      float32x4_t r2 = vld1q_f32(local_x + 8);
-      float32x4_t r3 = vld1q_f32(local_x + 12);
-      r0 = math::vActiveq_f32<Act>(r0);
-      r1 = math::vActiveq_f32<Act>(r1);
-      r2 = math::vActiveq_f32<Act>(r2);
-      r3 = math::vActiveq_f32<Act>(r3);
-      vst1q_f32(local_y, r0);
-      vst1q_f32(local_y + 4, r1);
-      vst1q_f32(local_y + 8, r2);
-      vst1q_f32(local_y + 12, r3);
-    }
-    x += (loop << 4);
-    y += (loop << 4);
-#endif
-    for (size_t i = 0; i < remain; ++i) {
-      y[i] = math::Active<Act>(x[i]);
-    }
-  }
-
-  void operator()(const Tensor *input, Tensor *output, float falpha) {
-    const float *x = input->data<float>();
-    float *y = output->mutable_data<float>();
-    size_t remain = input->numel();
-    float alphas[4] = {falpha, falpha, falpha, falpha};
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-    size_t loop = remain >> 4;
-    remain = remain & 0xF;
-
-#pragma omp parallel for
-    for (size_t i = 0; i < loop; ++i) {
-      const float *local_x = x + (i << 4);
-      float *local_y = y + (i << 4);
-      float32x4_t r0 = vld1q_f32(local_x);
-      float32x4_t r1 = vld1q_f32(local_x + 4);
-      float32x4_t r2 = vld1q_f32(local_x + 8);
-      float32x4_t r3 = vld1q_f32(local_x + 12);
-      float32x4_t a_r0 = vld1q_f32(alphas);
-      float32x4_t a_r1 = vld1q_f32(alphas);
-      float32x4_t a_r2 = vld1q_f32(alphas);
-      float32x4_t a_r3 = vld1q_f32(alphas);
-      r0 = math::vActiveq_f32<Act>(r0, a_r0);
-      r1 = math::vActiveq_f32<Act>(r1, a_r1);
-      r2 = math::vActiveq_f32<Act>(r2, a_r2);
-      r3 = math::vActiveq_f32<Act>(r3, a_r3);
-      vst1q_f32(local_y, r0);
-      vst1q_f32(local_y + 4, r1);
-      vst1q_f32(local_y + 8, r2);
-      vst1q_f32(local_y + 12, r3);
-    }
-    x += (loop << 4);
-    y += (loop << 4);
-#endif
-    for (size_t i = 0; i < remain; ++i) {
-      y[i] = math::Active<Act>(x[i], falpha);
-    }
-  }
-};
-
 #ifdef RELU_OP
 template <>
 bool ReluKernel<CPU, float>::Init(ReluParam<CPU> *param) {
diff --git a/src/operators/kernel/arm/convolution/conv_relu_kernel.cpp b/src/operators/kernel/arm/convolution/conv_relu_kernel.cpp
new file mode 100644
index 0000000000..58c00dabcb
--- /dev/null
+++ b/src/operators/kernel/arm/convolution/conv_relu_kernel.cpp
@@ -0,0 +1,63 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVRELU_OP
+
+#include "operators/kernel/conv_relu_kernel.h"
+#include "operators/kernel/arm/convolution/conv_common.h"
+#include "operators/kernel/central-arm-func/activation_arm_func.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool ConvReluKernel<CPU, float>::Init(FusionConvReluParam<CPU> *param) {
+  InitBaseConvKernel(param);
+  return true;
+}
+
+template <>
+void ConvReluKernel<CPU, float>::Compute(
+    const FusionConvReluParam<CPU> &param) {
+  switch (param.ExecMode()) {
+    case ConvParam<CPU>::EXEC_DEPTHWISE3x3S1_FLOAT:
+    case ConvParam<CPU>::EXEC_DEPTHWISE3x3S2_FLOAT:
+      DepthwiseConv3x3<float, float>(param);
+      break;
+    case ConvParam<CPU>::EXEC_DEPTHWISE5x5_FLOAT:
+      DepthwiseConv5x5<float, float>(param);
+      break;
+    case ConvParam<CPU>::EXEC_WINOGRAD3X3_FLOAT:
+      WinogradConv3x3<8, 3>(param);
+      break;
+    case ConvParam<CPU>::EXEC_GEMM_FLOAT:
+      GemmConv<float, float>(param);
+      break;
+    case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S1_FLOAT:
+    case ConvParam<CPU>::EXEC_SLIDINGWINDOW3x3S2_FLOAT:
+      SlidingwindowConv3x3<float, float>(param);
+      break;
+    default:
+      PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
+                                    param.ExecMode());
+  }
+  ActivationCompute<float, RELU>()(param.Output(), param.Output());
+}
+template class ConvReluKernel<CPU, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/central-arm-func/activation_arm_func.h b/src/operators/kernel/central-arm-func/activation_arm_func.h
new file mode 100644
index 0000000000..07663ae2ae
--- /dev/null
+++ b/src/operators/kernel/central-arm-func/activation_arm_func.h
@@ -0,0 +1,107 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "operators/math/activation.h"
+#include "operators/op_param.h"
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#include <arm_neon.h>
+#endif  // __ARM_NEON__
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename Dtype, ActivationType Act>
+struct ActivationCompute {
+  void operator()(const Tensor *input, Tensor *output) {}
+  void operator()(const Tensor *input, Tensor *output, float alpha) {}
+};
+
+template <ActivationType Act>
+struct ActivationCompute<float, Act> {
+  void operator()(const Tensor *input, Tensor *output) {
+    const float *x = input->data<float>();
+    float *y = output->mutable_data<float>();
+    size_t remain = input->numel();
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+    size_t loop = remain >> 4;
+    remain = remain & 0xF;
+
+#pragma omp parallel for
+    for (size_t i = 0; i < loop; ++i) {
+      const float *local_x = x + (i << 4);
+      float *local_y = y + (i << 4);
+      float32x4_t r0 = vld1q_f32(local_x);
+      float32x4_t r1 = vld1q_f32(local_x + 4);
+      float32x4_t r2 = vld1q_f32(local_x + 8);
+      float32x4_t r3 = vld1q_f32(local_x + 12);
+      r0 = math::vActiveq_f32<Act>(r0);
+      r1 = math::vActiveq_f32<Act>(r1);
+      r2 = math::vActiveq_f32<Act>(r2);
+      r3 = math::vActiveq_f32<Act>(r3);
+      vst1q_f32(local_y, r0);
+      vst1q_f32(local_y + 4, r1);
+      vst1q_f32(local_y + 8, r2);
+      vst1q_f32(local_y + 12, r3);
+    }
+    x += (loop << 4);
+    y += (loop << 4);
+#endif
+    for (size_t i = 0; i < remain; ++i) {
+      y[i] = math::Active<Act>(x[i]);
+    }
+  }
+
+  void operator()(const Tensor *input, Tensor *output, float falpha) {
+    const float *x = input->data<float>();
+    float *y = output->mutable_data<float>();
+    size_t remain = input->numel();
+    float alphas[4] = {falpha, falpha, falpha, falpha};
+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+    size_t loop = remain >> 4;
+    remain = remain & 0xF;
+
+#pragma omp parallel for
+    for (size_t i = 0; i < loop; ++i) {
+      const float *local_x = x + (i << 4);
+      float *local_y = y + (i << 4);
+      float32x4_t r0 = vld1q_f32(local_x);
+      float32x4_t r1 = vld1q_f32(local_x + 4);
+      float32x4_t r2 = vld1q_f32(local_x + 8);
+      float32x4_t r3 = vld1q_f32(local_x + 12);
+      float32x4_t a_r0 = vld1q_f32(alphas);
+      float32x4_t a_r1 = vld1q_f32(alphas);
+      float32x4_t a_r2 = vld1q_f32(alphas);
+      float32x4_t a_r3 = vld1q_f32(alphas);
+      r0 = math::vActiveq_f32<Act>(r0, a_r0);
+      r1 = math::vActiveq_f32<Act>(r1, a_r1);
+      r2 = math::vActiveq_f32<Act>(r2, a_r2);
+      r3 = math::vActiveq_f32<Act>(r3, a_r3);
+      vst1q_f32(local_y, r0);
+      vst1q_f32(local_y + 4, r1);
+      vst1q_f32(local_y + 8, r2);
+      vst1q_f32(local_y + 12, r3);
+    }
+    x += (loop << 4);
+    y += (loop << 4);
+#endif
+    for (size_t i = 0; i < remain; ++i) {
+      y[i] = math::Active<Act>(x[i], falpha);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
diff --git a/src/operators/kernel/cl/conv_relu_kernel.cpp b/src/operators/kernel/cl/conv_relu_kernel.cpp
new file mode 100644
index 0000000000..896a662640
--- /dev/null
+++ b/src/operators/kernel/cl/conv_relu_kernel.cpp
@@ -0,0 +1,117 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVRELU_OP
+
+#include "operators/kernel/conv_relu_kernel.h"
+#include "operators/kernel/cl/cl-kernel-func/conv_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool ConvReluKernel<GPU_CL, float>::Init(FusionConvReluParam<GPU_CL> *param) {
+  PADDLE_MOBILE_ENFORCE(
+      param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
+          param->Paddings()[0] == param->Paddings()[1],
+      "need equal");
+
+  int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
+               static_cast<int>(param->Paddings()[1]);
+  param->SetOffset(offset);
+
+  DLOG << " init helper: " << &cl_helper_;
+  DLOG << " conv kernel add kernel ~ ";
+  DLOG << " width of one block: " << param->Filter()->dims()[3];
+  DLOG << " height of one block: " << param->Filter()->dims()[2];
+  DLOG << " filter dims: " << param->Filter()->dims();
+
+  const std::string conv_kernel_file = "conv_kernel.cl";
+  const std::string wino_kernel_file = "winograd_transform.cl";
+  const std::string build_options = "-DRELU";
+
+  if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
+    param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT;
+    param->Filter()->InitNImage(cl_helper_.CLContext(),
+                                cl_helper_.CLCommandQueue());
+
+    this->cl_helper_.AddKernel("conv_1x1_spl", conv_kernel_file, build_options);
+    DLOG << "conv 1x1";
+
+  } else if (param->Filter()->dims()[1] == 1 &&
+             param->Input()->dims()[1] == param->Output()->dims()[1] &&
+             param->Filter()->dims()[2] == 3) {
+    param->ExecMode() = ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT;
+    param->Filter()->InitDWImage(cl_helper_.CLContext(),
+                                 cl_helper_.CLCommandQueue());
+
+    this->cl_helper_.AddKernel("depth_conv_3x3", conv_kernel_file,
+                               build_options);
+    DLOG << "depth_conv 3x3";
+
+  } else if (param->Filter()->dims()[2] == 3 &&
+             param->Filter()->dims()[3] == 3) {
+    //    if (param->Strides()[0] == param->Strides()[1] &&
+    //        param->Strides()[0] == 1 && param->Input()->dims()[2] >= 32) {
+    //      param->ExecMode() = ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT;
+    //      this->cl_helper_.AddKernel("winograd_filter_transform_2x2",
+    //                                 wino_kernel_file, build_options);
+    //      this->cl_helper_.AddKernel("winograd_input_transform_2x2",
+    //                                 wino_kernel_file, build_options);
+    //      this->cl_helper_.AddKernel("matmul", "matmul.cl", build_options);
+    //      this->cl_helper_.AddKernel("winograd_output_transform_2x2",
+    //                                 wino_kernel_file, build_options);
+    //
+    //      winograd_transform_weight<4, 3>(&this->cl_helper_, param->Filter());
+    //
+    //    } else {
+    param->ExecMode() = ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT;
+    param->Filter()->InitCLImage(cl_helper_.CLContext(),
+                                 cl_helper_.CLCommandQueue());
+
+    this->cl_helper_.AddKernel("conv_3x3", conv_kernel_file, build_options);
+    //    }
+    DLOG << "conv 3x3";
+
+  } else {
+    PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
+  }
+
+  return true;
+}
+
+template <>
+void ConvReluKernel<GPU_CL, float>::Compute(
+    const FusionConvReluParam<GPU_CL> &param) {
+  switch (param.ExecMode()) {
+    case ConvParam<GPU_CL>::EXEC_WINOGRAD3X3_FLOAT:
+      WinogradConv3x3<4, 3>(&this->cl_helper_, param, true);
+      break;
+    case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW1x1_FLOAT:
+    case ConvParam<GPU_CL>::EXEC_SLIDINGWINDOW3x3_FLOAT:
+    case ConvParam<GPU_CL>::EXEC_DEPTHWISE3x3_FLOAT:
+      ConvAddBnRelu(&this->cl_helper_, param, true);
+      break;
+    default:
+      PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
+                                    param.ExecMode());
+  }
+}
+
+template class ConvReluKernel<GPU_CL, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/conv_relu_kernel.h b/src/operators/kernel/conv_relu_kernel.h
new file mode 100644
index 0000000000..4fb2fe3171
--- /dev/null
+++ b/src/operators/kernel/conv_relu_kernel.h
@@ -0,0 +1,42 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#ifdef FUSION_CONVRELU_OP
+
+#include <vector>
+#include "framework/operator.h"
+#include "operators/math/im2col.h"
+#include "operators/math/math_function.h"
+#include "operators/math/vol2col.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+using framework::OpKernelBase;
+
+template <typename DeviceType, typename T>
+class ConvReluKernel
+    : public OpKernelBase<DeviceType, FusionConvReluParam<DeviceType>> {
+ public:
+  void Compute(const FusionConvReluParam<DeviceType> &param);
+  bool Init(FusionConvReluParam<DeviceType> *param);
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index f32ac21f31..3558fda919 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -2240,6 +2240,22 @@ class FusionDWConvBNReluParam : public ConvParam<Dtype> {
 
 #endif
 
+#ifdef FUSION_CONVRELU_OP
+template <typename Dtype>
+class FusionConvReluParam : public ConvParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  FusionConvReluParam(const VariableNameMap &inputs,
+                      const VariableNameMap &outputs, const AttributeMap &attrs,
+                      Scope *scope)
+      : ConvParam<Dtype>(inputs, outputs, attrs, scope) {
+    this->output_ = OpParam::OutFrom<GType>(outputs, *scope);
+  }
+};
+#endif
+
 #ifdef FUSION_CONVBNRELU_OP
 template <typename Dtype>
 class FusionConvBNReluParam : public ConvParam<Dtype> {
diff --git a/tools/op.cmake b/tools/op.cmake
index 4d21d41553..05496969cf 100755
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -311,6 +311,7 @@ if(NOT FOUND_MATCH)
     set(FUSION_CONVADDADDPRELU_OP ON)
     set(FUSION_DWCONVBNRELU_OP ON)
    set(FUSION_CONVBNRELU_OP ON)
+    set(FUSION_CONVRELU_OP ON)
     set(FUSION_CONVBNADDRELU_OP ON)
     set(PRELU_OP ON)
     set(RESIZE_OP ON)
@@ -484,6 +485,10 @@ if (FUSION_CONVBNRELU_OP)
     add_definitions(-DFUSION_CONVBNRELU_OP)
 endif()
 
+if (FUSION_CONVRELU_OP)
+    add_definitions(-DFUSION_CONVRELU_OP)
+endif()
+
 if (FUSION_CONVBNADDRELU_OP)
     add_definitions(-DFUSION_CONVBNADDRELU_OP)
 endif()
-- 
GitLab
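For context on what the fusion buys: each fused kernel above computes the
convolution exactly as the standalone conv op would, then applies ReLU on the
same output tensor. On ARM this is an explicit in-place epilogue,
ActivationCompute<float, RELU>()(param.Output(), param.Output()); on GPU_CL
the ReLU is compiled straight into the conv kernels via the -DRELU build
option. A minimal standalone sketch of the ARM epilogue, in plain C++ with
relu_inplace as a hypothetical stand-in for the scalar remainder loop of
ActivationCompute (the NEON path does the same thing 16 floats at a time):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // In-place ReLU over a convolution output buffer: y[i] = max(y[i], 0).
    void relu_inplace(float *y, std::size_t n) {
      for (std::size_t i = 0; i < n; ++i) {
        y[i] = std::max(y[i], 0.0f);
      }
    }

    int main() {
      std::vector<float> conv_out = {-1.5f, 0.0f, 2.5f, -0.25f};
      relu_inplace(conv_out.data(), conv_out.size());
      // conv_out is now {0.0, 0.0, 2.5, 0.0}
      return 0;
    }

Compared with running a separate relu op, the fusion matcher folds the relu
node into the conv node at graph-optimization time, so the intermediate
tensor and the extra op dispatch disappear; on GPU_CL the separate activation
kernel launch disappears as well.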