diff --git a/src/common/types.cpp b/src/common/types.cpp index 2f366eb9e5a10ea11e3153e6e32b18204c6dd9cd..b6387503856f438acd74b8d147da13a2b009f2a1 100644 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -81,8 +81,8 @@ std::unordered_map< {G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}}, {G_OP_TYPE_IM2SEQUENCE, {{"X"}, {"Out"}}}, {G_OP_TYPE_DROPOUT, {{"X"}, {"Out"}}}, - {G_OP_TYPE_FUSION_CONV_ADD_BN, {{"Input"}, {"Out"}}}, - {G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Out"}}}, + {G_OP_TYPE_FUSION_CONV_ADD_BN, {{"Input"}, {"Y"}}}, + {G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Y"}}}, {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}}, {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}}, {G_OP_TYPE_REGION, {{"X"}, {"Out"}}}}; diff --git a/src/operators/dropout_op.cpp b/src/operators/dropout_op.cpp index a632aa0c52b19c591467f94afb216245a596680b..a913ff017bfe776a2c2dfea5696e4c0f23683c46 100644 --- a/src/operators/dropout_op.cpp +++ b/src/operators/dropout_op.cpp @@ -33,6 +33,7 @@ REGISTER_OPERATOR_CPU(dropout, ops::DropoutOp); #ifdef PADDLE_MOBILE_MALI_GPU #endif #ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(dropout, ops::DropoutOp); #endif #endif diff --git a/src/operators/dropout_op.h b/src/operators/dropout_op.h index d9d5e173a8427d24097a627946dce4e14253fbe7..89d658dd8c1e11576a2cc0ef9cceae3fcdf26477 100644 --- a/src/operators/dropout_op.h +++ b/src/operators/dropout_op.h @@ -56,6 +56,7 @@ USE_OP_CPU(dropout); #ifdef PADDLE_MOBILE_MALI_GPU #endif #ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(dropout); #endif #endif diff --git a/src/operators/fusion_elementwise_add_relu_op.cpp b/src/operators/fusion_elementwise_add_relu_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fa2739ab4283c1fbb35e541ed2d40ea7a1904580 --- /dev/null +++ b/src/operators/fusion_elementwise_add_relu_op.cpp @@ -0,0 +1,45 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_ELEMENTWISEADDRELU_OP + +#include "fusion_elementwise_add_relu_op.h" + +namespace paddle_mobile { +namespace operators { + +template +void FusionElementwiseAddReluOp::InferShape() const { + auto x_dim = this->param_.InputX()->dims(); + this->param_.Out()->Resize(x_dim); +} + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_CPU +// REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu, +// ops::FusionElementwiseAddReluOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +// REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu, +// ops::FusionElementwiseAddReluOp); +#endif +#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(fusion_elementwise_add_relu, + ops::FusionElementwiseAddReluOp); +#endif + +#endif diff --git a/src/operators/fusion_elementwise_add_relu_op.h b/src/operators/fusion_elementwise_add_relu_op.h new file mode 100644 index 0000000000000000000000000000000000000000..02151b97c9aa5913dadcd0b9b56953b309781b49 --- /dev/null +++ b/src/operators/fusion_elementwise_add_relu_op.h @@ -0,0 +1,104 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_ELEMENTWISEADDRELU_OP + +#pragma once + +#include +#include "framework/operator.h" +#include "framework/program/program-optimize/fusion_op_register.h" +#include "operators/kernel/elementwise_add_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { +using std::string; +using std::vector; +class FusioneElementwiseAddReluMatcher : public framework::FusionOpMatcher { + public: + FusioneElementwiseAddReluMatcher() { + node_ = framework::Node(G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU); + node_ > std::make_shared(G_OP_TYPE_RELU); + } + + void FolderNodes( + framework::Node *node, + std::vector> *removed_nodes) { + node->Folder(node_.Depth(), Type(), {}, removed_nodes); + } + + std::string Type() { return G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU; } +}; + +template +class FusionElementwiseAddReluOp + : public framework::OperatorWithKernel< + DeviceType, ElementwiseAddReluParam, + operators::ElementwiseAddReluKernel> { + public: + FusionElementwiseAddReluOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, ElementwiseAddReluParam, + operators::ElementwiseAddReluKernel>( + type, inputs, outputs, attrs, scope) {} + + void InferShape() const override; + + protected: +}; + +#ifdef PADDLE_MOBILE_CPU +/* +#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER + static framework::FusionOpRegistrar fusion_elementwise_relu_registrar( + new FusioneElementwiseAddReluMatcher()); +#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER +#endif +*/ +#endif + +#ifdef PADDLE_MOBILE_MALI_GPU +/* +#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER + static framework::FusionOpRegistrar fusion_elementwise_relu_registrar( + new FusioneElementwiseAddReluMatcher()); +#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER +#endif +*/ +#endif + +#ifdef PADDLE_MOBILE_FPGA +#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER +static framework::FusionOpRegistrar fusion_elementwise_relu_registrar( + new FusioneElementwiseAddReluMatcher()); +#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER +#endif + +} // namespace operators +} // namespace paddle_mobile + +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(fusion_elementwise_add_relu); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +#endif +#ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(fusion_elementwise_add_relu); +#endif + +#endif +#endif diff --git a/src/operators/fusion_fc_op.cpp b/src/operators/fusion_fc_op.cpp index d564d4d88c16ee09382a9b2dae275807ec4bdb4b..9fa80fbf12d0fe300921418705b6900108c68faf 100644 --- a/src/operators/fusion_fc_op.cpp +++ b/src/operators/fusion_fc_op.cpp @@ -61,6 +61,7 @@ REGISTER_OPERATOR_CPU(fusion_fc, ops::FusionFcOp); REGISTER_OPERATOR_MALI_GPU(fusion_fc, ops::FusionFcOp); #endif #ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(fusion_fc, ops::FusionFcOp); #endif #endif diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h index c6f13c8e347f525ecee915759a5f4df492a73a92..e6c7e9fdbd5f449eea004615fa31e49af0746086 100644 --- a/src/operators/fusion_fc_op.h +++ b/src/operators/fusion_fc_op.h @@ -67,8 +67,8 @@ class FusionFcOp #ifdef PADDLE_MOBILE_CPU -#ifndef CONV_CPU_REGISTER -#define CONV_CPU_REGISTER +#ifndef FUSION_FC_CPU_REGISTER +#define FUSION_FC_CPU_REGISTER static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); #endif @@ -84,6 +84,10 @@ static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); #endif #ifdef PADDLE_MOBILE_FPGA +#ifndef FUSION_FC_CPU_REGISTER +#define FUSION_FC_CPU_REGISTER +static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher()); +#endif #endif } // namespace operators @@ -96,6 +100,7 @@ USE_OP_CPU(fusion_fc); USE_OP_MALI_GPU(fusion_fc); #endif #ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(fusion_fc); #endif #endif diff --git a/src/operators/fusion_fc_relu_op.cpp b/src/operators/fusion_fc_relu_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..97568323a3c204da06546ffc6b4d9a2483e95848 --- /dev/null +++ b/src/operators/fusion_fc_relu_op.cpp @@ -0,0 +1,67 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_FC_RELU_OP + +#include "operators/fusion_fc_relu_op.h" +namespace paddle_mobile { +namespace operators { + +template +void FusionFcReluOp::InferShape() const { + auto x_dims = this->param_.InputX()->dims(); + auto y_dims = this->param_.InputY()->dims(); + int x_num_col_dims = this->param_.XNumColDims(); + int y_num_col_dims = this->param_.YNumColDims(); + + assert(x_dims.size() > x_num_col_dims); + assert(y_dims.size() > y_num_col_dims); + + /// (1,2,3,4) , x_num_col_dims = 2 -> (2,12) + auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims); + auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims); + + assert(x_mat_dims[1] == y_mat_dims[0]); + + std::vector output_dims; + output_dims.reserve( + static_cast(x_num_col_dims + y_dims.size() - y_num_col_dims)); + + for (int i = 0; i < x_num_col_dims; ++i) { + output_dims.push_back(x_dims[i]); + } + + for (int i = y_num_col_dims; i < y_dims.size(); ++i) { + output_dims.push_back(y_dims[i]); + } + + framework::DDim ddim = framework::make_ddim(output_dims); + this->param_.Out()->Resize(ddim); +} + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_CPU +REGISTER_OPERATOR_CPU(fusion_fc_relu, ops::FusionFcReluOp); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +REGISTER_OPERATOR_MALI_GPU(fusion_fc_relu, ops::FusionFcReluOp); +#endif +#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(fusion_fc_relu, ops::FusionFcReluOp); +#endif + +#endif diff --git a/src/operators/fusion_fc_relu_op.h b/src/operators/fusion_fc_relu_op.h new file mode 100644 index 0000000000000000000000000000000000000000..7a680695715b042152c8279510cdbf3100e84bb4 --- /dev/null +++ b/src/operators/fusion_fc_relu_op.h @@ -0,0 +1,107 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef FUSION_FCRELU_OP +#pragma once +#include +#include + +#include "framework/operator.h" +#include "framework/program/program-optimize/fusion_op_register.h" +#include "operators/kernel/fc_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { +using std::string; +using std::vector; +class FusionFcReluMatcher : public framework::FusionOpMatcher { + public: + FusionFcReluMatcher() { + node_ = framework::Node(G_OP_TYPE_MUL); + node_ > std::make_shared(G_OP_TYPE_ELEMENTWISE_ADD) > + std::make_shared(G_OP_TYPE_RELU); + } + + void FolderNodes( + framework::Node *node, + std::vector> *removed_nodes) { + node->Folder(node_.Depth(), Type(), + {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Z"}}}}, removed_nodes); + } + + std::string Type() { return G_OP_TYPE_FUSION_FC_RELU; } +}; + +template +class FusionFcReluOp : public framework::OperatorWithKernel< + DeviceType, FusionFcReluParam, + operators::FusionFcReluKernel> { + public: + FusionFcReluOp(const string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, + std::shared_ptr scope) + : framework::OperatorWithKernel< + DeviceType, FusionFcReluParam, + operators::FusionFcReluKernel>(type, inputs, outputs, + attrs, scope) {} + + using framework::OperatorWithKernel< + DeviceType, FusionFcReluParam, + operators::FusionFcReluKernel>::OperatorWithKernel; + void InferShape() const override; + + protected: +}; + +#ifdef PADDLE_MOBILE_CPU + +#ifndef FUSION_FC_RELU_REGISTER +#define FUSION_FC_RELU_REGISTER +static framework::FusionOpRegistrar fc_relu_registrar( + new FusionFcReluMatcher()); +#endif + +#endif + +#ifdef PADDLE_MOBILE_MALI_GPU + +#ifndef FUSION_FC_RELU_REGISTER +#define FUSION_FC_RELU_REGISTER +static framework::FusionOpRegistrar fc_relu_registrar( + new FusionFcReluMatcher()); +#endif + +#endif + +#ifdef PADDLE_MOBILE_FPGA +#ifndef FUSION_FC_RELU_REGISTER +#define FUSION_FC_RELU_REGISTER +static framework::FusionOpRegistrar fc_relu_registrar( + new FusionFcReluMatcher()); +#endif +#endif + +} // namespace operators +} // namespace paddle_mobile + +#ifdef PADDLE_MOBILE_CPU +USE_OP_CPU(fusion_fc_relu); +#endif +#ifdef PADDLE_MOBILE_MALI_GPU +USE_OP_MALI_GPU(fusion_fc_relu); +#endif +#ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(fusion_fc_relu); +#endif +#endif // FUSION_FC_RELU_OP diff --git a/src/operators/kernel/elementwise_add_relu_kernel.h b/src/operators/kernel/elementwise_add_relu_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..1b3ff25ec983365a7a94cff5b047eba3466fe932 --- /dev/null +++ b/src/operators/kernel/elementwise_add_relu_kernel.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_ELEMENTWISEADDRELU_OP + +#pragma once + +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +using namespace framework; + +template +class ElementwiseAddReluKernel + : public framework::OpKernelBase { + public: + void Compute(const ElementwiseAddReluParam ¶m) const; + bool Init(ElementwiseAddReluParam *param); +}; +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fc_relu_kernel.h b/src/operators/kernel/fc_relu_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..ceff36f8ef49ff996769802b1f39e52e955c45d0 --- /dev/null +++ b/src/operators/kernel/fc_relu_kernel.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef FUSION_FCRELU_OP + +#pragma once + +#include "framework/operator.h" +#include "operators/math/math_function.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template +class FusionFcReluKernel + : public framework::OpKernelBase { + public: + void Compute(const FusionFcReluParam& param) const; + bool Init(FusionFcReluParam* param); +}; +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/dropout_kernel.cpp b/src/operators/kernel/fpga/dropout_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bb6ece969d83aba6bb7fe91a3688607df0be8a8b --- /dev/null +++ b/src/operators/kernel/fpga/dropout_kernel.cpp @@ -0,0 +1,40 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef DROPOUT_OP + +#include "operators/kernel/dropout_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool DropoutKernel::Init(DropoutParam *param) { + param->Out()->ShareDataWith(*param->InputX()); + return true; +} + +template <> +void DropoutKernel::Compute(const DropoutParam ¶m) const { + // auto *input_x = param.InputX(); + // auto *out = param.Out(); + // auto input_x_ptr = input_x->data(); + // auto out_ptr = out->mutable_data(); + // out_ptr = const_cast(input_x_ptr); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5dd8991e2a23540e81f043cd6199443d98098ff8 --- /dev/null +++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp @@ -0,0 +1,66 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef FUSION_ELEMENTWISEADDRELU_OP + +#include "operators/kernel/elementwise_add_relu_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool ElementwiseAddReluKernel::Init( + ElementwiseAddReluParam *param) { + bool relu_enabled = true; + const Tensor *input_x = param->InputX(); + const Tensor *input_y = param->InputY(); + Tensor *out = param->Out(); + auto input_x_ptr = input_x->data(); + auto input_y_ptr = input_y->data(); + auto out_ptr = out->mutable_data(); + + fpga::EWAddArgs ewaddArgs; + ewaddArgs.relu_enabled = relu_enabled; + ewaddArgs.const0 = 1; + ewaddArgs.const1 = 1; + ewaddArgs.image0.address = (void *)input_x_ptr; + ewaddArgs.image0.channels = input_x->dims()[1]; + ewaddArgs.image0.scale_address = + input_x->fpga_args().scale_pointer(); // ew has scale attribute?? + ewaddArgs.image0.height = input_x->dims()[2]; + ewaddArgs.image0.width = input_x->dims()[3]; + ewaddArgs.image0.pad_height = 0; + ewaddArgs.image0.pad_width = 0; + ewaddArgs.image1.address = (void *)input_y_ptr; + ewaddArgs.image1.channels = input_y->dims()[1]; + ewaddArgs.image1.scale_address = + input_y->fpga_args().scale_pointer(); // ew has scale attribute?? + ewaddArgs.image1.height = input_y->dims()[2]; + ewaddArgs.image1.width = input_y->dims()[3]; + ewaddArgs.image1.pad_height = 0; + ewaddArgs.image1.pad_width = 0; + ewaddArgs.output.scale_address = out->fpga_args().scale_pointer(); + ewaddArgs.output.address = (void *)out_ptr; + param->SetFpgaArgs(ewaddArgs); + return true; +} + +template <> +void ElementwiseAddReluKernel::Compute( + const ElementwiseAddReluParam ¶m) const { + fpga::ComputeFpgaEWAdd(param.FpgaArgs()); +} +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..704b0d76cb96eb62012bf85cae96b5c1448b3cc0 --- /dev/null +++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp @@ -0,0 +1,76 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef FUSION_FCRELU_OP +#include "operators/kernel/fc_relu_kernel.h" +#include "fpga/api/fpga_api.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool FusionFcReluKernel::Init(FusionFcReluParam *param) { + bool relu_enabled = true; + const Tensor *input_x = param->InputX(); + auto input_x_ptr = input_x->data(); + const Tensor *input_y = param->InputY(); + auto input_y_ptr = input_y->data(); + const Tensor *input_z = param->InputZ(); + auto input_z_ptr = input_z->data(); + Tensor *out = param->Out(); + auto out_ptr = out->mutable_data(); + + PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0], + "Image channel should be equal to weight number"); + int channel = input_x->dims()[1]; + float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + for (int i = 0; i < channel; i++) { + bs_ptr[i * 2] = 1; + bs_ptr[i * 2 + 1] = input_z_ptr[i]; + } + + fpga::ConvArgs convArgs; + convArgs.relu_enabled = relu_enabled; + convArgs.bias_address = (void *)input_z_ptr; + convArgs.filter_address = (void *)input_y_ptr; + convArgs.filter_num = out->dims()[1]; + convArgs.group_num = 1; + convArgs.sb_address = (void *)bs_ptr; + convArgs.kernel.stride_w = 1; + convArgs.kernel.stride_h = 1; + convArgs.kernel.height = input_x->dims()[2]; + convArgs.kernel.width = input_x->dims()[3]; + convArgs.image.address = (void *)input_x_ptr; + convArgs.image.channels = input_x->dims()[1]; + convArgs.image.height = input_x->dims()[2]; + convArgs.image.width = input_x->dims()[3]; + convArgs.image.pad_height = 0; + convArgs.image.pad_width = 0; + convArgs.image.scale_address = + input_x->fpga_args().scale_pointer(); // fc input has scale attribute?? + convArgs.output.address = (void *)out_ptr; + convArgs.output.scale_address = + out->fpga_args().scale_pointer(); // fc output has scale attribute?? + param->SetFpgaArgs(convArgs); + + return true; +} +template <> +void FusionFcReluKernel::Compute( + const FusionFcReluParam ¶m) const { + fpga::ComputeFpgaConv(param.FpgaArgs()); +}; + +} // namespace operators +} // namespace paddle_mobile +#endif diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9443270342a681199cd2cfe068a13d806cb606f9 --- /dev/null +++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp @@ -0,0 +1,75 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef FUSION_FC_OP + +#include "operators/kernel/fusion_fc_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool FusionFcKernel::Init(FusionFcParam *param) { + bool relu_enabled = false; + const Tensor *input_x = param->InputX(); + auto input_x_ptr = input_x->data(); + const Tensor *input_y = param->InputY(); + auto input_y_ptr = input_y->data(); + const Tensor *input_z = param->InputZ(); + auto input_z_ptr = input_z->data(); + Tensor *out = param->Out(); + auto out_ptr = out->mutable_data(); + + PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0], + "Image channel should be equal to weight number"); + int channel = input_x->dims()[1]; + float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + for (int i = 0; i < channel; i++) { + bs_ptr[i * 2] = 1; + bs_ptr[i * 2 + 1] = input_z_ptr[i]; + } + + fpga::ConvArgs convArgs; + convArgs.relu_enabled = relu_enabled; + convArgs.bias_address = (void *)input_z_ptr; + convArgs.filter_address = (void *)input_y_ptr; + convArgs.filter_num = out->dims()[1]; + convArgs.group_num = 1; + convArgs.sb_address = (void *)bs_ptr; + convArgs.kernel.stride_w = 1; + convArgs.kernel.stride_h = 1; + convArgs.kernel.height = input_x->dims()[2]; + convArgs.kernel.width = input_x->dims()[3]; + convArgs.image.address = (void *)input_x_ptr; + convArgs.image.channels = input_x->dims()[1]; + convArgs.image.height = input_x->dims()[2]; + convArgs.image.width = input_x->dims()[3]; + convArgs.image.pad_height = 0; + convArgs.image.pad_width = 0; + convArgs.image.scale_address = + input_x->fpga_args().scale_pointer(); // fc input has scale attribute?? + convArgs.output.address = (void *)out_ptr; + convArgs.output.scale_address = + out->fpga_args().scale_pointer(); // fc output has scale attribute?? + param->SetFpgaArgs(convArgs); + return true; +} + +template <> +void FusionFcKernel::Compute(const FusionFcParam ¶m) const { + fpga::ComputeFpgaConv(param.FpgaArgs()); +} +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/pool_kernel.cpp b/src/operators/kernel/fpga/pool_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3e7dc5fd591fc85b98c7850102248c2264c62ba3 --- /dev/null +++ b/src/operators/kernel/fpga/pool_kernel.cpp @@ -0,0 +1,57 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef POOL_OP + +#include "operators/kernel/pool_kernel.h" + +class PoolingArgs; +namespace paddle_mobile { +namespace operators { + +template <> +bool PoolKernel::Init(PoolParam *param) { + const Tensor *input = param->Input(); + auto input_ptr = input->data(); + Tensor *output = param->Output(); + auto output_ptr = output->mutable_data(); + vector ksize = param->Ksize(); + vector strides = param->Strides(); + vector paddings = param->Paddings(); + + fpga::PoolingArgs poolArgs; + poolArgs.image.address = (void *)input_ptr; + poolArgs.image.channels = input->dims()[1]; + poolArgs.image.height = input->dims()[2]; + poolArgs.image.width = input->dims()[3]; + poolArgs.image.pad_height = paddings[0]; + poolArgs.image.pad_width = paddings[1]; + poolArgs.output.address = output_ptr; + poolArgs.kernel.height = ksize[0]; + poolArgs.kernel.width = ksize[1]; + poolArgs.kernel.stride_h = strides[0]; + poolArgs.kernel.stride_w = strides[1]; + param->SetFpgaArgs(poolArgs); + return true; +} + +template <> +void PoolKernel::Compute(const PoolParam ¶m) const { +#ifdef PADDLE_MOBILE_FPGA + fpga::ComputeFpgaPool(param.FpgaArgs()); +#endif +} +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/pool_op.cpp b/src/operators/pool_op.cpp index 0477c88cf84054090b4c46524284fb0cdf525c0e..dd23059ea01a332aff45137b7f7ed4c9f6c2e1bb 100644 --- a/src/operators/pool_op.cpp +++ b/src/operators/pool_op.cpp @@ -66,6 +66,7 @@ REGISTER_OPERATOR_CPU(pool2d, ops::PoolOp); REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp); #endif #ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp); #endif #endif diff --git a/src/operators/pool_op.h b/src/operators/pool_op.h index 4c48efdc53af7eb75f694d4b5a0a7ce5078d2e25..bc823e70c5a8e7b229a0101726316d1e825b7b54 100644 --- a/src/operators/pool_op.h +++ b/src/operators/pool_op.h @@ -55,6 +55,7 @@ USE_OP_CPU(pool2d); USE_OP_MALI_GPU(pool2d); #endif #ifdef PADDLE_MOBILE_FPGA +USE_OP_FPGA(pool2d); #endif #endif