diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ccf73763c08a748b53027d7f4a0f254774a1843..fbbc1db6b964e23775d1145ceed1d6b8a91b5d12 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,9 +6,15 @@ option(USE_OPENMP "openmp support" OFF)
 option(USE_EXCEPTION "use std exception" ON)
 option(LOG_PROFILE "log profile" ON)
 # select the platform to build
-option(CPU "armv7 with neon" ON)
+option(CPU "armv7 with neon" OFF)
 option(MALI_GPU "mali gpu" OFF)
-option(FPGA "fpga" OFF)
+option(FPGA "fpga" ON)
+SET(FPGA ON)
+SET(FUSION_ELEMENTWISEADDRELU_OP ON)
+SET(FUSION_FC_OP ON)
+SET(FUSION_FCRELU_OP ON)
+SET(POOL_OP ON)
+SET(DROPOUT_OP ON)
 
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
@@ -139,7 +145,8 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 
 # NET default
-set(NET "default" CACHE STRING "select net type")
+#set(NET "default" CACHE STRING "select net type")
+set(NET "FPGAnets" CACHE STRING "select net type")
 set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets")
 
 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
@@ -151,7 +158,7 @@ if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
 elseif(IS_IOS)
     add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
 else ()
-    add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
+    add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H} src/operators/kernel/fc_relu_kernel.h src/operators/kernel/fpga/fusion_fc_kernel.cpp src/operators/kernel/fpga/fc_relu_kernel.cpp src/operators/fusion_elementwise_add_relu_op.h src/operators/fusion_elementwise_add_relu_op.cpp src/operators/kernel/elementwise_add_relu_kernel.h src/operators/kernel/fpga/pool_kernel.cpp src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp src/operators/kernel/fpga/dropout_kernel.cpp)
 endif ()
 
 # unit test
diff --git a/src/operators/fusion_elementwise_add_relu_op.cpp b/src/operators/fusion_elementwise_add_relu_op.cpp
index 7c20e3ec0ef8fa9c2380b019c726a3a93daa78ff..fa2739ab4283c1fbb35e541ed2d40ea7a1904580 100644
--- a/src/operators/fusion_elementwise_add_relu_op.cpp
+++ b/src/operators/fusion_elementwise_add_relu_op.cpp
@@ -30,12 +30,12 @@ void FusionElementwiseAddReluOp<Dtype, T>::InferShape() const {
 
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
-REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
-                      ops::FusionElementwiseAddReluOp);
+// REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
+//                       ops::FusionElementwiseAddReluOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu,
-                           ops::FusionElementwiseAddReluOp);
+// REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu,
+//                            ops::FusionElementwiseAddReluOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 REGISTER_OPERATOR_FPGA(fusion_elementwise_add_relu,
diff --git a/src/operators/fusion_elementwise_add_relu_op.h b/src/operators/fusion_elementwise_add_relu_op.h
index 8799b5ab468e1fcd6f8bd8b0e3588962366cee6b..02151b97c9aa5913dadcd0b9b56953b309781b49 100644
--- a/src/operators/fusion_elementwise_add_relu_op.h
+++ b/src/operators/fusion_elementwise_add_relu_op.h
@@ -18,12 +18,29 @@ limitations under the License.
 */
 #include <string>
 #include "framework/operator.h"
-#include "kernel/elementwise_add_relu_kernel.h"
-#include "operators/op_param.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/elementwise_add_relu_kernel.h"
 
 namespace paddle_mobile {
 namespace operators {
 using std::string;
+using std::vector;
+class FusioneElementwiseAddReluMatcher : public framework::FusionOpMatcher {
+ public:
+  FusioneElementwiseAddReluMatcher() {
+    node_ = framework::Node(G_OP_TYPE_ELEMENTWISE_ADD);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
+  }
+
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(), {}, removed_nodes);
+  }
+
+  std::string Type() { return G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU; }
+};
+
 template <typename DeviceType, typename T>
 class FusionElementwiseAddReluOp
     : public framework::OperatorWithKernel<
@@ -39,13 +56,39 @@ class FusionElementwiseAddReluOp
           operators::ElementwiseAddReluKernel<DeviceType, T>>(
             type, inputs, outputs, attrs, scope) {}
 
-  using framework::OperatorWithKernel<
-      DeviceType, ElementwiseAddReluParam,
-      operators::ElementwiseAddReluKernel<DeviceType, T>>::OperatorWithKernel;
 
   void InferShape() const override;
 
  protected:
 };
+
+#ifdef PADDLE_MOBILE_CPU
+/*
+#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_elementwise_relu_registrar(
+    new FusioneElementwiseAddReluMatcher());
+#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+#endif
+*/
+#endif
+
+#ifdef PADDLE_MOBILE_MALI_GPU
+/*
+#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_elementwise_relu_registrar(
+    new FusioneElementwiseAddReluMatcher());
+#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+#endif
+*/
+#endif
+
+#ifdef PADDLE_MOBILE_FPGA
+#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_elementwise_relu_registrar(
+    new FusioneElementwiseAddReluMatcher());
+#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+#endif
+#endif
+
 }  // namespace operators
 }  // namespace paddle_mobile
@@ -53,10 +96,9 @@ class FusionElementwiseAddReluOp
 USE_OP_CPU(fusion_elementwise_add_relu);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(fusion_elementwise_add_relu);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 USE_OP_FPGA(fusion_elementwise_add_relu);
 #endif
 
 #endif
diff --git a/src/operators/fusion_fc_op.h b/src/operators/fusion_fc_op.h
index de5b837f7d5b41b136998232ec29d90a5bc47cf8..e6c7e9fdbd5f449eea004615fa31e49af0746086 100644
--- a/src/operators/fusion_fc_op.h
+++ b/src/operators/fusion_fc_op.h
@@ -67,8 +67,8 @@ class FusionFcOp
 
 #ifdef PADDLE_MOBILE_CPU
-#ifndef CONV_CPU_REGISTER
-#define CONV_CPU_REGISTER
+#ifndef FUSION_FC_CPU_REGISTER
+#define FUSION_FC_CPU_REGISTER
 static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #endif
 
@@ -84,6 +84,10 @@ static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #endif
 
 #ifdef PADDLE_MOBILE_FPGA
+#ifndef FUSION_FC_CPU_REGISTER
+#define FUSION_FC_CPU_REGISTER
+static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
+#endif
 #endif
 
 }  // namespace operators
diff --git a/src/operators/fusion_fc_relu_op.h b/src/operators/fusion_fc_relu_op.h
index 6c02cb0f04da71e5c75b9824a2cacc5280236329..7a680695715b042152c8279510cdbf3100e84bb4 100644
--- a/src/operators/fusion_fc_relu_op.h
+++ b/src/operators/fusion_fc_relu_op.h
@@ -18,7 +18,7 @@ limitations under the License.
 */
 #include "framework/operator.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
-#include "operators/kernel/fusion_fc_relu_kernel.h"
+#include "operators/kernel/fc_relu_kernel.h"
 
 namespace paddle_mobile {
 namespace operators {
diff --git a/src/operators/kernel/elementwise_add_relu_kernel.h b/src/operators/kernel/elementwise_add_relu_kernel.h
index 5c90a2683f30333d68c4fb7e5b72af66a3ee2d26..1b3ff25ec983365a7a94cff5b047eba3466fe932 100644
--- a/src/operators/kernel/elementwise_add_relu_kernel.h
+++ b/src/operators/kernel/elementwise_add_relu_kernel.h
@@ -17,7 +17,6 @@ limitations under the License. */
 #pragma once
 
 #include "framework/operator.h"
-#include "operators/math/elementwise_op_function.h"
 #include "operators/op_param.h"
 
 namespace paddle_mobile {
diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
index fffd182bacacadfb3d0716eec6dc37e56d4b5069..5dd8991e2a23540e81f043cd6199443d98098ff8 100644
--- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
@@ -14,7 +14,6 @@ limitations under the License. */
 #ifdef FUSION_ELEMENTWISEADDRELU_OP
 
 #include "operators/kernel/elementwise_add_relu_kernel.h"
-#include "fpga/api/fpga_api.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -28,7 +27,7 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   Tensor *out = param->Out();
   auto input_x_ptr = input_x->data<float>();
   auto input_y_ptr = input_y->data<float>();
-  auto out_ptr = out->data<float>();
+  auto out_ptr = out->mutable_data<float>();
 
   fpga::EWAddArgs ewaddArgs;
   ewaddArgs.relu_enabled = relu_enabled;
@@ -40,16 +39,16 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
       input_x->fpga_args().scale_pointer();  // ew has scale attribute??
   ewaddArgs.image0.height = input_x->dims()[2];
   ewaddArgs.image0.width = input_x->dims()[3];
-  ewaddArgs.image0.pad_height = 1;
-  ewaddArgs.image0.pad_width = 1;
+  ewaddArgs.image0.pad_height = 0;
+  ewaddArgs.image0.pad_width = 0;
   ewaddArgs.image1.address = (void *)input_y_ptr;
   ewaddArgs.image1.channels = input_y->dims()[1];
   ewaddArgs.image1.scale_address =
       input_y->fpga_args().scale_pointer();  // ew has scale attribute??
   ewaddArgs.image1.height = input_y->dims()[2];
   ewaddArgs.image1.width = input_y->dims()[3];
-  ewaddArgs.image1.pad_height = 1;
-  ewaddArgs.image1.pad_width = 1;
+  ewaddArgs.image1.pad_height = 0;
+  ewaddArgs.image1.pad_width = 0;
   ewaddArgs.output.scale_address = out->fpga_args().scale_pointer();
   ewaddArgs.output.address = (void *)out_ptr;
   param->SetFpgaArgs(ewaddArgs);
diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp
index 61d94f9231459c8f945834b68899b3447eecc1aa..704b0d76cb96eb62012bf85cae96b5c1448b3cc0 100644
--- a/src/operators/kernel/fpga/fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp
@@ -21,7 +21,6 @@ namespace operators {
 template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   bool relu_enabled = true;
-  bool bn_enabled = false;
   const Tensor *input_x = param->InputX();
   auto input_x_ptr = input_x->data<float>();
   const Tensor *input_y = param->InputY();
@@ -31,8 +30,8 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   Tensor *out = param->Out();
   auto out_ptr = out->mutable_data<float>();
 
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_z->dims()[0],
-                        "Image channel should be equal to bias number");
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+                        "Image channel should be equal to weight number");
   int channel = input_x->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
@@ -55,8 +54,8 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   convArgs.image.channels = input_x->dims()[1];
   convArgs.image.height = input_x->dims()[2];
   convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 1;
-  convArgs.image.pad_width = 1;
+  convArgs.image.pad_height = 0;
+  convArgs.image.pad_width = 0;
   convArgs.image.scale_address =
       input_x->fpga_args().scale_pointer();  // fc input has scale attribute??
   convArgs.output.address = (void *)out_ptr;
diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
index 06939a242f6e579f833e0c83f49ab8d6afd751e6..9443270342a681199cd2cfe068a13d806cb606f9 100644
--- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
@@ -21,7 +21,6 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   bool relu_enabled = false;
-  bool bn_enabled = false;
   const Tensor *input_x = param->InputX();
   auto input_x_ptr = input_x->data<float>();
   const Tensor *input_y = param->InputY();
@@ -31,8 +30,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   Tensor *out = param->Out();
   auto out_ptr = out->mutable_data<float>();
 
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_z->dims()[0],
-                        "Image channel should be equal to bias number");
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+                        "Image channel should be equal to weight number");
   int channel = input_x->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
@@ -55,8 +54,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   convArgs.image.channels = input_x->dims()[1];
   convArgs.image.height = input_x->dims()[2];
   convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 1;
-  convArgs.image.pad_width = 1;
+  convArgs.image.pad_height = 0;
+  convArgs.image.pad_width = 0;
   convArgs.image.scale_address =
       input_x->fpga_args().scale_pointer();  // fc input has scale attribute??
   convArgs.output.address = (void *)out_ptr;
diff --git a/src/operators/kernel/fpga/pool_kernel.cpp b/src/operators/kernel/fpga/pool_kernel.cpp
index 287f849e2df0ed0976413e88b0ea8fb7c8d838aa..3e7dc5fd591fc85b98c7850102248c2264c62ba3 100644
--- a/src/operators/kernel/fpga/pool_kernel.cpp
+++ b/src/operators/kernel/fpga/pool_kernel.cpp
@@ -14,7 +14,6 @@ limitations under the License. */
 #ifdef POOL_OP
 
 #include "operators/kernel/pool_kernel.h"
-#include "fpga/api/fpga_api.h"
 
 class PoolingArgs;
 namespace paddle_mobile {
@@ -25,7 +24,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam *param) {
   const Tensor *input = param->Input();
   auto input_ptr = input->data<float>();
   Tensor *output = param->Output();
-  auto output_ptr = output->data<float>();
+  auto output_ptr = output->mutable_data<float>();
   vector<int> ksize = param->Ksize();
   vector<int> strides = param->Strides();
   vector<int> paddings = param->Paddings();
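
Build note (not part of the patch): because the CMakeLists.txt hunk above hard-codes FPGA, the fusion/pool/dropout op switches, and NET=FPGAnets via SET(), a plain configure should already take the FPGA code path. A minimal sketch of driving the build, assuming an out-of-source build directory; the directory name and -j level are illustrative assumptions, not part of this diff:

    # hypothetical invocation; FPGA and NET are already forced by the SET() calls above
    mkdir -p build && cd build
    cmake ..
    make -j4 paddle-mobile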