Commit e82bc1f8 authored by qnqinan

fix some bugs in newly added FPGA ops and kernels

Parent 380c55f2
CMakeLists.txt
@@ -6,9 +6,15 @@ option(USE_OPENMP "openmp support" OFF)
 option(USE_EXCEPTION "use std exception" ON)
 option(LOG_PROFILE "log profile" ON)
 # select the platform to build
-option(CPU "armv7 with neon" ON)
+option(CPU "armv7 with neon" OFF)
 option(MALI_GPU "mali gpu" OFF)
-option(FPGA "fpga" OFF)
+option(FPGA "fpga" ON)
+SET(FPGA ON)
+SET(FUSION_ELEMENTWISEADDRELU_OP ON)
+SET(FUSION_FC_OP ON)
+SET(FUSION_FCRELU_OP ON)
+SET(POOL_OP ON)
+SET(DROPOUT_OP ON)
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
@@ -139,7 +145,8 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 # NET default
-set(NET "default" CACHE STRING "select net type")
+#set(NET "default" CACHE STRING "select net type")
+set(NET "FPGAnets" CACHE STRING "select net type")
 set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets")
 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
@@ -151,7 +158,7 @@ if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
 elseif(IS_IOS)
   add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
 else ()
-  add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
+  add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H} src/operators/kernel/fc_relu_kernel.h src/operators/kernel/fc_relu_kernel.h src/operators/kernel/fpga/fusion_fc_kernel.cpp src/operators/kernel/fpga/fc_relu_kernel.cpp src/operators/fusion_elementwise_add_relu_op.h src/operators/fusion_elementwise_add_relu_op.cpp src/operators/kernel/elementwise_add_relu_kernel.h src/operators/kernel/fpga/pool_kernel.cpp src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp src/operators/kernel/fpga/dropout_kernel.cpp)
 endif ()
 # unit test
...
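
For context: the SET() switches above pair with tools/op.cmake, which this file includes a few lines later. As far as this commit shows, each enabled <NAME>_OP flag becomes a compile definition, and every operator or kernel source guards itself with the matching #ifdef — which is why the FPGA kernels later in this commit are wrapped in FUSION_ELEMENTWISEADDRELU_OP, POOL_OP, and so on. A minimal sketch of that assumed chain:

    // Assumed effect of SET(POOL_OP ON): the build adds -DPOOL_OP, so the
    // pool operator and its FPGA kernel (at the end of this commit) are
    // compiled only when the flag is enabled.
    #ifdef POOL_OP
    // ... pool op registration and PoolKernel<FPGA, float> are compiled ...
    #endif  // POOL_OP
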
src/operators/fusion_elementwise_add_relu_op.cpp
@@ -30,12 +30,12 @@ void FusionElementwiseAddReluOp<Dtype, T>::InferShape() const {
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
-REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
-                      ops::FusionElementwiseAddReluOp);
+// REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
+//                       ops::FusionElementwiseAddReluOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu,
-                           ops::FusionElementwiseAddReluOp);
+// REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu,
+//                            ops::FusionElementwiseAddReluOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 REGISTER_OPERATOR_FPGA(fusion_elementwise_add_relu,
...
src/operators/fusion_elementwise_add_relu_op.h
@@ -18,12 +18,29 @@ limitations under the License. */
 #include <string>
 #include "framework/operator.h"
-#include "kernel/elementwise_add_relu_kernel.h"
-#include "operators/op_param.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/elementwise_add_relu_kernel.h"
 namespace paddle_mobile {
 namespace operators {
 using std::string;
+using std::vector;
+
+class FusioneElementwiseAddReluMatcher : public framework::FusionOpMatcher {
+ public:
+  FusioneElementwiseAddReluMatcher() {
+    node_ = framework::Node(G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
+  }
+
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(), {}, removed_nodes);
+  }
+
+  std::string Type() { return G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU; }
+};
+
 template <typename DeviceType, typename T>
 class FusionElementwiseAddReluOp
     : public framework::OperatorWithKernel<
@@ -39,13 +56,38 @@ class FusionElementwiseAddReluOp
           operators::ElementwiseAddReluKernel<DeviceType, T>>(
           type, inputs, outputs, attrs, scope) {}
+  using framework::OperatorWithKernel<
+      DeviceType, ElementwiseAddReluParam,
+      operators::ElementwiseAddReluKernel<DeviceType, T>>::OperatorWithKernel;
   void InferShape() const override;

  protected:
 };
+
+#ifdef PADDLE_MOBILE_CPU
+/*
+#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_elementwise_relu_registrar(
+    new FusioneElementwiseAddReluMatcher());
+#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+#endif
+*/
+#endif
+
+#ifdef PADDLE_MOBILE_MALI_GPU
+/*
+#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_elementwise_relu_registrar(
+    new FusioneElementwiseAddReluMatcher());
+#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+#endif
+*/
+#endif
+
+#ifdef PADDLE_MOBILE_FPGA
+#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_elementwise_relu_registrar(
+    new FusioneElementwiseAddReluMatcher());
+#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER
+#endif
 } // namespace operators
 } // namespace paddle_mobile
@@ -53,10 +95,10 @@ class FusionElementwiseAddReluOp
 USE_OP_CPU(fusion_elementwise_add_relu);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(fusion_elementwise_add_relu);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 USE_OP_FPGA(fusion_elementwise_add_relu);
 #endif
 #endif
+#endif
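
For context: the new FusioneElementwiseAddReluMatcher describes an add node feeding a relu so the program optimizer can fold the pair into this fused op, and the static FusionOpRegistrar is what enrolls that matcher at load time. The #ifndef/#define sentinel around the registrar keeps it from being instantiated twice when more than one PADDLE_MOBILE_* platform macro is defined; the CPU and MALI_GPU copies stay commented out because only an FPGA kernel exists in this commit. The once-only idiom in isolation (ExampleMatcher and EXAMPLE_FUSION_REGISTER are placeholder names, not from the source):

    // Without the sentinel, two enabled platform branches would define the
    // same static registrar twice in one translation unit and fail to compile.
    #ifndef EXAMPLE_FUSION_REGISTER
    #define EXAMPLE_FUSION_REGISTER
    static framework::FusionOpRegistrar example_registrar(new ExampleMatcher());
    #endif
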
src/operators/fusion_fc_op.h
@@ -67,8 +67,8 @@ class FusionFcOp
 #ifdef PADDLE_MOBILE_CPU
-#ifndef CONV_CPU_REGISTER
-#define CONV_CPU_REGISTER
+#ifndef FUSION_FC_CPU_REGISTER
+#define FUSION_FC_CPU_REGISTER
 static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #endif
@@ -84,6 +84,10 @@ static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+#ifndef FUSION_FC_CPU_REGISTER
+#define FUSION_FC_CPU_REGISTER
+static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
+#endif
 #endif
 } // namespace operators
...
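
The guard rename above is a real fix rather than cosmetics: CONV_CPU_REGISTER looks copied from the conv op's header, and a sentinel shared across two different headers means whichever header is included second silently skips its own registrar. With the old name, the failure mode was:

    // If another header has already defined CONV_CPU_REGISTER, this block is
    // skipped entirely: fc_registrar is never created and the fusion_fc
    // pattern is never matched.
    #ifndef CONV_CPU_REGISTER  // false once the conv header came first
    #define CONV_CPU_REGISTER
    static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
    #endif
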
src/operators/fusion_fc_relu_op.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include "framework/operator.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
-#include "operators/kernel/fusion_fc_relu_kernel.h"
+#include "operators/kernel/fc_relu_kernel.h"
 namespace paddle_mobile {
 namespace operators {
...
src/operators/kernel/elementwise_add_relu_kernel.h
@@ -17,7 +17,6 @@ limitations under the License. */
 #pragma once
 #include "framework/operator.h"
-#include "operators/math/elementwise_op_function.h"
 #include "operators/op_param.h"
 namespace paddle_mobile {
...
src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
@@ -14,7 +14,6 @@ limitations under the License. */
 #ifdef FUSION_ELEMENTWISEADDRELU_OP
 #include "operators/kernel/elementwise_add_relu_kernel.h"
-#include "fpga/api/fpga_api.h"
 namespace paddle_mobile {
 namespace operators {
@@ -28,7 +27,7 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   Tensor *out = param->Out();
   auto input_x_ptr = input_x->data<float>();
   auto input_y_ptr = input_y->data<float>();
-  auto out_ptr = out->data<float>();
+  auto out_ptr = out->mutable_data<float>();
   fpga::EWAddArgs ewaddArgs;
   ewaddArgs.relu_enabled = relu_enabled;
@@ -40,16 +39,16 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
       input_x->fpga_args().scale_pointer();  // ew has scale attribute??
   ewaddArgs.image0.height = input_x->dims()[2];
   ewaddArgs.image0.width = input_x->dims()[3];
-  ewaddArgs.image0.pad_height = 1;
-  ewaddArgs.image0.pad_width = 1;
+  ewaddArgs.image0.pad_height = 0;
+  ewaddArgs.image0.pad_width = 0;
   ewaddArgs.image1.address = (void *)input_y_ptr;
   ewaddArgs.image1.channels = input_y->dims()[1];
   ewaddArgs.image1.scale_address =
       input_y->fpga_args().scale_pointer();  // ew has scale attribute??
   ewaddArgs.image1.height = input_y->dims()[2];
   ewaddArgs.image1.width = input_y->dims()[3];
-  ewaddArgs.image1.pad_height = 1;
-  ewaddArgs.image1.pad_width = 1;
+  ewaddArgs.image1.pad_height = 0;
+  ewaddArgs.image1.pad_width = 0;
   ewaddArgs.output.scale_address = out->fpga_args().scale_pointer();
   ewaddArgs.output.address = (void *)out_ptr;
   param->SetFpgaArgs(ewaddArgs);
...
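
Two fixes in this kernel: the output pointer now comes from mutable_data<float>() instead of data<float>(), and both images' pad_height/pad_width drop from 1 to 0, presumably because elementwise add consumes the tensors as-is and needs no spatial padding. The accessor change matters because, on the Tensor API this code uses, data<T>() only returns an already-allocated buffer while mutable_data<T>() allocates on first use — and a kernel's output is generally not materialized yet at Init time. The pool kernel at the end of the commit gets the identical fix. A toy analogue of the assumed accessor contract (MiniTensor is illustrative, not the real class):

    #include <cstddef>
    #include <vector>

    // Toy analogue of the contract assumed above: data() only exposes an
    // existing buffer, mutable_data() allocates lazily and then returns it.
    struct MiniTensor {
      std::vector<float> buf;
      const float *data() const { return buf.empty() ? nullptr : buf.data(); }
      float *mutable_data(std::size_t n) {
        if (buf.size() < n) buf.resize(n);  // allocate on first use
        return buf.data();
      }
    };
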
src/operators/kernel/fpga/fc_relu_kernel.cpp
@@ -21,7 +21,6 @@ namespace operators {
 template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   bool relu_enabled = true;
-  bool bn_enabled = false;
   const Tensor *input_x = param->InputX();
   auto input_x_ptr = input_x->data<float>();
   const Tensor *input_y = param->InputY();
@@ -31,8 +30,8 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   Tensor *out = param->Out();
   auto out_ptr = out->mutable_data<float>();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_z->dims()[0],
-                        "Image channel should be equal to bias number");
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+                        "Image channel should be equal to weight number");
   int channel = input_x->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
@@ -55,8 +54,8 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
   convArgs.image.channels = input_x->dims()[1];
   convArgs.image.height = input_x->dims()[2];
   convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 1;
-  convArgs.image.pad_width = 1;
+  convArgs.image.pad_height = 0;
+  convArgs.image.pad_width = 0;
   convArgs.image.scale_address =
       input_x->fpga_args().scale_pointer();  // fc input has scale attribute??
   convArgs.output.address = (void *)out_ptr;
...
src/operators/kernel/fpga/fusion_fc_kernel.cpp
@@ -21,7 +21,6 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   bool relu_enabled = false;
-  bool bn_enabled = false;
   const Tensor *input_x = param->InputX();
   auto input_x_ptr = input_x->data<float>();
   const Tensor *input_y = param->InputY();
@@ -31,8 +30,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   Tensor *out = param->Out();
   auto out_ptr = out->mutable_data<float>();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_z->dims()[0],
-                        "Image channel should be equal to bias number");
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+                        "Image channel should be equal to weight number");
   int channel = input_x->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
@@ -55,8 +54,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
   convArgs.image.channels = input_x->dims()[1];
   convArgs.image.height = input_x->dims()[2];
   convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 1;
-  convArgs.image.pad_width = 1;
+  convArgs.image.pad_height = 0;
+  convArgs.image.pad_width = 0;
   convArgs.image.scale_address =
       input_x->fpga_args().scale_pointer();  // fc input has scale attribute??
   convArgs.output.address = (void *)out_ptr;
...
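
Both FC kernels (with and without relu) receive the same pair of fixes: the PADDLE_MOBILE_ENFORCE now compares the input's channel count against the weight tensor input_y rather than the bias input_z, with the message updated to match, and the image padding is zeroed, since these kernels drive the conv engine through ConvArgs and a fully connected layer needs no spatial padding. A standalone sketch of the corrected invariant (CheckFcShapes is hypothetical; x assumed NCHW, y assumed [K, N]):

    #include <cassert>
    #include <cstdint>

    // Mirrors the corrected enforce: for the FPGA FC-as-conv path, input
    // channels (x dims[1]) must equal the weight's first dimension
    // (y dims[0]); the old code wrongly checked against the bias tensor z.
    inline void CheckFcShapes(const int64_t x_dims[4], const int64_t y_dims[2]) {
      assert(x_dims[1] == y_dims[0] &&
             "Image channel should be equal to weight number");
    }
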
src/operators/kernel/fpga/pool_kernel.cpp
@@ -14,7 +14,6 @@ limitations under the License. */
 #ifdef POOL_OP
 #include "operators/kernel/pool_kernel.h"
-#include "fpga/api/fpga_api.h"
 class PoolingArgs;
 namespace paddle_mobile {
@@ -25,7 +24,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam *param) {
   const Tensor *input = param->Input();
   auto input_ptr = input->data<float>();
   Tensor *output = param->Output();
-  auto output_ptr = output->data<float>();
+  auto output_ptr = output->mutable_data<float>();
   vector<int> ksize = param->Ksize();
   vector<int> strides = param->Strides();
   vector<int> paddings = param->Paddings();
...