提交 e82bc1f8 编写于 作者: qnqinan

fix some bugs in newly added FPGA ops and kernels

上级 380c55f2
......@@ -6,9 +6,15 @@ option(USE_OPENMP "openmp support" OFF)
option(USE_EXCEPTION "use std exception" ON)
option(LOG_PROFILE "log profile" ON)
# select the platform to build
option(CPU "armv7 with neon" ON)
option(CPU "armv7 with neon" OFF)
option(MALI_GPU "mali gpu" OFF)
option(FPGA "fpga" OFF)
option(FPGA "fpga" ON)
SET(FPGA ON)
SET(FUSION_ELEMENTWISEADDRELU_OP ON)
SET(FUSION_FC_OP ON)
SET(FUSION_FCRELU_OP ON)
SET(POOL_OP ON)
SET(DROPOUT_OP ON)
file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
......@@ -139,7 +145,8 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
# NET default
set(NET "default" CACHE STRING "select net type")
#set(NET "default" CACHE STRING "select net type")
set(NET "FPGAnets" CACHE STRING "select net type")
set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets")
include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
......@@ -151,7 +158,7 @@ if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
elseif(IS_IOS)
add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
else ()
add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H} src/operators/kernel/fc_relu_kernel.h src/operators/kernel/fpga/fusion_fc_kernel.cpp src/operators/kernel/fpga/fc_relu_kernel.cpp src/operators/fusion_elementwise_add_relu_op.h src/operators/fusion_elementwise_add_relu_op.cpp src/operators/kernel/elementwise_add_relu_kernel.h src/operators/kernel/fpga/pool_kernel.cpp src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp src/operators/kernel/fpga/dropout_kernel.cpp)
endif ()
# unit test
......
......@@ -30,12 +30,12 @@ void FusionElementwiseAddReluOp<Dtype, T>::InferShape() const {
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
ops::FusionElementwiseAddReluOp);
// REGISTER_OPERATOR_CPU(fusion_elementwise_add_relu,
// ops::FusionElementwiseAddReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu,
ops::FusionElementwiseAddReluOp);
// REGISTER_OPERATOR_MALI_GPU(fusion_elementwise_add_relu,
// ops::FusionElementwiseAddReluOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_elementwise_add_relu,
......
......@@ -18,12 +18,29 @@ limitations under the License. */
#include <string>
#include "framework/operator.h"
#include "kernel/elementwise_add_relu_kernel.h"
#include "operators/op_param.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/elementwise_add_relu_kernel.h"
namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;
// Fusion matcher: recognizes an elementwise_add node immediately followed by
// a relu node and folds the pair into the single fused op
// G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU.
// NOTE(review): the class name carries a stray 'e' ("Fusione..."); it is kept
// as-is because the FusionOpRegistrar instantiations elsewhere in this file
// reference it by this exact name.
class FusioneElementwiseAddReluMatcher : public framework::FusionOpMatcher {
 public:
 FusioneElementwiseAddReluMatcher() {
 // Root of the pattern graph to match.
 node_ = framework::Node(G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU);
 // Appends a relu child to the pattern — presumably framework::Node
 // overloads operator> as a graph-building DSL; TODO confirm against
 // framework/program/program-optimize/node.h.
 node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
 }
 // Collapses the matched subtree rooted at `node` into one fused op node of
 // Type(); nodes removed from the graph are appended to `removed_nodes`.
 void FolderNodes(
 framework::Node *node,
 std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
 node->Folder(node_.Depth(), Type(), {}, removed_nodes);
 }
 // Op-type string registered for the fused node.
 std::string Type() { return G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU; }
};
template <typename DeviceType, typename T>
class FusionElementwiseAddReluOp
: public framework::OperatorWithKernel<
......@@ -39,13 +56,38 @@ class FusionElementwiseAddReluOp
operators::ElementwiseAddReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ElementwiseAddReluParam,
operators::ElementwiseAddReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
#ifdef PADDLE_MOBILE_CPU
/*
#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER
static framework::FusionOpRegistrar fusion_elementwise_relu_registrar(
new FusioneElementwiseAddReluMatcher());
#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER
#endif
*/
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
/*
#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER
static framework::FusionOpRegistrar fusion_elementwise_relu_registrar(
new FusioneElementwiseAddReluMatcher());
#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER
#endif
*/
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef FUSION_ELEMENTWISE_ADD_RELU_REGISTER
static framework::FusionOpRegistrar fusion_elementwise_relu_registrar(
new FusioneElementwiseAddReluMatcher());
#define FUSION_ELEMENTWISE_ADD_RELU_REGISTER
#endif
} // namespace operators
} // namespace paddle_mobile
......@@ -53,10 +95,10 @@ class FusionElementwiseAddReluOp
USE_OP_CPU(fusion_elementwise_add_relu);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
USE_OP_MALI_GPU(fusion_elementwise_add_relu);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_elementwise_add_relu);
#endif
#endif
#endif
......@@ -67,8 +67,8 @@ class FusionFcOp
#ifdef PADDLE_MOBILE_CPU
#ifndef CONV_CPU_REGISTER
#define CONV_CPU_REGISTER
#ifndef FUSION_FC_CPU_REGISTER
#define FUSION_FC_CPU_REGISTER
static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#endif
......@@ -84,6 +84,10 @@ static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef FUSION_FC_CPU_REGISTER
#define FUSION_FC_CPU_REGISTER
static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
#endif
#endif
} // namespace operators
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/fusion_fc_relu_kernel.h"
#include "operators/kernel/fc_relu_kernel.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -17,7 +17,6 @@ limitations under the License. */
#pragma once
#include "framework/operator.h"
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
......
......@@ -14,7 +14,6 @@ limitations under the License. */
#ifdef FUSION_ELEMENTWISEADDRELU_OP
#include "operators/kernel/elementwise_add_relu_kernel.h"
#include "fpga/api/fpga_api.h"
namespace paddle_mobile {
namespace operators {
......@@ -28,7 +27,7 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
Tensor *out = param->Out();
auto input_x_ptr = input_x->data<float>();
auto input_y_ptr = input_y->data<float>();
auto out_ptr = out->data<float>();
auto out_ptr = out->mutable_data<float>();
fpga::EWAddArgs ewaddArgs;
ewaddArgs.relu_enabled = relu_enabled;
......@@ -40,16 +39,16 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
input_x->fpga_args().scale_pointer(); // ew has scale attribute??
ewaddArgs.image0.height = input_x->dims()[2];
ewaddArgs.image0.width = input_x->dims()[3];
ewaddArgs.image0.pad_height = 1;
ewaddArgs.image0.pad_width = 1;
ewaddArgs.image0.pad_height = 0;
ewaddArgs.image0.pad_width = 0;
ewaddArgs.image1.address = (void *)input_y_ptr;
ewaddArgs.image1.channels = input_y->dims()[1];
ewaddArgs.image1.scale_address =
input_y->fpga_args().scale_pointer(); // ew has scale attribute??
ewaddArgs.image1.height = input_y->dims()[2];
ewaddArgs.image1.width = input_y->dims()[3];
ewaddArgs.image1.pad_height = 1;
ewaddArgs.image1.pad_width = 1;
ewaddArgs.image1.pad_height = 0;
ewaddArgs.image1.pad_width = 0;
ewaddArgs.output.scale_address = out->fpga_args().scale_pointer();
ewaddArgs.output.address = (void *)out_ptr;
param->SetFpgaArgs(ewaddArgs);
......
......@@ -21,7 +21,6 @@ namespace operators {
template <>
bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
bool relu_enabled = true;
bool bn_enabled = false;
const Tensor *input_x = param->InputX();
auto input_x_ptr = input_x->data<float>();
const Tensor *input_y = param->InputY();
......@@ -31,8 +30,8 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
Tensor *out = param->Out();
auto out_ptr = out->mutable_data<float>();
PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_z->dims()[0],
"Image channel should be equal to bias number");
PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
"Image channel should be equal to weight number");
int channel = input_x->dims()[1];
float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
for (int i = 0; i < channel; i++) {
......@@ -55,8 +54,8 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam *param) {
convArgs.image.channels = input_x->dims()[1];
convArgs.image.height = input_x->dims()[2];
convArgs.image.width = input_x->dims()[3];
convArgs.image.pad_height = 1;
convArgs.image.pad_width = 1;
convArgs.image.pad_height = 0;
convArgs.image.pad_width = 0;
convArgs.image.scale_address =
input_x->fpga_args().scale_pointer(); // fc input has scale attribute??
convArgs.output.address = (void *)out_ptr;
......
......@@ -21,7 +21,6 @@ namespace operators {
template <>
bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
bool relu_enabled = false;
bool bn_enabled = false;
const Tensor *input_x = param->InputX();
auto input_x_ptr = input_x->data<float>();
const Tensor *input_y = param->InputY();
......@@ -31,8 +30,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
Tensor *out = param->Out();
auto out_ptr = out->mutable_data<float>();
PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_z->dims()[0],
"Image channel should be equal to bias number");
PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
"Image channel should be equal to weight number");
int channel = input_x->dims()[1];
float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
for (int i = 0; i < channel; i++) {
......@@ -55,8 +54,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam *param) {
convArgs.image.channels = input_x->dims()[1];
convArgs.image.height = input_x->dims()[2];
convArgs.image.width = input_x->dims()[3];
convArgs.image.pad_height = 1;
convArgs.image.pad_width = 1;
convArgs.image.pad_height = 0;
convArgs.image.pad_width = 0;
convArgs.image.scale_address =
input_x->fpga_args().scale_pointer(); // fc input has scale attribute??
convArgs.output.address = (void *)out_ptr;
......
......@@ -14,7 +14,6 @@ limitations under the License. */
#ifdef POOL_OP
#include "operators/kernel/pool_kernel.h"
#include "fpga/api/fpga_api.h"
class PoolingArgs;
namespace paddle_mobile {
......@@ -25,7 +24,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam *param) {
const Tensor *input = param->Input();
auto input_ptr = input->data<float>();
Tensor *output = param->Output();
auto output_ptr = output->data<float>();
auto output_ptr = output->mutable_data<float>();
vector<int> ksize = param->Ksize();
vector<int> strides = param->Strides();
vector<int> paddings = param->Paddings();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册