From f19fc51b67954ae039fc9c7d0014a9b2c26abddb Mon Sep 17 00:00:00 2001 From: zhangyang Date: Sat, 8 Sep 2018 17:15:10 +0800 Subject: [PATCH] Modify FPGA kernels to comply to Gtype --- CMakeLists.txt | 17 ++++++++++++----- src/operators/feed_op.h | 3 ++- .../kernel/fpga/elementwise_add_relu_kernel.cpp | 4 ++-- src/operators/kernel/fpga/fc_relu_kernel.cpp | 2 +- src/operators/kernel/fpga/fusion_fc_kernel.cpp | 2 +- src/operators/op_param.h | 2 +- 6 files changed, 19 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8438ad53b0..e4cb9180f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,15 +1,15 @@ cmake_minimum_required(VERSION 3.6) -option(USE_OPENMP "openmp support" ON) +option(USE_OPENMP "openmp support" OFF) project(paddle-mobile) -option(DEBUGING "enable debug mode" OFF) +option(DEBUGING "enable debug mode" ON) option(USE_EXCEPTION "use std exception" OFF) option(LOG_PROFILE "log profile" OFF) # select the platform to build -option(CPU "armv7 with neon" ON) +option(CPU "armv7 with neon" OFF) option(MALI_GPU "mali gpu" OFF) -option(FPGA "fpga" OFF) +option(FPGA "fpga" ON) file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm) file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h) @@ -94,6 +94,8 @@ else() endif() if(FPGA) + set(DEBUGING ON) + add_definitions(-DPADDLE_MOBILE_DEBUG) add_definitions(-DPADDLE_MOBILE_FPGA) else() file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc) @@ -140,7 +142,12 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build) # NET default -set(NET "default" CACHE STRING "select net type") +if (FPGA) + set(NET "FPGAnets" CACHE STRING "select net type") +else() + set(NET "default" CACHE STRING "select net type") +endif() + set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets" "NLP") include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake") diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index 7982735030..7098e5f08c 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -35,6 +35,7 @@ class FeedOp : public framework::OperatorBase { auto out_dims = param_.Out()->dims(); out_dims[0] = param_.BatchSize(); param_.Out()->Resize(out_dims); + DLOG << "feed_op output dims size" << out_dims.size(); // note : mobile infershape iscalled when executer is created. so do not // pass lod here . @@ -49,7 +50,7 @@ class FeedOp : public framework::OperatorBase { } void RunImpl() const { - Tensor *input = const_cast(param_.InputX()); + auto input = (Tensor *)const_cast(param_.InputX()); auto input_ptr = input->data(); fpga::format_image(input); Tensor *output = param_.Out(); diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp index c1a9710803..9840f495e8 100644 --- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp @@ -22,8 +22,8 @@ template <> bool ElementwiseAddReluKernel::Init( ElementwiseAddReluParam *param) { bool relu_enabled = true; - auto *input_x = const_cast(param->InputX()); - auto *input_y = const_cast(param->InputY()); + auto *input_x = const_cast(param->InputX()); + auto *input_y = const_cast(param->InputY()); auto *out = param->Out(); auto input_x_ptr = input_x->data(); auto input_y_ptr = input_y->data(); diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp index 6636096d1c..c188831e13 100644 --- a/src/operators/kernel/fpga/fc_relu_kernel.cpp +++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp @@ -20,7 +20,7 @@ namespace operators { template <> bool FusionFcReluKernel::Init(FusionFcReluParam *param) { bool relu_enabled = true; - auto *input_x = const_cast(param->InputX()); + auto *input_x = const_cast(param->InputX()); auto input_x_ptr = input_x->data(); auto *filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp index 15392dd146..6821e13508 100644 --- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp +++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp @@ -21,7 +21,7 @@ namespace operators { template <> bool FusionFcKernel::Init(FusionFcParam *param) { bool relu_enabled = false; - auto *input_x = const_cast(param->InputX()); + auto *input_x = const_cast(param->InputX()); auto input_x_ptr = input_x->data(); auto *filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 551ec01004..e7b02d0fc3 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -56,7 +56,7 @@ struct DtypeTensorTrait { template <> struct DtypeTensorTrait { // This is the type we obtained in variable. - typedef framework::Tensor gtype; + typedef framework::LoDTensor gtype; // This type will be the parent class type // or the same type. typedef framework::Tensor rtype; -- GitLab