diff --git a/CMakeLists.txt b/CMakeLists.txt index 8438ad53b026df19beb9f49becee379a2e45a795..e4cb9180f6ab6dd62265bd07c9f643a5e07be1e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,15 +1,15 @@ cmake_minimum_required(VERSION 3.6) -option(USE_OPENMP "openmp support" ON) +option(USE_OPENMP "openmp support" OFF) project(paddle-mobile) -option(DEBUGING "enable debug mode" OFF) +option(DEBUGING "enable debug mode" ON) option(USE_EXCEPTION "use std exception" OFF) option(LOG_PROFILE "log profile" OFF) # select the platform to build -option(CPU "armv7 with neon" ON) +option(CPU "armv7 with neon" OFF) option(MALI_GPU "mali gpu" OFF) -option(FPGA "fpga" OFF) +option(FPGA "fpga" ON) file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm) file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h) @@ -94,6 +94,8 @@ else() endif() if(FPGA) + set(DEBUGING ON) + add_definitions(-DPADDLE_MOBILE_DEBUG) add_definitions(-DPADDLE_MOBILE_FPGA) else() file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc) @@ -140,7 +142,12 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build) # NET default -set(NET "default" CACHE STRING "select net type") +if (FPGA) + set(NET "FPGAnets" CACHE STRING "select net type") +else() + set(NET "default" CACHE STRING "select net type") +endif() + set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets" "NLP") include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake") diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index 7982735030690a9d3fe75cbadeb45f0f70a78836..7098e5f08cc4220c28616b922a2a74245de5bc3a 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -35,6 +35,7 @@ class FeedOp : public framework::OperatorBase { auto out_dims = param_.Out()->dims(); out_dims[0] = param_.BatchSize(); param_.Out()->Resize(out_dims); + DLOG << "feed_op output dims size" << out_dims.size(); // note : mobile infershape iscalled when executer is created. so do not // pass lod here . @@ -49,7 +50,7 @@ class FeedOp : public framework::OperatorBase { } void RunImpl() const { - Tensor *input = const_cast(param_.InputX()); + auto input = (Tensor *)const_cast(param_.InputX()); auto input_ptr = input->data(); fpga::format_image(input); Tensor *output = param_.Out(); diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp index c1a9710803a8ea9052d6e355aa57a6bdff380846..9840f495e89a3e63990bf5f10c65cf4afe8d0854 100644 --- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp @@ -22,8 +22,8 @@ template <> bool ElementwiseAddReluKernel::Init( ElementwiseAddReluParam *param) { bool relu_enabled = true; - auto *input_x = const_cast(param->InputX()); - auto *input_y = const_cast(param->InputY()); + auto *input_x = const_cast(param->InputX()); + auto *input_y = const_cast(param->InputY()); auto *out = param->Out(); auto input_x_ptr = input_x->data(); auto input_y_ptr = input_y->data(); diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp index 6636096d1cc159701c7a2aa214251b1b4622fc41..c188831e1303e365275283adb54f55d571aca52d 100644 --- a/src/operators/kernel/fpga/fc_relu_kernel.cpp +++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp @@ -20,7 +20,7 @@ namespace operators { template <> bool FusionFcReluKernel::Init(FusionFcReluParam *param) { bool relu_enabled = true; - auto *input_x = const_cast(param->InputX()); + auto *input_x = const_cast(param->InputX()); auto input_x_ptr = input_x->data(); auto *filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp index 15392dd146a8fed8b4f5edb946b00d6510dd3732..6821e135085ce2a57ea58e4657549c54a1246342 100644 --- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp +++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp @@ -21,7 +21,7 @@ namespace operators { template <> bool FusionFcKernel::Init(FusionFcParam *param) { bool relu_enabled = false; - auto *input_x = const_cast(param->InputX()); + auto *input_x = const_cast(param->InputX()); auto input_x_ptr = input_x->data(); auto *filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 551ec0100431e32a775442066dae4964d484832a..e7b02d0fc3a15ade9ca758e2eb47fb2d6d7c517a 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -56,7 +56,7 @@ struct DtypeTensorTrait { template <> struct DtypeTensorTrait { // This is the type we obtained in variable. - typedef framework::Tensor gtype; + typedef framework::LoDTensor gtype; // This type will be the parent class type // or the same type. typedef framework::Tensor rtype;