diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8438ad53b026df19beb9f49becee379a2e45a795..e4cb9180f6ab6dd62265bd07c9f643a5e07be1e7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,15 +1,15 @@
 cmake_minimum_required(VERSION 3.6)
-option(USE_OPENMP "openmp support" ON)
+option(USE_OPENMP "openmp support" OFF)
 
 project(paddle-mobile)
 
-option(DEBUGING "enable debug mode" OFF)
+option(DEBUGING "enable debug mode" ON)
 option(USE_EXCEPTION "use std exception" OFF)
 option(LOG_PROFILE "log profile" OFF)
 # select the platform to build
-option(CPU "armv7 with neon" ON)
+option(CPU "armv7 with neon" OFF)
 option(MALI_GPU "mali gpu" OFF)
-option(FPGA "fpga" OFF)
+option(FPGA "fpga" ON)
 
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
@@ -94,6 +94,8 @@ else()
 endif()
 
 if(FPGA)
+    set(DEBUGING ON)
+    add_definitions(-DPADDLE_MOBILE_DEBUG)
     add_definitions(-DPADDLE_MOBILE_FPGA)
 else()
     file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc)
@@ -140,7 +142,12 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 
 # NET default
-set(NET "default" CACHE STRING "select net type")
+if (FPGA)
+    set(NET "FPGAnets" CACHE STRING "select net type")
+else()
+    set(NET "default" CACHE STRING "select net type")
+endif()
+
 set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets" "NLP")
 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
 
diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h
index 7982735030690a9d3fe75cbadeb45f0f70a78836..7098e5f08cc4220c28616b922a2a74245de5bc3a 100644
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -35,6 +35,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
     auto out_dims = param_.Out()->dims();
     out_dims[0] = param_.BatchSize();
     param_.Out()->Resize(out_dims);
+    DLOG << "feed_op output dims size" << out_dims.size();
 
     //  note : mobile infershape iscalled when executer is created.  so  do not
     //  pass lod here .
@@ -49,7 +50,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
   }
 
   void RunImpl() const {
-    Tensor *input = const_cast<Tensor *>(param_.InputX());
+    Tensor *input = const_cast<LoDTensor *>(param_.InputX());
     auto input_ptr = input->data<float>();
     fpga::format_image(input);
     Tensor *output = param_.Out();
diff --git a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
index c1a9710803a8ea9052d6e355aa57a6bdff380846..9840f495e89a3e63990bf5f10c65cf4afe8d0854 100644
--- a/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp
@@ -22,8 +22,8 @@ template <>
 bool ElementwiseAddReluKernel<FPGA, float>::Init(
     ElementwiseAddReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  auto *input_x = const_cast<Tensor *>(param->InputX());
-  auto *input_y = const_cast<Tensor *>(param->InputY());
+  auto *input_x = const_cast<LoDTensor *>(param->InputX());
+  auto *input_y = const_cast<LoDTensor *>(param->InputY());
   auto *out = param->Out();
   auto input_x_ptr = input_x->data<float>();
   auto input_y_ptr = input_y->data<float>();
diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp
index 6636096d1cc159701c7a2aa214251b1b4622fc41..c188831e1303e365275283adb54f55d571aca52d 100644
--- a/src/operators/kernel/fpga/fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp
@@ -20,7 +20,7 @@ namespace operators {
 template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  auto *input_x = const_cast<Tensor *>(param->InputX());
+  auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto input_x_ptr = input_x->data<float>();
   auto *filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
index 15392dd146a8fed8b4f5edb946b00d6510dd3732..6821e135085ce2a57ea58e4657549c54a1246342 100644
--- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
@@ -21,7 +21,7 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   bool relu_enabled = false;
-  auto *input_x = const_cast<Tensor *>(param->InputX());
+  auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto input_x_ptr = input_x->data<float>();
   auto *filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 551ec0100431e32a775442066dae4964d484832a..e7b02d0fc3a15ade9ca758e2eb47fb2d6d7c517a 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -56,7 +56,7 @@ struct DtypeTensorTrait<CPU> {
 template <>
 struct DtypeTensorTrait<FPGA> {
   // This is the type we obtained in variable.
-  typedef framework::Tensor gtype;
+  typedef framework::LoDTensor gtype;
   // This type will be the parent class type
   // or the same type.
   typedef framework::Tensor rtype;