diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp
index f91c21beb2d6b5fbce86b56d49b7d8c6a3ec9219..779c846d1f3c465e5113f805b2b3856a1a7894c5 100644
--- a/src/fpga/api/fpga_api.cpp
+++ b/src/fpga/api/fpga_api.cpp
@@ -35,7 +35,7 @@ namespace fpga {
 static int fd = -1;
 static const char *device_path = "/dev/fpgadrv0";
 
-static inline int do_ioctl(int req, void *arg) {
-  return ioctl(req, (unsigned int64_t)arg);
+static inline int do_ioctl(int req, const void *arg) {
+  return ioctl(fd, req, arg);  // ioctl(2) takes the descriptor first
 }
 
@@ -58,12 +58,17 @@ void fpga_copy(void *dest, const void *src, size_t num) {
   memcpy(dest, src, num);
 }
 
-int ComputeFpgaConv(const struct ConvArgs &args) { return do_ioctl(21, &args); }
+int ComputeFpgaConv(const struct ConvArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_CONV, &args);  // request nr 21
+}
 int ComputeFpgaPool(const struct PoolingArgs &args) {
-  return do_ioctl(22, &args);
+  return do_ioctl(IOCTL_CONFIG_POOLING, &args);  // request nr 22
 }
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
-  return do_ioctl(23, &args);
+  return do_ioctl(IOCTL_CONFIG_EW, &args);  // request nr 23
+}
+int PerformBypass(const struct BypassArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_BYPASS, &args);  // request nr 24
 }
 
 }  // namespace fpga
diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h
index 08635cdb5c01b50f59eb35554bba9a7b70f6ebfb..0823e19a7f9dfaba709b6ad2723e3228c27e2e0f 100644
--- a/src/fpga/api/fpga_api.h
+++ b/src/fpga/api/fpga_api.h
@@ -86,12 +86,12 @@ struct ImageOutputArgs {
 
 struct ConvArgs {
   bool relu_enabled;
-  void* bias_address;
+  void* sb_address;  // scale and bias are interlaced;
   void* filter_address;
+  float* filter_scale_address;
   uint32_t filter_num;
   uint32_t group_num;
 
-  void* sb_address;  // scale and bias are interlaced;
   struct KernelArgs kernel;
   struct ImageInputArgs image;  // input image;
   struct ImageOutputArgs output;
@@ -116,6 +116,7 @@ struct EWAddArgs {
 
 struct BypassArgs {
   enum DataConvertType convert_type;
+  enum LayoutConvertType layout_type;  // NOTE(review): not set in this diff
   struct ImageInputArgs image;
   struct ImageOutputArgs output;
 };
@@ -125,11 +126,6 @@ struct FpgaRegWriteArgs {
   uint64_t value;
 };
 
-struct FpgaRegReadArgs {
-  uint64_t address;
-  uint64_t value;
-};
-
 #define IOCTL_FPGA_MAGIC 'FPGA'
 
 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
@@ -143,6 +139,7 @@ struct FpgaRegReadArgs {
 #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
 #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
 #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
+#define IOCTL_CONFIG_BYPASS _IOW(IOCTL_FPGA_MAGIC, 24, struct BypassArgs)
 #define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
 #define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)
 
@@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE {
 
 //============================== API =============================
 
+int PerformBypass(const struct BypassArgs& args);
 int ComputeFpgaConv(const struct ConvArgs& args);
 int ComputeFpgaPool(const struct PoolingArgs& args);
 int ComputeFpgaEWAdd(const struct EWAddArgs& args);
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h
index d2d2d61835de84c94760c10a25a973d4eaff1fbe..7a1df04732580c7225423cedeb277beca3edc154 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -13,55 +13,55 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 
-#include <string>
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
 #include "common/types.h"
 #include "framework/lod_tensor.h"
-#include "framework/operator.h"
-#include "framework/scope.h"
 #include "framework/tensor.h"
 
 namespace paddle_mobile {
 
-bool is_conv(std::string type) {
-  if (type.compare(G_OP_TYPE_CONV) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0) {
-    return true;
-  }
-  return false;
-}
-
+// Quantizes a float filter to int8 and returns the result in a newly
+// allocated tensor that the caller then owns. A filter whose element type
+// is not float is returned unchanged.
+//
+// Quantization is symmetric: scale = max(|w|) / 127 and q = round(w / scale),
+// so w is approximately q * scale.
+//
+// NOTE(review): Dtype is not deducible from the argument, so callers have to
+// name it explicitly.
 template <typename Dtype>
-void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op,
-                   std::shared_ptr<framework::Scope> scope) {
-  if (!is_conv(op.get()->Type())) {
-    return;
-  }
-  framework::Tensor* filter = nullptr;
-  auto var_vec = op.get()->Inputs().at("Filter");
-  if (!var_vec.empty()) {
-    auto var = scope.get()->FindVar(var_vec[0]);
-    filter = var->template GetMutable<framework::LoDTensor>();
-  }
-  float scale = 0;
-
-  // 32bit filter -> 8bit filter;
-  if (filter->type() == typeid(float)) {
-    framework::Tensor* originalFilter = filter;
-    framework::Tensor* quantFilter = new framework::Tensor();
-    float* floatData = originalFilter->data<float>();
-    int8_t* intData = quantFilter->mutable_data<int8_t>();
-  }
+framework::Tensor* quantilize_filter(framework::Tensor* filter) {
+  // 32bit filter -> 8bit filter;
+  if (filter->type() != typeid(float)) {
+    return filter;
+  }
+
+  const float* floatData = filter->data<float>();
+  const int64_t count = filter->numel();
+
+  // Use the largest magnitude so negative weights stay in range too.
+  float float_range = 0.0f;
+  for (int64_t i = 0; i < count; ++i) {
+    float_range = std::max(float_range, std::fabs(floatData[i]));
+  }
+  const float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
+  const float scale = float_range / fix_range;
+  // scale is 0 only when every weight is 0; everything then quantizes to 0.
+  const float inv_scale = scale > 0.0f ? 1.0f / scale : 0.0f;
+
+  framework::Tensor* quantFilter = new framework::Tensor();
+  // Shape the new tensor before asking it for storage.
+  quantFilter->Resize(filter->dims());
+  int8_t* intData = quantFilter->mutable_data<int8_t>();
+  for (int64_t i = 0; i < count; ++i) {
+    intData[i] = static_cast<int8_t>(std::round(floatData[i] * inv_scale));
+  }
+  // NOTE(review): `scale` still has to be attached to quantFilter. Tensor
+  // shows only a const fpga_args() accessor in this change; add a setter.
+  // NCHW -> NHWC;
+  return quantFilter;
 }
 
 }  // namespace paddle_mobile
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 8d9407e8ee25a4dadbee16713324f4afa90bb03f..364f79cc84b5a3f4c2aa1838961eb092a9b842f0 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -257,10 +257,13 @@ class Tensor {
   struct FPGAArgs {
     float scale;
 
-    inline float *scale_pointer() const { return &scale; }
+    inline const float *scale_pointer() const { return &scale; }
   };
 
+  // Returns a reference to the stored args, not a copy: callers keep the
+  // address from scale_pointer(), which must point into this tensor rather
+  // than into a temporary that dies at the end of the calling expression.
   const struct FPGAArgs &fpga_args() const { return fpgaArgs_; }
 #endif
 
  private:
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index c09fe2c58532437336307ce007532d43689d8fd2..d6434b64aa752fd62bc637a882298228d59880b8 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -32,10 +32,6 @@ limitations under the License. */
 #include "common/threadpool.h"
 #endif
 
-#ifdef PADDLE_MOBILE_FPGA
-#include "fpga/fpga_quantilization.h"
-#endif
-
 namespace paddle_mobile {
 using framework::Variable;
 
@@ -100,11 +96,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   for (const auto &op : ops) {
     op->Init();
   }
-#ifdef PADDLE_MOBILE_FPGA
-  for (const auto &op : ops) {
-    quantilize_op(op, program_.scope);
-  }
-#endif
 }
 
 template <typename Dtype, Precision P>
diff --git a/src/operators/fusion_conv_add_bn_op.cpp b/src/operators/fusion_conv_add_bn_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5b61bf5d390cc2904a3f40f5400a5a3eec9a2dd5
--- /dev/null
+++ b/src/operators/fusion_conv_add_bn_op.cpp
@@ -0,0 +1,61 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVADDBN_OP
+
+#include "operators/fusion_conv_add_bn_op.h"
+#include "operators/math/conv_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename Dtype, typename T>
+void FusionConvAddBNOp<Dtype, T>::InferShape() const {
+  auto in_dims = this->param_.Input()->dims();
+  auto filter_dims = this->param_.Filter()->dims();
+  const std::vector<int> &strides = this->param_.Strides();
+  std::vector<int> paddings = this->param_.Paddings();
+  int groups = this->param_.Groups();  // read here but not used below
+  std::vector<int> dilations = this->param_.Dilations();
+  // Input and filter ranks must match, as must the attribute vector lengths.
+  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
+                         dilations.size() == paddings.size() &&
+                         paddings.size() == strides.size()),
+                        "ConvParam is not suitable");
+  // Shape is {in_dims[0], filter_dims[0]} plus one extent per stride entry.
+  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
+  for (size_t i = 0; i < strides.size(); ++i) {
+    output_shape.push_back(
+        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
+                             paddings[i], strides[i]));
+  }
+  // Resize the output tensor to the computed shape.
+  framework::DDim ddim = framework::make_ddim(output_shape);
+  this->param_.Output()->Resize(ddim);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(fusion_conv_add_bn, ops::FusionConvAddBNOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_add_bn, ops::FusionConvAddBNOp);
+#endif
+
+#endif
diff --git a/src/operators/fusion_conv_add_bn_op.h b/src/operators/fusion_conv_add_bn_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..7a7f6b2bababd3f5d36d7b6faf60069567d45423
--- /dev/null
+++ b/src/operators/fusion_conv_add_bn_op.h
@@ -0,0 +1,115 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVADDBN_OP
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "op_param.h"
+#include "operators/kernel/conv_add_bn_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+class FusionConvAddBNMatcher : public framework::FusionOpMatcher {
+ public:
+  FusionConvAddBNMatcher() {  // pattern: conv > elementwise_add > batchnorm
+    node_ = framework::Node(G_OP_TYPE_CONV);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM);
+  }
+  // Calls node->Folder with this pattern's depth, Type() and input-key map.
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}},
+                  {G_OP_TYPE_BATCHNORM,
+                   {{"Scale", "Scale"},
+                    {"Mean", "Mean"},
+                    {"Bias", "Bias"},
+                    {"Variance", "Variance"}}}},
+                 removed_nodes);
+  }
+  // Type string handed to Folder() by FolderNodes().
+  std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_BN; }
+};
+
+template <typename DeviceType, typename T>
+class FusionConvAddBNOp : public framework::OperatorWithKernel<
+                              DeviceType, FusionConvAddBNParam,
+                              operators::ConvAddBNKernel<DeviceType, T>> {
+ public:
+  FusionConvAddBNOp(const string &type, const VariableNameMap &inputs,
+                    const VariableNameMap &outputs,
+                    const framework::AttributeMap &attrs,
+                    std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<
+            DeviceType, FusionConvAddBNParam,
+            operators::ConvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
+                                                       attrs, scope) {}
+  // Only declared here; the definition is not in this header.
+  void InferShape() const override;
+
+ protected:
+};
+
+#ifdef PADDLE_MOBILE_CPU
+
+#ifndef FUSION_CONV_ADD_BN_REGISTER
+static framework::FusionOpRegistrar fusion_conv_add_bn_registrar(
+    new FusionConvAddBNMatcher());
+#define FUSION_CONV_ADD_BN_REGISTER
+#endif
+
+#endif
+
+#ifdef PADDLE_MOBILE_MALI_GPU
+
+#ifndef FUSION_CONV_ADD_BN_REGISTER
+static framework::FusionOpRegistrar fusion_conv_add_bn_registrar(
+    new FusionConvAddBNMatcher());
+#define FUSION_CONV_ADD_BN_REGISTER
+#endif
+
+#endif
+
+#ifdef PADDLE_MOBILE_FPGA
+
+#ifndef FUSION_CONV_ADD_BN_REGISTER
+static framework::FusionOpRegistrar fusion_conv_add_bn_registrar(
+    new FusionConvAddBNMatcher());
+#define FUSION_CONV_ADD_BN_REGISTER
+#endif
+
+#endif
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(fusion_conv_add_bn);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(fusion_conv_add_bn);
+#endif
+
+#endif
diff --git a/src/operators/fusion_conv_add_bn_relu_op.cpp b/src/operators/fusion_conv_add_bn_relu_op.cpp
index 16f4650a64ec0c363d5fa94ee27c15c73cf58a70..793634eec392fabe6c7399127ec9cb3e187697bc 100644
--- a/src/operators/fusion_conv_add_bn_relu_op.cpp
+++ b/src/operators/fusion_conv_add_bn_relu_op.cpp
@@ -55,6 +55,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
 #endif
 
 #endif
diff --git a/src/operators/fusion_conv_add_bn_relu_op.h b/src/operators/fusion_conv_add_bn_relu_op.h
index 19e33465c06921e9a6a7beb77053f05a03a6c760..54e7e58f8af4111edd0b86c85bb1cffc87f5cd22 100644
--- a/src/operators/fusion_conv_add_bn_relu_op.h
+++ b/src/operators/fusion_conv_add_bn_relu_op.h
@@ -96,6 +96,13 @@ static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
 #endif
 
 #ifdef PADDLE_MOBILE_FPGA
+
+#ifndef FUSION_CONV_ADD_BN_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
+    new FusionConvAddBNReluMatcher());
+#define FUSION_CONV_ADD_BN_RELU_REGISTER
+#endif
+
 #endif
 
 }  // namespace operators
@@ -107,6 +114,7 @@ USE_OP_CPU(fusion_conv_add_bn_relu);
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(fusion_conv_add_bn_relu);
 #endif
 
 #endif
diff --git a/src/operators/fusion_conv_add_relu_op.cpp b/src/operators/fusion_conv_add_relu_op.cpp
index 18618886cccba08c7502b3e1d75fbba9b6916f56..99b770a6c5e3bc89024e467631e129b914f0bcec 100644
--- a/src/operators/fusion_conv_add_relu_op.cpp
+++ b/src/operators/fusion_conv_add_relu_op.cpp
@@ -54,6 +54,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_relu, ops::FusionConvAddReluOp);
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_add_relu, ops::FusionConvAddReluOp);
 #endif
 
 #endif
diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h
index 50a4a2c7c64526c9a5dc1057829ed14f09357780..cda97ba1a342e5b9451fd8363643f638792e3579 100644
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -75,6 +75,13 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+
+#ifndef CONV_ADD_RELU_REGISTER
+#define CONV_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
+    new FusionConvAddReluOpMatcher());
+#endif
+
 #endif
 
 }  // namespace operators
@@ -86,6 +93,7 @@ USE_OP_CPU(fusion_conv_add_relu);
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(fusion_conv_add_relu);
 #endif
 
 #endif
diff --git a/src/operators/kernel/central-arm-func/mul_arm_func.h b/src/operators/kernel/central-arm-func/mul_arm_func.h
index 9dfb1f48a574156f1b026fc6af3a03d77b81263f..d2da67afe1d2eb746971a2443bdb449eb2b66ec4 100644
--- a/src/operators/kernel/central-arm-func/mul_arm_func.h
+++ b/src/operators/kernel/central-arm-func/mul_arm_func.h
@@ -19,6 +19,40 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 
+// 1. If both x and y are 2-D,
+// x = [[1,2],   y = [[5,6],
+//      [3,4]]        [7,8]]
+// the result is an ordinary matrix product: out =
+//  [[1*5+2*7,1*6+2*8],[3*5+4*7, 3*6+4*8]]
+//
+// 2. If x or y has more than 2 dims, e.g. x is (2,3,4) and y is (4,1,2):
+// x = [[[1,2,3,4],
+//       [2,3,4,5],
+//       [3,4,5,6]],
+//      [[1,2,3,4],
+//       [2,3,4,5],
+//       [3,4,5,6]]]
+// y = [[[1,2]],
+//      [[3,4]],
+//      [[5,6]],
+//      [[7,8]]]
+// x_num_col_dims and y_num_col_dims are used to flatten x and y to 2-D.
+// From the model: x_num_col_dims = 2, y_num_col_dims = 1; ranges are [a, b).
+// (1) For x = (2,3,4), multiply the dims at index [0,x_num_col_dims): 2*3 = 6,
+//     and the dims at [x_num_col_dims,xdim.size()): just 4, giving 4;
+//     Tensor x's dims are rewritten as (6,4).
+// (2) For y = (4,1,2), multiply the dims at index [0,y_num_col_dims): just 4,
+//     and the dims at [y_num_col_dims,ydim.size()): 1*2 = 2;
+//     Tensor y's dims are rewritten as (4,2).
+// This does not change how x and y are laid out in memory.
+// x = [[1,2,3,4],             y = [[1,2],
+//      [2,3,4,5],                  [3,4],
+//      [3,4,5,6],   matmul         [5,6],
+//      [1,2,3,4],                  [7,8]]
+//      [2,3,4,5],
+//      [3,4,5,6]]
+// The result is x (6x4) times y (4x2), multiplied as in case 1: out is 6x2.
+
 template <typename P>
 void MulCompute(const MulParam &param) {
   const Tensor *input_x = param.InputX();
diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/conv_add_bn_kernel.h
similarity index 54%
rename from src/operators/kernel/fpga/conv_kernel.cpp
rename to src/operators/kernel/conv_add_bn_kernel.h
index dc537362a216983974bea325433c456136356fc8..cc11ef1d71f402f32b2da6490877626247884a44 100644
--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ b/src/operators/kernel/conv_add_bn_kernel.h
@@ -12,21 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#ifdef CONV_OP
+#pragma once
 
-#include "operators/kernel/conv_kernel.h"
+#ifdef FUSION_CONVADDBN_OP
+
+#include <vector>
+#include "framework/ddim.h"
+#include "framework/operator.h"
+#include "operators/math/conv_func.h"
+#include "operators/math/im2col.h"
+#include "operators/math/math_function.h"
+#include "operators/math/vol2col.h"
+#include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
 
-template <>
-bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
-  return true;
-}
+using framework::DDim;
+using framework::OpKernelBase;
 
-template <>
-void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {}
-template class ConvKernel<FPGA, float>;
+template <typename DeviceType, typename T>
+class ConvAddBNKernel : public OpKernelBase<DeviceType, FusionConvAddBNParam> {
+ public:
+  void Compute(const FusionConvAddBNParam &param) const;  // param is read-only
+  bool Init(FusionConvAddBNParam *param);  // may write to *param
+};
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6f9da6bc1dde924e2c499bb2478d29a8d4a9e5d9
--- /dev/null
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -0,0 +1,94 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVADDBN_OP
+
+#include "operators/kernel/conv_add_bn_kernel.h"
+#include "fpga/api/fpga_api.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// Folds add bias and batch norm into scale/bias and builds the FPGA ConvArgs.
+template <>
+bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
+  const Tensor *input = param->Input();
+  auto input_ptr = input->data<float>();
+  const Tensor *bias = param->Bias();
+  auto bias_ptr = bias->data<float>();
+  const Tensor *filter = param->Filter();
+  auto filter_ptr = filter->data<float>();
+  Tensor *out = param->Output();
+  auto out_ptr = out->mutable_data<float>();
+  auto bn_mean_ptr = param->InputMean()->data<float>();
+  auto bn_var_ptr = param->InputVariance()->data<float>();
+  auto bn_scale_ptr = param->InputScale()->data<float>();
+  auto bn_bias_ptr = param->InputBias()->data<float>();
+  const float epsilon = param->Epsilon();
+  // Bias and BN terms are per output channel, i.e. one per filter.
+  const int channel = filter->dims()[0];
+  PADDLE_MOBILE_ENFORCE(channel == bias->dims()[0] &&
+                            bias->dims()[0] == param->InputBias()->dims()[0],
+                        "Output channel should be equal to bias number");
+  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  Tensor *new_scale = new Tensor();
+  Tensor *new_bias = new Tensor();
+  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
+  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
+  // Fold BN into out = scale * conv + bias; bs_ptr interleaves the pairs.
+  for (int i = 0; i < channel; i++) {
+    new_scale_ptr[i] = bn_scale_ptr[i] /
+                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
+    new_bias_ptr[i] =
+        bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
+    bs_ptr[i * 2] = new_scale_ptr[i];
+    bs_ptr[i * 2 + 1] = new_bias_ptr[i];
+  }
+  param->SetNewScale(new_scale);
+  param->SetNewBias(new_bias);
+  // Describe the convolution for the driver and keep it on param.
+  fpga::ConvArgs convArgs = {};  // zero any field not assigned below
+  convArgs.relu_enabled = false;
+  convArgs.filter_address = (void *)filter_ptr;
+  convArgs.filter_num = filter->dims()[0];
+  convArgs.group_num = param->Groups();
+  convArgs.sb_address = (void *)bs_ptr;
+  convArgs.kernel.stride_h = param->Strides()[0];
+  convArgs.kernel.stride_w = param->Strides()[1];
+  convArgs.kernel.height = filter->dims()[2];
+  convArgs.kernel.width = filter->dims()[3];
+  convArgs.image.address = (void *)input_ptr;
+  convArgs.image.channels = input->dims()[1];
+  convArgs.image.height = input->dims()[2];
+  convArgs.image.width = input->dims()[3];
+  convArgs.image.pad_height = param->Paddings()[0];
+  convArgs.image.pad_width = param->Paddings()[1];
+  convArgs.image.scale_address = input->fpga_args().scale_pointer();
+  convArgs.output.address = (void *)out_ptr;
+  convArgs.output.scale_address = out->fpga_args().scale_pointer();
+  param->SetFpgaArgs(convArgs);
+  return true;
+}
+
+template <>
+void ConvAddBNKernel<FPGA, float>::Compute(
+    const FusionConvAddBNParam &param) const {
+  fpga::ComputeFpgaConv(param.FpgaArgs());  // int result is discarded
+}
+template class ConvAddBNKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..66a593df84c12f87371a9bde9f0aef514b392584
--- /dev/null
+++ b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
@@ -0,0 +1,94 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVADDBNRELU_OP
+
+#include "operators/kernel/conv_add_bn_relu_kernel.h"
+#include "memory/t_malloc.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// Folds add bias and batch norm into scale/bias; builds ConvArgs with ReLU on.
+template <>
+bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) {
+  const Tensor *input = param->Input();
+  auto input_ptr = input->data<float>();
+  const Tensor *bias = param->Bias();
+  auto bias_ptr = bias->data<float>();
+  const Tensor *filter = param->Filter();
+  auto filter_ptr = filter->data<float>();
+  Tensor *out = param->Output();
+  auto out_ptr = out->mutable_data<float>();
+  auto bn_mean_ptr = param->InputMean()->data<float>();
+  auto bn_var_ptr = param->InputVariance()->data<float>();
+  auto bn_scale_ptr = param->InputScale()->data<float>();
+  auto bn_bias_ptr = param->InputBias()->data<float>();
+  const float epsilon = param->Epsilon();
+  // Bias and BN terms are per output channel, i.e. one per filter.
+  const int channel = filter->dims()[0];
+  PADDLE_MOBILE_ENFORCE(channel == bias->dims()[0] &&
+                            bias->dims()[0] == param->InputBias()->dims()[0],
+                        "Output channel should be equal to bias number");
+  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  Tensor *new_scale = new Tensor();
+  Tensor *new_bias = new Tensor();
+  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
+  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
+  // Fold BN into out = scale * conv + bias; bs_ptr interleaves the pairs.
+  for (int i = 0; i < channel; i++) {
+    new_scale_ptr[i] = bn_scale_ptr[i] /
+                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
+    new_bias_ptr[i] =
+        bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
+    bs_ptr[i * 2] = new_scale_ptr[i];
+    bs_ptr[i * 2 + 1] = new_bias_ptr[i];
+  }
+  param->SetNewScale(new_scale);
+  param->SetNewBias(new_bias);
+  // Describe the convolution for the driver and keep it on param.
+  fpga::ConvArgs convArgs = {};  // zero any field not assigned below
+  convArgs.relu_enabled = true;
+  convArgs.filter_address = (void *)filter_ptr;
+  convArgs.filter_num = filter->dims()[0];
+  convArgs.group_num = param->Groups();
+  convArgs.sb_address = (void *)bs_ptr;
+  convArgs.kernel.stride_h = param->Strides()[0];
+  convArgs.kernel.stride_w = param->Strides()[1];
+  convArgs.kernel.height = filter->dims()[2];
+  convArgs.kernel.width = filter->dims()[3];
+  convArgs.image.address = (void *)input_ptr;
+  convArgs.image.channels = input->dims()[1];
+  convArgs.image.height = input->dims()[2];
+  convArgs.image.width = input->dims()[3];
+  convArgs.image.pad_height = param->Paddings()[0];
+  convArgs.image.pad_width = param->Paddings()[1];
+  convArgs.image.scale_address = input->fpga_args().scale_pointer();
+  convArgs.output.address = (void *)out_ptr;
+  convArgs.output.scale_address = out->fpga_args().scale_pointer();
+  param->SetFpgaArgs(convArgs);
+  return true;
+}
+
+template <>
+void ConvAddBNReluKernel<FPGA, float>::Compute(
+    const FusionConvAddBNReluParam &param) const {
+  fpga::ComputeFpgaConv(param.FpgaArgs());  // int result is discarded
+}
+template class ConvAddBNReluKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9692bcef872f956e2cdbe82545b3ab4173bf1348
--- /dev/null
+++ b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
@@ -0,0 +1,78 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVADDRELU_OP
+
+#include "operators/kernel/conv_add_relu_kernel.h"
+#include "common/enforce.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// Packs the add bias as scale/bias pairs; builds ConvArgs with ReLU enabled.
+template <>
+bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
+  const Tensor *input = param->Input();
+  auto input_ptr = input->data<float>();
+  const Tensor *bias = param->Bias();
+  auto bias_ptr = bias->data<float>();
+  const Tensor *filter = param->Filter();
+  auto filter_ptr = filter->data<float>();
+  Tensor *out = param->Output();
+  auto out_ptr = out->mutable_data<float>();
+  // The bias is per output channel, i.e. one entry per filter.
+  const int channel = filter->dims()[0];
+  PADDLE_MOBILE_ENFORCE(channel == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  // Interleave a unit scale with each bias value.
+  for (int i = 0; i < channel; i++) {
+    bs_ptr[i * 2] = 1;
+    bs_ptr[i * 2 + 1] = bias_ptr[i];
+  }
+  // Describe the convolution for the driver and keep it on param.
+  fpga::ConvArgs convArgs = {};  // zero any field not assigned below
+  convArgs.relu_enabled = true;
+  convArgs.filter_address = (void *)filter_ptr;
+  convArgs.filter_num = filter->dims()[0];
+  convArgs.group_num = param->Groups();
+  convArgs.sb_address = (void *)bs_ptr;
+  convArgs.kernel.stride_h = param->Strides()[0];
+  convArgs.kernel.stride_w = param->Strides()[1];
+  convArgs.kernel.height = filter->dims()[2];
+  convArgs.kernel.width = filter->dims()[3];
+  convArgs.image.address = (void *)input_ptr;
+  convArgs.image.channels = input->dims()[1];
+  convArgs.image.height = input->dims()[2];
+  convArgs.image.width = input->dims()[3];
+  convArgs.image.pad_height = param->Paddings()[0];
+  convArgs.image.pad_width = param->Paddings()[1];
+  convArgs.image.scale_address = input->fpga_args().scale_pointer();
+  convArgs.output.address = (void *)out_ptr;
+  convArgs.output.scale_address = out->fpga_args().scale_pointer();
+  param->SetFpgaArgs(convArgs);
+  return true;
+}
+
+template <>
+void ConvAddReluKernel<FPGA, float>::Compute(
+    const FusionConvAddReluParam &param) const {
+  fpga::ComputeFpgaConv(param.FpgaArgs());  // int result is discarded
+}
+template class ConvAddReluKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 88c1886ad7ade5960d1d8175a1b46e12363ca849..0821ab8c32a6ba232a673ddd100a4e7fe6475571 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -1136,7 +1136,7 @@ class FusionConvAddBNParam : public OpParam {
 
   const Tensor *Filter() const { return filter_; }
 
-  Tensor *OutputY() const { return output_y_; }
+  Tensor *Output() const { return output_y_; }
 
   const vector<int> &Strides() const { return strides_; }
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 8839079fecfdbefcdaff85354d3a6a8208af10ee..5072db53874e0becf1318a26633fb13cc33d07f4 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -21,6 +21,7 @@ elseif("resnet" IN_LIST NET)
     # gen test
     ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
     target_link_libraries(test-resnet paddle-mobile)
+elseif("FPGAnets" IN_LIST NET)  # no test targets are defined in this branch
 else ()
 
     # gen test