diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp index 8167d4cf7d1fd343e15754bafc7c77ee6f52a822..f91c21beb2d6b5fbce86b56d49b7d8c6a3ec9219 100644 --- a/src/fpga/api/fpga_api.cpp +++ b/src/fpga/api/fpga_api.cpp @@ -36,7 +36,7 @@ static int fd = -1; static const char *device_path = "/dev/fpgadrv0"; -static inline int do_ioctl(int req, void *arg) { - return ioctl(req, (long unsigned int)arg); +static inline int do_ioctl(int req, const void *arg) { + return ioctl(fd, req, (uint64_t)arg); } int open_device() { @@ -58,9 +58,15 @@ void fpga_copy(void *dest, const void *src, size_t num) { memcpy(dest, src, num); } -int ComputeFpgaConv(struct ConvArgs args) {} -int ComputeFpgaPool(struct PoolingArgs args) {} -int ComputeFpgaEWAdd(struct EWAddArgs args) {} +int ComputeFpgaConv(const struct ConvArgs &args) { + return do_ioctl(IOCTL_CONFIG_CONV, &args); +} +int ComputeFpgaPool(const struct PoolingArgs &args) { + return do_ioctl(IOCTL_CONFIG_POOLING, &args); +} +int ComputeFpgaEWAdd(const struct EWAddArgs &args) { + return do_ioctl(IOCTL_CONFIG_EW, &args); +} } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h index 4225e887c3dadeddf48b173db72412a5209d989d..08635cdb5c01b50f59eb35554bba9a7b70f6ebfb 100644 --- a/src/fpga/api/fpga_api.h +++ b/src/fpga/api/fpga_api.h @@ -31,6 +31,18 @@ void* fpga_malloc(size_t size); void fpga_free(void* ptr); void fpga_copy(void* dst, const void* src, size_t num); +enum DataConvertType { + DATA_NO_CONVERT = 0, + DATA_FP32_TO_FP16 = 1, + DATA_FP16_TO_FP32 = 2, +}; + +enum LayoutConvertType { + LAYOUT_NO_CONVERT = 0, + LAYOUT_CHW_TO_HWC = 1, + LAYOUT_HWC_TO_CHW = 2, +}; + struct VersionArgs { void* buffer; }; @@ -79,7 +91,7 @@ struct ConvArgs { uint32_t filter_num; uint32_t group_num; - struct BNArgs bn; + void* sb_address; // scale and bias are interlaced; struct KernelArgs kernel; struct ImageInputArgs image; // input image; struct ImageOutputArgs output; @@ -102,6 +114,12 @@ struct EWAddArgs { struct ImageOutputArgs output; }; +struct BypassArgs { + enum DataConvertType
convert_type; + struct ImageInputArgs image; + struct ImageOutputArgs output; +}; + struct FpgaRegWriteArgs { uint64_t address; // uint64_t value; @@ -115,8 +133,6 @@ struct FpgaRegReadArgs { #define IOCTL_FPGA_MAGIC 'FPGA' #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs) -#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs) -#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs) #define IOCTL_SEPARATOR_0 10 @@ -127,6 +143,8 @@ struct FpgaRegReadArgs { #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs) #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs) #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs) +#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs) +#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs) enum FPGA_ERR_TYPE { ERR_IOCTL_CMD = -1, @@ -154,9 +172,9 @@ enum FPGA_ERR_TYPE { //============================== API ============================= -int ComputeFpgaConv(struct ConvArgs args); -int ComputeFpgaPool(struct PoolingArgs args); -int ComputeFpgaEWAdd(struct EWAddArgs args); +int ComputeFpgaConv(const struct ConvArgs& args); +int ComputeFpgaPool(const struct PoolingArgs& args); +int ComputeFpgaEWAdd(const struct EWAddArgs& args); } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h new file mode 100644 index 0000000000000000000000000000000000000000..d2d2d61835de84c94760c10a25a973d4eaff1fbe --- /dev/null +++ b/src/fpga/fpga_quantilization.h @@ -0,0 +1,67 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#include <memory> +#include "common/types.h" +#include "framework/lod_tensor.h" +#include "framework/operator.h" +#include "framework/scope.h" +#include "framework/tensor.h" + +namespace paddle_mobile { + +inline bool is_conv(const std::string &type) { + if (type.compare(G_OP_TYPE_CONV) == 0) { + return true; + } + if (type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0) { + return true; + } + if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0) { + return true; + } + if (type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0) { + return true; + } + if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0) { + return true; + } + return false; +} + +template <typename Dtype> +void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op, + std::shared_ptr<framework::Scope> scope) { + if (!is_conv(op.get()->Type())) { + return; + } + framework::Tensor* filter = nullptr; + auto var_vec = op.get()->Inputs().at("Filter"); + if (!var_vec.empty()) { + auto var = scope.get()->FindVar(var_vec[0]); + filter = var->template GetMutable<framework::LoDTensor>(); + } + float scale = 0; + + // 32bit filter -> 8bit filter; + if (filter != nullptr && filter->type() == typeid(float)) { + framework::Tensor* originalFilter = filter; + std::unique_ptr<framework::Tensor> quantFilter(new framework::Tensor()); + float* floatData = originalFilter->data<float>(); + int8_t* intData = quantFilter->mutable_data<int8_t>(originalFilter->dims()); + } +} + +} // namespace paddle_mobile diff --git a/src/framework/tensor.h b/src/framework/tensor.h index 388788216fe45b66441a0390e2ef09c2d51c16dc..3dba76d790d44f154f359454250d15b81ff717a3 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -260,7 +260,7 @@ class Tensor { inline float *scale_pointer() { return
&scale; } }; - struct &fpga_args() const { + struct FPGAArgs &fpga_args() { return fpgaArgs_; } #endif diff --git a/src/io/executor.cpp b/src/io/executor.cpp index d6434b64aa752fd62bc637a882298228d59880b8..c09fe2c58532437336307ce007532d43689d8fd2 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -32,6 +32,10 @@ limitations under the License. */ #include "common/threadpool.h" #endif +#ifdef PADDLE_MOBILE_FPGA +#include "fpga/fpga_quantilization.h" +#endif + namespace paddle_mobile { using framework::Variable; @@ -96,6 +100,11 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size, for (const auto &op : ops) { op->Init(); } +#ifdef PADDLE_MOBILE_FPGA + for (const auto &op : ops) { + quantilize_op(op, program_.scope); + } +#endif } template <typename Dtype, Precision P> diff --git a/test/fpga/test_tensor_quant.cpp b/test/fpga/test_tensor_quant.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1e30b9be551c608c5200460ebb80526270da5aed --- /dev/null +++ b/test/fpga/test_tensor_quant.cpp @@ -0,0 +1,34 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
*/ + +#include <iostream> +#include "../test_helper.h" +#include "../test_include.h" + +int main() { + paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; + bool optimize = false; auto time1 = time(); + if (paddle_mobile.Load(g_googlenet, optimize)) { + auto time2 = time(); + DLOG << "load cost: " << time_diff(time1, time2) << "ms"; + std::vector<float> input; + std::vector<int64_t> dims{1, 3, 224, 224}; + GetInput(g_test_image_1x3x224x224, &input, dims); + auto time3 = time(); + auto vec_result = paddle_mobile.Predict(input, dims); + auto time4 = time(); + DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + } + return 0; +}