diff --git a/src/fpga/fpga_quantilization.cpp b/src/fpga/fpga_quantilization.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..34033a60a683183695a79bfafbaf14223e2eebf2
--- /dev/null
+++ b/src/fpga/fpga_quantilization.cpp
@@ -0,0 +1,95 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "fpga/fpga_quantilization.h"
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+
+namespace paddle_mobile {
+namespace fpga {
+
+template <typename Dtype>
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
+                       int height, int width) {
+  for (int n = 0; n < num; n++) {
+    int amount_per_row = width * channel;
+    for (int c = 0; c < channel; c++) {
+      for (int h = 0; h < height; h++) {
+        int offset_height = h * amount_per_row;
+        for (int w = 0; w < width; w++) {
+          *(data_out + offset_height + w * channel + c) = *(data_in++);
+        }
+      }
+    }
+    // advance to the next image of the batch in the output
+    data_out += channel * height * width;
+  }
+}
+
+template <typename Dtype>
+static Dtype find_max(Dtype* data, int num) {
+  Dtype max = 0;
+  for (int i = 0; i < num; ++i) {
+    // track the largest absolute value so negative weights stay in range
+    max = std::max(max, static_cast<Dtype>(std::abs(data[i])));
+  }
+  return max;
+}
+
+framework::Tensor* quantilize_filter(framework::Tensor* filter) {
+  float scale = 0;
+  float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
+
+  const int batch_size = filter->dims()[0];
+  const int channel = filter->dims()[1];
+  const int height = filter->dims()[2];
+  const int width = filter->dims()[3];
+
+  int8_t* int_data = nullptr;
+  int8_t* tmp_data = new int8_t[filter->numel()];
+
+  // 32bit filter -> 8bit filter;
+  if (filter->type() == typeid(float)) {
+    float* float_data = filter->data<float>();
+    float max = find_max(float_data, filter->numel());
+
+    scale = (max / fix_range);
+
+    framework::Tensor* quant_filter = new framework::Tensor();
+    int_data = quant_filter->mutable_data<int8_t>(filter->dims());
+    for (int i = 0; i < filter->numel(); ++i) {
+      // quantize: w / scale, truncated toward zero, so max |w| maps to 127
+      tmp_data[i] = static_cast<int8_t>(float_data[i] / scale);
+    }
+    filter = quant_filter;
+  } else {
+    int8_t max = find_max(filter->data<int8_t>(), filter->numel());
+    scale = (max / fix_range);
+
+    int_data = filter->data<int8_t>();
+    for (int i = 0; i < filter->numel(); ++i) {
+      tmp_data[i] = int_data[i];
+    }
+    int_data = filter->mutable_data<int8_t>();
+  }
+  // NCHW -> NHWC;
+  chw_to_hwc(tmp_data, int_data, batch_size, channel, height, width);
+  delete[] tmp_data;
+  *(filter->fpga_args().scale_pointer()) = scale;
+  return filter;
+}
+
+}  // namespace fpga
+}  // namespace paddle_mobile
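Note on the scheme above: quantilize_filter keeps a single scale per tensor, scale = max|w| / 127, stores w / scale as int8, and leaves the scale where the FPGA can multiply results back. A minimal, framework-free sketch of that round trip (all names below are illustrative, not part of the patch):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> w = {0.50f, -1.27f, 0.02f, 1.00f};

  // Per-tensor scale: the largest absolute weight maps to +/-127.
  float max_abs = 0;
  for (float v : w) max_abs = std::max(max_abs, std::fabs(v));
  const float scale = max_abs / 127.0f;

  for (float v : w) {
    int8_t q = static_cast<int8_t>(v / scale);  // quantize (truncates toward zero)
    float back = q * scale;                     // dequantize
    std::printf("%+.4f -> %+4d -> %+.4f\n", v, q, back);
  }
  return 0;
}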
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h
index 7a1df04732580c7225423cedeb277beca3edc154..8dacd20abdc85da05a451ec763fd01f03f8f4516 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -18,35 +18,13 @@ limitations under the License. */
 #include "framework/tensor.h"
 
 namespace paddle_mobile {
+namespace fpga {
 
 template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter) {
-  float scale = 0;
-  // 32bit filter -> 8bit filter;
-  float min = 0f;
-  float max = 0f;
-  if (filter->type() == typeid(float)) {
-    float* floatData = originalFilter->data<float>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      min = std::min(min, floatData[i]);
-      max = std::max(max, floatData[i]);
-    }
-
-    float fix_range = (float)((1 << (8 - 1)) - 1);
-    float float_range = max;
-    scale = (float_range / fix_range);
-
-    framework::Tensor* originalFilter = filter;
-    framework::Tensor* quantFilter = new framework::Tensor();
-    int8_t* intData = quantFilter->mutable_data<int8_t>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      intData[i] = (int8_t)floatData[i] * scale;
-    }
-    quantFilter.scale = scale;
-    // NCHW -> NHWC;
-    return quantFilter;
-  }
-  return filter;
-}
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
+                       int height, int width);
+
+framework::Tensor* quantilize_filter(framework::Tensor* filter);
+
+}  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/concat_kernel.cpp b/src/operators/kernel/fpga/concat_kernel.cpp
index ae1270b146373587287140116114970963dcca7c..c6e04787a58bc437bf0738cf67072426f1cbaa57 100644
--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -36,18 +36,18 @@ void ConcatKernel<FPGA, float>::Compute(const ConcatParam &param) const {
   auto out_channel = out_dim[3];
   auto out_offset = 0;
-
   for (int i = 0; i < inputs.size(); ++i) {
     auto input = inputs[i];
     auto channels = input->dims()[3];
     out_offset += channels;
     auto src = input->data<half>();
     for (int j = 0; j < pixels; ++j) {
-      auto dst = out->data<half>() + out_offset;
+      auto dst = out->mutable_data<half>() + out_offset;
       memory::Copy(dst, src, sizeof(half));
     }
   }
 }
+template class ConcatKernel<FPGA, float>;
 
 }  // namespace operators
 }  // namespace paddle_mobile
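For reference, the intended result of the FPGA concat kernel above: with NHWC tensors, concatenation along the channel axis places each input's channels side by side within every pixel. A standalone sketch of that layout arithmetic (array names are illustrative, not the kernel's):

#include <cstdio>
#include <vector>

// Concatenate two NHWC tensors along the channel axis: for every pixel,
// copy input A's channels, then input B's channels.
int main() {
  const int pixels = 2, ca = 2, cb = 1;
  std::vector<float> a = {1, 2, 3, 4};  // pixels x ca
  std::vector<float> b = {9, 8};        // pixels x cb
  std::vector<float> out(pixels * (ca + cb));

  for (int p = 0; p < pixels; ++p) {
    for (int c = 0; c < ca; ++c) out[p * (ca + cb) + c] = a[p * ca + c];
    for (int c = 0; c < cb; ++c) out[p * (ca + cb) + ca + c] = b[p * cb + c];
  }

  for (float v : out) std::printf("%g ", v);  // prints: 1 2 9 3 4 8
  std::printf("\n");
  return 0;
}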
diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
index 6719db3a80cb3c3a2ee603096b2659fa5489497d..3240a8d6b9604d0876691b641c072bc596312dbd 100644
--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include "operators/kernel/conv_add_bn_kernel.h"
 #include "fpga/api/fpga_api.h"
+#include "fpga/fpga_quantilization.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -28,7 +29,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
   const Tensor *filter = param->Filter();
-  auto filter_ptr = filter->data<float>();
+
   Tensor *out = param->Output();
   auto out_ptr = out->mutable_data<float>();
   auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -41,7 +42,8 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
                     "Image channel should be equal to bias number");
 
   const int channel = input->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  float *bs_ptr =
+      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
   Tensor *new_scale = new Tensor();
   Tensor *new_bias = new Tensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({channel});
@@ -58,26 +60,33 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
 
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
 
+  Tensor *quant_filter = quantilize_filter(const_cast<Tensor *>(filter));
+
+  // delete original filter?
+  filter = quant_filter;
+
+  auto filter_ptr = quant_filter->data<int8_t>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
+  convArgs.filter_address = reinterpret_cast<void *>(filter_ptr);
   convArgs.filter_num = filter->dims()[0];
   convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
+  convArgs.sb_address = reinterpret_cast<void *>(bs_ptr);
   convArgs.kernel.stride_h = param->Strides()[0];
   convArgs.kernel.stride_w = param->Strides()[1];
   convArgs.kernel.height = filter->dims()[2];
   convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
+  convArgs.image.address = reinterpret_cast<void *>(input_ptr);
   convArgs.image.channels = input->dims()[1];
   convArgs.image.height = input->dims()[2];
   convArgs.image.width = input->dims()[3];
   convArgs.image.pad_height = param->Paddings()[0];
   convArgs.image.pad_width = param->Paddings()[1];
   convArgs.image.scale_address = input->fpga_args().scale_pointer();
-  convArgs.output.address = (void *)out_ptr;
+  convArgs.output.address = reinterpret_cast<void *>(out_ptr);
   convArgs.output.scale_address = out->fpga_args().scale_pointer();
   param->SetFpgaArgs(convArgs);
+
   return true;
 }
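The elided Init lines above compute new_scale/new_bias before they are packed into bs_ptr. Assuming the usual folding of batch norm into a per-channel affine (variable names here are placeholders, not necessarily the kernel's):

#include <cmath>
#include <cstdio>

int main() {
  const int channel = 3;
  float bn_scale[channel] = {1.0f, 0.5f, 2.0f};
  float bn_bias[channel] = {0.1f, 0.0f, -0.2f};
  float mean[channel] = {0.0f, 1.0f, -1.0f};
  float variance[channel] = {1.0f, 4.0f, 0.25f};
  float conv_bias[channel] = {0.0f, 0.1f, 0.2f};
  const float epsilon = 1e-5f;

  // Fold y = bn_scale * (conv + conv_bias - mean) / sqrt(var + eps) + bn_bias
  // into y = new_scale * conv + new_bias, one pair per output channel.
  for (int c = 0; c < channel; ++c) {
    float inv_std = 1.0f / std::sqrt(variance[c] + epsilon);
    float new_scale = bn_scale[c] * inv_std;
    float new_bias = bn_bias[c] + (conv_bias[c] - mean[c]) * new_scale;
    std::printf("c=%d new_scale=%f new_bias=%f\n", c, new_scale, new_bias);
  }
  return 0;
}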
diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/fpga/conv_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..91d0f393fcc1018bacd507c5f7975f7b3a2a56ca
--- /dev/null
+++ b/src/operators/kernel/fpga/conv_kernel.cpp
@@ -0,0 +1,38 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef CONV_OP
+
+#include "operators/kernel/conv_kernel.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
+  return true;
+}
+
+template <>
+void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {
+  // ConvCompute<float>(param);
+}
+
+template class ConvKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ee1d12bfd6be13d67fd8360be2ab5c8d7f86e662..f4a14f1bc4197051594a0f8609b4662ad4c7cefb 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -160,4 +160,12 @@ else ()
 
     #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
+
+
+endif()
+
+if(FPGA)
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-tensor-quant paddle-mobile)
+
 
 endif()
diff --git a/test/fpga/test_tensor_quant.cpp b/test/fpga/test_tensor_quant.cpp
index 1e30b9be551c608c5200460ebb80526270da5aed..3835c395a4764c3c978b6bba9c1af48305be1d58 100644
--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
@@ -20,7 +20,7 @@ int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
   bool optimize = false;
   if (paddle_mobile.Load(g_googlenet, optimize)) {
-    auto time2 = time();
+    auto time1 = time();
     DLOG << "load cost: " << time_diff(time1, time1) << "ms";
     std::vector<float> input;
     std::vector<int64_t> dims{1, 3, 224, 224};
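On the test fix above: time_diff(time1, time1) still measures nothing, since both stamps are taken after Load returns; the pattern needs one timestamp on each side of the call. A self-contained sketch of the intended measurement, using std::chrono in place of the test_helper time()/time_diff() helpers:

#include <chrono>
#include <cstdio>
#include <thread>

int main() {
  auto time1 = std::chrono::steady_clock::now();
  // stand-in for the expensive call being timed, e.g. paddle_mobile.Load(...)
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
  auto time2 = std::chrono::steady_clock::now();

  double ms = std::chrono::duration<double, std::milli>(time2 - time1).count();
  std::printf("load cost: %.1fms\n", ms);
  return 0;
}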