diff --git a/src/fpga/fpga_quantilization.cpp b/src/fpga/fpga_quantilization.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5bbf4f254d465f4c45124e7512b64662f155478d
--- /dev/null
+++ b/src/fpga/fpga_quantilization.cpp
@@ -0,0 +1,83 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "fpga/fpga_quantilization.h"
+#include <algorithm>
+
+namespace paddle_mobile {
+namespace fpga {
+
+// Repacks `num` images from CHW element order (data_in) to HWC (data_out).
+// Each buffer must hold num * channel * height * width elements.
+template <typename Dtype>
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
+                       int height, int width) {
+  const int amount_per_row = width * channel;
+  for (int n = 0; n < num; n++) {
+    for (int c = 0; c < channel; c++) {
+      for (int h = 0; h < height; h++) {
+        const int offset_height = h * amount_per_row;
+        for (int w = 0; w < width; w++) {
+          *(data_out + offset_height + w * channel + c) = *(data_in++);
+        }
+      }
+    }
+    data_out += height * amount_per_row;  // step one whole image
+  }
+}
+
+// Quantilizes a float NCHW filter into a newly allocated int8 NHWC tensor that
+// the caller owns; a filter of any other type is returned unchanged.
+template <typename Dtype>
+framework::Tensor* quantilize_filter(framework::Tensor* filter) {
+  if (filter->type() != typeid(float)) {
+    return filter;  // e.g. int8_t: model is already quantilized
+  }
+  const int batch_size = filter->dims()[0];
+  const int channel = filter->dims()[1];
+  const int height = filter->dims()[2];
+  const int width = filter->dims()[3];
+  const auto numel = filter->numel();
+
+  // Track the largest magnitude, not the largest value, so that negative
+  // weights also map into [-fix_range, fix_range].
+  const float* float_data = filter->data<float>();
+  float max = 0.0f;
+  for (int i = 0; i < numel; ++i) {
+    max = std::max(max, std::max(float_data[i], -float_data[i]));
+  }
+  const float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
+  const float scale = max / fix_range;
+
+  // 32bit filter -> 8bit filter; scale is 0 only for an all-zero filter.
+  int8_t* temp = new int8_t[numel];
+  for (int i = 0; i < numel; ++i) {
+    temp[i] = (scale > 0.0f) ? static_cast<int8_t>(float_data[i] / scale) : 0;
+  }
+
+  framework::Tensor* quant_filter = new framework::Tensor();
+  int8_t* int_data = quant_filter->mutable_data<int8_t>(filter->dims());
+  // NOTE(review): assumes Tensor has a public `scale` member -- confirm.
+  quant_filter->scale = scale;
+  // NCHW -> NHWC;
+  chw_to_hwc<int8_t>(temp, int_data, batch_size, channel, height, width);
+  delete[] temp;
+  return quant_filter;
+}
+
+// Defined in this .cpp, so other files need an explicit instantiation to link.
+template framework::Tensor* quantilize_filter<float>(framework::Tensor*);
+
+}  // namespace fpga
+}  // namespace paddle_mobile
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h
index 7a1df04732580c7225423cedeb277beca3edc154..8dacd20abdc85da05a451ec763fd01f03f8f4516 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -18,35 +18,13 @@ limitations under the License. */
 #include "framework/tensor.h"
 
 namespace paddle_mobile {
+namespace fpga {
 
 template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter) {
-  float scale = 0;
-  // 32bit filter -> 8bit filter;
-  float min = 0f;
-  float max = 0f;
-  if (filter->type() == typeid(float)) {
-    float* floatData = originalFilter->data<float>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      min = std::min(min, floatData[i]);
-      max = std::max(max, floatData[i]);
-    }
-
-    float fix_range = (float)((1 << (8 - 1)) - 1);
-    float float_range = max;
-    scale = (float_range / fix_range);
-
-    framework::Tensor* originalFilter = filter;
-    framework::Tensor* quantFilter = new framework::Tensor();
-    int8_t* intData = quantFilter->mutable_data<int8_t>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      intData[i] = (int8_t)floatData[i] * scale;
-    }
-    quantFilter.scale = scale;
-    // NCHW -> NHWC;
-    return quantFilter;
-  }
-  return filter;
-}
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
+                       int height, int width);  // defined in the .cpp only
 
+template <typename Dtype>  // not deducible from the argument; pass explicitly
+framework::Tensor* quantilize_filter(framework::Tensor* filter);
+}  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
index 6719db3a80cb3c3a2ee603096b2659fa5489497d..3240a8d6b9604d0876691b641c072bc596312dbd 100644
--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include "operators/kernel/conv_add_bn_kernel.h"
 #include "fpga/api/fpga_api.h"
+#include "fpga/quantilization.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -28,7 +29,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
   const Tensor *filter = param->Filter();
-  auto filter_ptr = filter->data<float>();
+
   Tensor *out = param->Output();
   auto out_ptr = out->mutable_data<half>();
   auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -41,7 +42,8 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
                         "Image channel should be equal to bias number");
 
   const int channel = input->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  float *bs_ptr =
+      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
   Tensor *new_scale = new Tensor();
   Tensor *new_bias = new Tensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({channel});
@@ -58,26 +60,33 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
 
+  const Tensor *quant_filter = quantilize_filter(filter);
+
+  // delete original filter?
+  filter = quant_filter;
+
+  auto filter_ptr = filter->data<float>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
+  convArgs.filter_address = reinterpret_cast<void *> filter_ptr;
   convArgs.filter_num = filter->dims()[0];
   convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
+  convArgs.sb_address = reinterpret_cast<void *> bs_ptr;
   convArgs.kernel.stride_h = param->Strides()[0];
   convArgs.kernel.stride_w = param->Strides()[1];
   convArgs.kernel.height = filter->dims()[2];
   convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
+  convArgs.image.address = reinterpret_cast<void *> input_ptr;
   convArgs.image.channels = input->dims()[1];
   convArgs.image.height = input->dims()[2];
   convArgs.image.width = input->dims()[3];
   convArgs.image.pad_height = param->Paddings()[0];
   convArgs.image.pad_width = param->Paddings()[1];
   convArgs.image.scale_address = input->fpga_args().scale_pointer();
-  convArgs.output.address = (void *)out_ptr;
+  convArgs.output.address = reinterpret_cast<void *> out_ptr;
   convArgs.output.scale_address = out->fpga_args().scale_pointer();
   param->SetFpgaArgs(convArgs);
+
   return true;
 }