diff --git a/src/common/types.cpp b/src/common/types.cpp
index b6387503856f438acd74b8d147da13a2b009f2a1..41bbfa5256a1d55ac9c8ebe3ba695c4a6f1be720 100644
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -50,6 +50,7 @@ const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
     "fusion_elementwise_add_relu";
 const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
 const char *G_OP_TYPE_REGION = "region";
+const char *G_OP_TYPE_FUSION_CONV_BN = "fusion_conv_bn";
 
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -85,6 +86,7 @@
         {G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Y"}}},
         {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
         {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
-        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}}};
 
 }  // namespace paddle_mobile
diff --git a/src/common/types.h b/src/common/types.h
index 6066879305d5ea7d1b6dcb0bb618c234338cc171..78c96f327a5f483ebee9d56bf338a4415542fbde 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -113,6 +113,7 @@ extern const char *G_OP_TYPE_FUSION_POOL_BN;
 extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
 extern const char *G_OP_TYPE_FUSION_FC_RELU;
 extern const char *G_OP_TYPE_REGION;
+extern const char *G_OP_TYPE_FUSION_CONV_BN;
 
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
diff --git a/src/fpga/fpga_quantilization.cpp b/src/fpga/fpga_quantilization.cpp
index dee3d3abc19e020304ff9e658d40797b6681c43b..8b351f1a81e0a92f0e2f12a3f61dd2a7d3948c85 100644
--- a/src/fpga/fpga_quantilization.cpp
+++ b/src/fpga/fpga_quantilization.cpp
@@ -47,7 +47,9 @@ static Dtype find_max(Dtype* data, int num) {
 }
 
 // template <typename Dtype>
-framework::Tensor* quantify_filter(framework::Tensor* filter) {
+void quantify_filter(framework::Tensor* filter) {
+  DLOG << "quantilize_filter........";
+
   float scale = 0;
   float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
 
@@ -62,25 +64,20 @@ framework::Tensor* quantify_filter(framework::Tensor* filter) {
   // 32bit filter -> 8bit filter;
   if (filter->type() == typeid(float)) {
     float* float_data = filter->data<float>();
-    float max = find_max(float_data, filter->numel());
+    float max = find_max<float>(float_data, filter->numel());
     scale = (max / fix_range);
 
-    framework::Tensor* filter = filter;
-    framework::Tensor* quant_filter = new framework::Tensor();
-
-    int_data = quant_filter->mutable_data<int8_t>();
     for (int i = 0; i < filter->numel(); ++i) {
       tmp_data[i] = (int8_t)float_data[i] * scale;
     }
-    filter = quant_filter;
+    int_data = filter->mutable_data<int8_t>();
   } else {
-    int8_t max = find_max(filter->data<int8_t>(), filter->numel());
+    int8_t max = find_max<int8_t>(filter->data<int8_t>(), filter->numel());
     scale = (max / fix_range);
 
-    int_data = filter->data<int8_t>();
     for (int i = 0; i < filter->numel(); ++i) {
-      tmp_data[i] = int_data[i];
+      tmp_data[i] = filter->data<int8_t>()[i];
     }
     int_data = filter->mutable_data<int8_t>();
   }
@@ -88,7 +85,6 @@ framework::Tensor* quantify_filter(framework::Tensor* filter) {
   chw_to_hwc<int8_t>(tmp_data, int_data, batch_size, channel, height, width);
   delete tmp_data;
   *(filter->fpga_args().scale_pointer()) = scale;
-  return filter;
 }
 
 }  // namespace fpga
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h
index 56e14f89ac0e7d21e7bbb704df838374be84fbcd..4f1f6ad402a3ff4df773ecbd2121820f4c7dc265 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -25,6 +25,7 @@
 static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
                        int height, int width);
 
 // template <typename Dtype>
-framework::Tensor* quantify_filter(framework::Tensor* filter);
+void quantify_filter(framework::Tensor* filter);
+
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index d6434b64aa752fd62bc637a882298228d59880b8..73e6c9d6f170fc4eebb6af2f8b7a67c847961950 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -89,7 +89,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   } else {
     InitMemory();
   }
-
   std::shared_ptr<framework::BlockDesc> to_predict_block =
       to_predict_program_->Block(0);
   auto &ops = ops_of_block_[*to_predict_block.get()];
diff --git a/src/memory/t_malloc.cpp b/src/memory/t_malloc.cpp
index 42b8c4551871c58955251d94845ca13576d7735b..8902543347b2db7caee7126b2a28fa460ca741db 100644
--- a/src/memory/t_malloc.cpp
+++ b/src/memory/t_malloc.cpp
@@ -26,7 +26,7 @@ namespace paddle_mobile {
 namespace memory {
 const int MALLOC_ALIGN = 64;
 
-#ifdef PADDLE_MOBILE_FPGA
+#ifdef PADDLE_MOBILE_FPGA__VV
 namespace fpga = paddle_mobile::fpga;
 
 void Copy(void *dst, const void *src, size_t num) {
diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h
index 4766d56d9ae0b86cc28c476a17547acfd53ab02b..7a58e29cea635e62e64806a0c40956baf684d76e 100644
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -41,7 +41,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
   void RunImpl() const { fpga::PerformBypass(param_.FpgaArgs()); }
   void Init() {
     const Tensor *input = param_.InputX();
-    auto input_ptr = input->data<float>();
+    auto input_ptr = input->mutable_data<float>();
     Tensor *output = param_.Out();
     auto output_ptr = output->mutable_data<half>();
     fpga::BypassArgs args;
diff --git a/src/operators/fusion_elementwise_add_relu_op.h b/src/operators/fusion_elementwise_add_relu_op.h
index b7e1f244732f9b4c463b6dd0f1ba81e7baf04bfd..b9d662cae559781789130e0483bccef06e1ac9b0 100644
--- a/src/operators/fusion_elementwise_add_relu_op.h
+++ b/src/operators/fusion_elementwise_add_relu_op.h
@@ -28,7 +28,7 @@ using std::vector;
 class FusioneElementwiseAddReluMatcher : public framework::FusionOpMatcher {
  public:
  FusioneElementwiseAddReluMatcher() {
-    node_ = framework::Node(G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU);
+    node_ = framework::Node(G_OP_TYPE_ELEMENTWISE_ADD);
     node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
   }
diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
index 095ae4a6d0c8d642aa1e8225bb69f27fb63091b0..91553a8aa3289030ee06bee2def09cb672665e83 100644
--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -60,10 +60,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
 
-  Tensor *quant_filter = fpga::quantify_filter(filter);
-
-  // delete original filter?
-  filter = quant_filter;
+  fpga::quantify_filter(filter);
 
   auto filter_ptr = filter->data<int8_t>();
   fpga::ConvArgs convArgs;
diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/fpga/conv_kernel.cpp
deleted file mode 100644
index 91d0f393fcc1018bacd507c5f7975f7b3a2a56ca..0000000000000000000000000000000000000000
--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef CONV_OP
-
-#include "operators/kernel/conv_kernel.h"
-#include "operators/kernel/central-arm-func/conv_arm_func.h"
-
-namespace paddle_mobile {
-namespace operators {
-
-template <>
-bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
-  return true;
-}
-
-template <>
-void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {
-  // ConvCompute<float>(param);
-}
-
-template class ConvKernel<FPGA, float>;
-
-}  // namespace operators
-}  // namespace paddle_mobile
-
-#endif
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 4d1d5af29b81b044ca6d89b4a48a078f73dcabc9..a139714b2c71ce6ef2c79343af0e918e577114fb 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -210,7 +210,7 @@ class ConvParam : OpParam {
   const Tensor *Input() const { return input_; }
 
-  const Tensor *Filter() const { return filter_; }
+  Tensor *Filter() const { return filter_; }
 
   Tensor *Output() const { return output_; }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 468cbd4ed6d579f7b39f8628a3e052e90ae26644..1033cfa180ac6928b2edf6b0cef2885dd0e72a8c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -27,6 +27,11 @@ elseif("resnet" IN_LIST NET)
     ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-resnet paddle-mobile)
 elseif("FPGAnets" IN_LIST NET)
+    # ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
+    # target_link_libraries(test-resnet paddle-mobile)
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-tensor-quant paddle-mobile)
+
 else ()
     # gen test
@@ -173,8 +178,7 @@ else ()
 endif()
 
-if(FPGA)
-    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-tensor-quant paddle-mobile)
-
-endif()
+# if(FPGA)
+#     ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
+#     target_link_libraries(test-tensor-quant paddle-mobile)
+# endif()
diff --git a/test/fpga/test_tensor_quant.cpp b/test/fpga/test_tensor_quant.cpp
index 3835c395a4764c3c978b6bba9c1af48305be1d58..6cfc27e91ced109e41bf5420649dbb762ee94d66 100644
--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
@@ -12,23 +12,34 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
-  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
-  bool optimize = false;
-  if (paddle_mobile.Load(g_googlenet, optimize)) {
-    auto time1 = time();
-    DLOG << "load cost: " << time_diff(time1, time1) << "ms";
-    std::vector<float> input;
-    std::vector<int64_t> dims{1, 3, 224, 224};
-    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(g_resnet, true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+    std::vector<int64_t> dims{1, 3, 32, 32};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+    // warm up with one run before timing
+    paddle_mobile.Predict(input, dims);
     auto time3 = time();
-    auto vec_result = paddle_mobile.Predict(input, dims);
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
   }
+  return 0;
 }
diff --git a/tools/op.cmake b/tools/op.cmake
index 0eab67267032d3956a52b80ab7494c6572df7074..af246f1d48e7c687812c454af163f12d5f804571 100644
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -82,6 +82,7 @@ if ("FPGAnets" IN_LIST NET)
     set(CONCAT_OP ON)
     set(SOFTMAX_OP ON)
    set(DROPOUT_OP ON)
+    # set(CONV_OP ON)
     set(FOUND_MATCH ON)
 endif()
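
For reference, below is a minimal standalone sketch of the filter conversion this patch routes through `fpga::quantify_filter`: symmetric per-tensor int8 quantization with `fix_range = (1 << 7) - 1 = 127`, followed by a CHW-to-HWC reorder before the data pointer and scale are handed to the FPGA. The struct and function names are hypothetical, and the use of the absolute maximum and round-to-nearest (so that `scale * q ≈ f`) is an assumption about the intended arithmetic, not a copy of the kernel.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical sketch, not the paddle-mobile API.
struct QuantizedFilter {
  std::vector<int8_t> data;  // int8 weights, HWC-ordered per filter
  float scale;  // what quantify_filter stores via fpga_args().scale_pointer()
};

QuantizedFilter QuantizeFilterSketch(const std::vector<float>& chw, int num,
                                     int channel, int height, int width) {
  // Assumption: "max" means the largest absolute weight in the tensor.
  float max_abs = 0.f;
  for (float v : chw) max_abs = std::max(max_abs, std::fabs(v));
  const float fix_range = static_cast<float>((1 << (8 - 1)) - 1);  // 127
  const float scale = max_abs > 0.f ? max_abs / fix_range : 1.f;

  // Quantize so the largest weight maps to +/-127 and scale * q ~= f.
  std::vector<int8_t> q(chw.size());
  for (size_t i = 0; i < chw.size(); ++i) {
    q[i] = static_cast<int8_t>(std::round(chw[i] / scale));
  }

  // Reorder each of the `num` filters from CHW to HWC, assuming chw_to_hwc
  // computes dst[(h*width + w)*channel + c] = src[(c*height + h)*width + w].
  QuantizedFilter out{std::vector<int8_t>(q.size()), scale};
  for (int n = 0; n < num; ++n) {
    const int base = n * channel * height * width;
    for (int c = 0; c < channel; ++c) {
      for (int h = 0; h < height; ++h) {
        for (int w = 0; w < width; ++w) {
          out.data[base + (h * width + w) * channel + c] =
              q[base + (c * height + h) * width + w];
        }
      }
    }
  }
  return out;
}
```

Dequantization on the way back is then `f ≈ scale * q`, which is why the kernel keeps `scale` alongside the int8 weights in the tensor's FPGA args rather than returning a new tensor.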