diff --git a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
index ded66540814e23d0fe5fdafed0458b7ff072f5c7..d16ec56d701ae5a861bdca7f1a334da80073c78f 100644
--- a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
@@ -32,6 +32,7 @@ bool ConvAddBNReluKernel::Init(
   auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
+  const int groups = param->Groups();
   float Si = input->scale[0];
   float So = out->scale[0];
   float Sf = fpga::filter_find_max(filter);
@@ -63,9 +64,12 @@ bool ConvAddBNReluKernel::Init(
     // bs_ptr[i] = new_bias_ptr[i];
     bs_ptr[i + channel] = new_scale_ptr[i] * Si / So * Sf / 127.0;
     bs_ptr[i] = new_bias_ptr[i] * 127.0 / So;
+    if (groups == channel) {
+      new_scale_ptr[i] = new_scale_ptr[i] * Si / So;
+      new_bias_ptr[i] = new_bias_ptr[i] * 127.0f / So;
+    }
   }
-  const int groups = param->Groups();
   if (groups == channel) {
     fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
     fpga::DWconvArgs dwconv_arg = {0};
diff --git a/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp b/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
index 1597885e43e01895b6acd425031341af70d5eaf7..76889b0dd914c7e61a91c1de9c35faf9baf42648 100644
--- a/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
@@ -32,6 +32,9 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) {
   // auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   // PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
   //                       "Output channel should be equal to bias number");
@@ -53,6 +56,10 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) {
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = 0;  // bias_ptr[i % (channel)];
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -62,6 +69,10 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) {
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = 0;  // bias_ptr[i % (channel)];
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
diff --git a/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp
index a8205df3c9c1052055ba15ca58fd215f1d49ba0e..5e3417f8c69ea901d9d671981d543c6ce3e50340 100644
--- a/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp
@@ -32,7 +32,9 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) {
   auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
-
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -53,6 +55,10 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) {
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -62,6 +68,10 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) {
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
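The `Si / So * Sf / 127.0f` factor that appears in every `bs_ptr[i + channel]` above is the dequantize, scale, requantize chain folded into one per-channel multiplier. Below is a minimal sketch of that arithmetic, assuming the symmetric int8 model `q ≈ x * 127 / S` (with `S` a tensor's max-abs value, which is what `fpga::filter_find_max` appears to return); the helper names are illustrative, not the kernels' own.

```cpp
// Int8 conv accumulator: acc = sum(q_in * q_w), where q_in ~= x * 127 / Si
// and q_w ~= w * 127 / Sf, so conv_fp32 ~= acc * Si * Sf / (127 * 127).
// Requantizing y = scale * conv_fp32 + bias into q_out = y * 127 / So gives
//   q_out = acc * (scale * Si / So * Sf / 127) + bias * 127 / So,
// which is what these hunks write into the two halves of bs_ptr.
float fold_scale(float scale, float Si, float So, float Sf) {
  return scale * Si / So * Sf / 127.0f;  // bs_ptr[i + channel]
}
float fold_bias(float bias, float So) {
  return bias * 127.0f / So;  // bs_ptr[i]
}
```

The depthwise branches (`groups == channel`) drop the `Sf / 127` term, which is consistent with the DW engine consuming filters that are not int8-requantized.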
diff --git a/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp
index b27f5cf870d2e3220bec31ee63bb27361cb2c8cf..2913a628dddb9e89ce5dd327cb500f7c3e2d4390 100644
--- a/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp
@@ -33,7 +33,9 @@ bool DeconvAddBNReluKernel::Init(
   auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
-
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -54,6 +56,10 @@ bool DeconvAddBNReluKernel::Init(
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -63,6 +69,10 @@ bool DeconvAddBNReluKernel::Init(
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
diff --git a/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
index 41844d008b2c8313fc8f1ac75a00d9864b5a20a5..dcafcbea9c5f995af1a129ca0fd16d96aea8f79c 100644
--- a/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
@@ -32,7 +32,9 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) {
   auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
-
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -53,6 +55,10 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) {
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -62,6 +68,10 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) {
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
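The same pair of fill loops now appears verbatim in `ConvTransposeKernel`, `DeconvAddBNKernel`, `DeconvAddBNReluKernel`, `DeconvAddKernel`, and (below) `DeconvAddReluKernel`. A shared helper would keep the scale handling in one place; a possible shape, with a hypothetical name and a `nullptr` bias covering the bias-free `ConvTransposeKernel` case:

```cpp
// Hypothetical consolidation of the repeated bs_ptr fill; not in this patch.
static void FillDeconvBsPtr(float *bs_ptr, const float *bias_ptr, int channel,
                            int sub_conv_n, float Si, float So, float Sf,
                            bool depthwise) {
  const int n = channel * sub_conv_n;
  for (int i = 0; i < n; i++) {
    // The depthwise engine takes unquantized filters, so Sf / 127 is omitted.
    bs_ptr[i + n] = depthwise ? Si / So : Si / So * Sf / 127.0f;
    bs_ptr[i] =
        bias_ptr != nullptr ? bias_ptr[i % channel] * 127.0f / So : 0.0f;
  }
}
```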
"Output channel should be equal to bias number"); int channel = out->dims()[1]; @@ -53,6 +55,10 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; @@ -62,6 +68,10 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, diff --git a/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp index c6fc9d195511ae3218450fa58393ba420444eb92..1364b4b5aa8b00728ab2dc63d2507449f7e96200 100644 --- a/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp @@ -33,7 +33,9 @@ bool DeconvAddReluKernel::Init( auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); - + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], "Output channel should be equal to bias number"); int channel = out->dims()[1]; @@ -54,6 +56,10 @@ bool DeconvAddReluKernel::Init( PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; @@ -63,6 +69,10 @@ bool DeconvAddReluKernel::Init( param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, diff --git a/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp index 75597f0ecd570b6b21894a2f9a0ff0ad91a54ea4..6aae1ea7298e7d927f2db0dd89399ca4817a38b7 100644 --- a/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp @@ -34,6 +34,9 @@ bool DeconvBNReluKernel::Init( auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); auto bn_mean_ptr = param->InputMean()->data(); auto bn_var_ptr = param->InputVariance()->data(); auto bn_scale_ptr = 
@@ -56,12 +59,22 @@ bool DeconvBNReluKernel::Init(
   int sub_conv_n = param->Strides()[0];
   auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n *  // NOLINT
                                            sizeof(float));             // NOLINT
-
-  for (int i = 0; i < channel * sub_conv_n; i++) {
-    bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel];
-    bs_ptr[i] = new_bias_ptr[i % (channel)];
+  // for (int i = 0; i < channel * sub_conv_n; i++) {
+  //   bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel];
+  //   bs_ptr[i] = new_bias_ptr[i % (channel)];
+  // }
+  if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel] * Si / So;
+      bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So;
+    }
+  } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] =
+          new_scale_ptr[i % channel] * Si / So * Sf / 127.0f;
+      bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So;
+    }
   }
-
   PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
                         "stride_width should be equal to stride_height ");
   PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
diff --git a/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp b/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp
index eec058edc40f23dbbd98e12b084d6c232eaf08e4..145d7851f0bba14ac3fcba3d858e91a826683871 100644
--- a/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp
@@ -34,7 +34,11 @@ bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) {
   auto input_y_ptr = input_y->data();
   fpga::format_fp16_ofm(out);
   auto out_ptr = out->mutable_data();
-
+  float Si_1 = input_x->scale[0];
+  float Si_2 = input_y->scale[0];
+  float So = out->scale[0];
+  float C1 = Si_1 / So;
+  float C2 = Si_2 / So;
   fpga::EWAddArgs ewaddArgs = {0};
   // ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.output.activation.activation_type = activation_enable;
diff --git a/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp
index f36206a8a15451144a00a16aad176ca67c4a4114..44266049a27c6110346a328e81713486942f4110 100644
--- a/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp
@@ -32,7 +32,11 @@ bool ElementwiseAddReluKernel::Init(
   auto input_y_ptr = input_y->data();
   fpga::format_fp16_ofm(out);
   auto out_ptr = out->mutable_data();
-
+  float Si_1 = input_x->scale[0];
+  float Si_2 = input_y->scale[0];
+  float So = out->scale[0];
+  float C1 = Si_1 / So;
+  float C2 = Si_2 / So;
   fpga::EWAddArgs ewaddArgs = {0};
   // ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.output.activation.activation_type = activation_enable;
diff --git a/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp
index 3a29104d0fe0e3c69c9369fb1137b2c94ef04e43..1f85beb532a805e69418bf4edffe66c178764769 100644
--- a/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp
@@ -29,6 +29,9 @@ bool FusionFcKernel::Init(FusionFcParam *param) {
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data();
   auto out = param->Out();
+  float Si = input_x->scale[0];
+  float Sf = filter->scale[0];
+  float So = out->scale[0];
   // PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
   //                       "Image channel should be equal to weight number");
@@ -36,8 +39,10 @@ bool FusionFcKernel::Init(FusionFcParam *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = input_z_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0f;
+    bs_ptr[i] = input_z_ptr[i] * 127.0f / So;
   }
   int num = (uint32_t)filter->dims()[1];
   int chw = (uint32_t)filter->dims()[0];
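In the two elementwise-add kernels above, `C1 = Si_1 / So` and `C2 = Si_2 / So` are the coefficients of a requantized add: with `q_x ≈ x * 127 / Si_1` and `q_y ≈ y * 127 / Si_2`, the sum expressed in the output scale is `q_out = q_x * C1 + q_y * C2`. The hunks only compute the constants, so they are presumably wired into `ewaddArgs` fields outside the visible context. A scalar model of the intended arithmetic (saturating rounding assumed):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// q_out = (x + y) * 127 / So = q_x * (Si_1 / So) + q_y * (Si_2 / So)
int8_t EwAddRequant(int8_t q_x, int8_t q_y, float C1, float C2) {
  float q = std::round(q_x * C1 + q_y * C2);
  return static_cast<int8_t>(std::min(127.0f, std::max(-127.0f, q)));
}
```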
diff --git a/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp b/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp
index fef370515e9e9ffa1d90c184e62919235533b8a5..0ccec45195ec2efa90ae097d57547373a3253843 100644
--- a/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp
@@ -29,6 +29,9 @@ bool FusionFcReluKernel::Init(FusionFcReluParam *param) {
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data();
   auto out = param->Out();
+  float Si = input_x->scale[0];
+  float Sf = filter->scale[0];
+  float So = out->scale[0];
   // PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
   //                       "Image channel should be equal to weight number");
@@ -36,8 +39,10 @@ bool FusionFcReluKernel::Init(FusionFcReluParam *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = input_z_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0f;
+    bs_ptr[i] = input_z_ptr[i] * 127.0f / So;
   }
   int num = (uint32_t)filter->dims()[1];
   int chw = (uint32_t)filter->dims()[0];
diff --git a/src/operators/kernel/fpga/V2/pad2d_kernel.cpp b/src/operators/kernel/fpga/V2/pad2d_kernel.cpp
deleted file mode 100644
index e5328dc31978ad4da6ba2872881be52c0975e692..0000000000000000000000000000000000000000
--- a/src/operators/kernel/fpga/V2/pad2d_kernel.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
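The two FC kernels above are the conv fold with no BN multiplier: the previous `bs_ptr[i + channel] = 1; bs_ptr[i] = input_z_ptr[i];` wrote fp32-domain values, while the new entries express both in output quanta. Note also that `Sf` is read from `filter->scale[0]` here rather than `fpga::filter_find_max`, presumably because FC weights already carry their scale. A sketch with hypothetical helper names, mirroring the first sketch with `scale == 1`:

```cpp
// FC fold: fold_scale(1.0f, Si, So, Sf) and fold_bias(input_z[i], So)
// from the earlier sketch, written out inline.
float FcFoldScale(float Si, float So, float Sf) {
  return Si / So * Sf / 127.0f;  // replaces the hard-coded 1
}
float FcFoldBias(float z, float So) {
  return z * 127.0f / So;  // replaces the raw fp32 bias input_z_ptr[i]
}
```

`pad2d_kernel.cpp` below is removed outright, and `PAD2D_OP` is not among the ops re-enabled in the `op.cmake` hunk at the end of this patch.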
-#ifdef PAD2D_OP
-#include "operators/kernel/pad2d_kernel.h"
-namespace paddle_mobile {
-namespace operators {
-template <>
-bool Pad2DKernel::Init(Pad2DParam *param) {
-  Tensor *output = param->output_;
-  fpga::format_fp16_ofm(output);
-  return true;
-}
-void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
-  auto input_data = (input->data());
-  auto output_data = (output->data());
-  auto input_c = input->dims()[1];
-  auto input_h = input->dims()[2];
-  auto input_w = input->dims()[3];
-  auto output_c = output->dims()[1];
-  auto output_w = output->dims()[3];
-  auto copysize = input_c * input_w;
-  for (int h = 0; h < input_h; ++h) {
-    auto input_offset = h * input_c * input_w;
-    auto output_offset = h * paddle_mobile::fpga::align_to_x(
-                                 output_c * output_w, IMAGE_ALIGNMENT);
-    memcpy((output_data + output_offset), (input_data + input_offset),
-           copysize * sizeof(half));
-  }
-}
-template <>
-void Pad2DKernel::Compute(const Pad2DParam &param) {
-  auto in_x = param.input_;
-  auto out = param.output_;
-  fpga::fpga_invalidate((void *)in_x->data(),  // NOLINT
-                        in_x->numel() * sizeof(half));
-  pad2dFunc(in_x, out);
-  (out->scale)[0] = (in_x->scale)[0];
-  (out->scale)[1] = (in_x->scale)[1];
-  DLOG << (out->scale)[0];
-  DLOG << (out->scale)[1];
-  size_t outputSize =
-      out->dims()[2] *
-      paddle_mobile::fpga::align_to_x((out->dims()[1]) * (out->dims()[3]),
-                                      IMAGE_ALIGNMENT) *
-      sizeof(half);
-  fpga::fpga_flush(out->data(), outputSize);
-}
-}  // namespace operators
-}  // namespace paddle_mobile
-#endif  // PAD2D_OP
diff --git a/src/operators/kernel/fpga/V2/pool_kernel.cpp b/src/operators/kernel/fpga/V2/pool_kernel.cpp
index 7c8dba1696ecc15ba9748aabf1973445d23de95c..60bd3786aa589eb300a7453325be55d839500754 100644
--- a/src/operators/kernel/fpga/V2/pool_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/pool_kernel.cpp
@@ -44,11 +44,13 @@ bool PoolKernel::Init(PoolParam *param) {
   auto input_ptr = input->data();
   fpga::format_fp16_ofm(output);
   auto output_ptr = output->mutable_data();
+  float Si = input->scale[0];
+  float So = output->scale[0];
   fpga::PoolingArgs poolArgs = {0};
   poolArgs.mode = pooling_type == "max" ? 0 : 1;  // max:0, avg:1
-  poolArgs.kernel_reciprocal =
-      fpga::fp32_2_fp16(float(1.0 / (ksize[0] * ksize[1])));  // NOLINT
+  poolArgs.kernel_reciprocal = fpga::fp32_2_fp16(
+      float(1.0 / (ksize[0] * ksize[1]) * Si / So));  // NOLINT
   poolArgs.image.address = input_ptr;
   poolArgs.image.channels = (uint32_t)input->dims()[1];
   poolArgs.image.height = (uint32_t)input->dims()[2];
diff --git a/tools/op.cmake b/tools/op.cmake
index 5847c60e9471d74dbaf4bdb415518af2a4de797c..eb6501de2232ae160acded85931c31122055ab4e 100755
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -163,6 +163,26 @@ if (CON GREATER -1)
     set(SPLIT_OP ON)
     set(FUSION_DECONVADD_OP ON)
     set(FUSION_DECONVADDRELU_OP ON)
+
+    set(RESHAPE_OP ON)
+    set(FUSION_CONVADDBNRELU_OP ON)
+    set(FUSION_CONVADDBN_OP ON)
+    set(RESHAPE2_OP ON)
+    set(PSROI_POOL_OP ON)
+    set(ROIALIGN_POOL_OP ON)
+    set(PROPOSAL_OP ON)
+    set(ANCHOR_GENERATOR_OP ON)
+    set(SLICE_OP ON)
+    set(SIGMOID_OP ON)
+    set(CONCAT_OP ON)
+    set(CONV_TRANSPOSE_OP ON)
+    set(FUSION_DECONVADDBNRELU_OP ON)
+    set(FUSION_DECONVADDBN_OP ON)
+    set(FUSION_DECONVBNRELU_OP ON)
+    set(CONV_OP ON)
+    set(ELEMENTWISEMUL_OP ON)
+    set(FUSION_FCRELU_OP ON)
+    set(RELU_OP ON)
     set(FOUND_MATCH ON)
 endif()
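On the pooling change above: `kernel_reciprocal` is the precomputed constant that average pooling multiplies each window sum by, so folding `Si / So` into it performs the requantization in the same multiply. A scalar model of the new coefficient (illustrative name only):

```cpp
// Folded avg-pool coefficient: for a 3x3 window with Si = 8 and So = 4
// this is (1 / 9) * 2 ~= 0.222, sent to the hardware as fp16.
float AvgPoolCoeff(int kh, int kw, float Si, float So) {
  return 1.0f / static_cast<float>(kh * kw) * Si / So;
}
```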