diff --git a/src/operators/kernel/fpga/V1/softmax_kernel.cpp b/src/operators/kernel/fpga/V1/softmax_kernel.cpp
index ff5ff5380ff33545bb52a8d0ef31f7e539edb46b..ac7a7bdc77c291864aad55ebb33495d8e1c57b50 100644
--- a/src/operators/kernel/fpga/V1/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/softmax_kernel.cpp
@@ -35,19 +35,23 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
   auto float_input = new LoDTensor;
-  PADDLE_MOBILE_ENFORCE(input->dims().size() == 4,
-                        "Softmax should have 4-order input");
-
-  auto channel = dims[3];
-  if (channel == 1) {  // This input is generated by FC op, dims = [N C 1 1]
-    PADDLE_MOBILE_ENFORCE(dims[2] == 1, "Softmax input must come from FC op");
-    dims[3] = dims[1];
-    dims[1] = 1;
+  int input_n = 1, input_c = 1, input_h = 1, input_w = 1;
+  if (dims.size() == 4) {
+    input_h = dims[1];
+    input_w = dims[2];
+    input_c = dims[3];
+    if (input_c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(input_w == 1, "Softmax input must come from FC op");
+      input_c = dims[1];
+      input_h = 1;
+    }
+  } else if (dims.size() == 2) {
+    input_c = dims[1];
   }
   input->Resize(framework::make_ddim(dims));
   float_input->Resize(framework::make_ddim(dims));
-  if (channel == 2 && input->type() == type_id()) {  // Use FPGA
+  if (input_c == 2 && input->type() == type_id()) {  // Use FPGA
     fpga::format_fp16_ofm(out);
     fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
     args.input_layout_type = fpga::LAYOUT_HWC;
@@ -55,9 +59,9 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
     args.input_data_type = fpga::DATA_TYPE_FP16;
     args.output_data_type = fpga::DATA_TYPE_FP16;
     args.image.address = input_ptr;
-    args.image.height = (uint32_t)input->dims()[1];
-    args.image.width = (uint32_t)input->dims()[2];
-    args.image.channels = (uint32_t)input->dims()[3];
+    args.image.height = input_h;
+    args.image.width = input_w;
+    args.image.channels = input_c;
     args.output.address = out->data();
     args.output.scale_address = out->scale;
     args.output.activation.activation_type = fpga::SOFTMAX;
@@ -67,8 +71,8 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
     out->mutable_data(framework::make_ddim(dims));
     float_input->init(type_id().hash_code());
     float_input->mutable_data(framework::make_ddim(dims));
-    // fpga::format_fp32_ofm(float_input);
-    // fpga::format_fp32_ofm(out);
+    fpga::format_fp32_ofm(float_input);
+    fpga::format_fp32_ofm(out);
 
     fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
     args.input_layout_type = fpga::LAYOUT_HWC;
@@ -76,9 +80,9 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
     args.input_data_type = fpga::DATA_TYPE_FP16;
     args.output_data_type = fpga::DATA_TYPE_FP32;
     args.image.address = input_ptr;
-    args.image.height = (uint32_t)dims[1] * dims[0];
-    args.image.width = (uint32_t)dims[2];
-    args.image.channels = (uint32_t)dims[3];
+    args.image.height = input_h;
+    args.image.width = input_w;
+    args.image.channels = input_c;
     args.output.address = float_input->data();
     args.output.scale_address = float_input->scale;
     param->SetFloatInput(float_input);
@@ -91,6 +95,23 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
 template <>
 void SoftmaxKernel::Compute(const SoftmaxParam &param) {
   auto *in_x = (param.InputX());
+  auto dims = in_x->dims();
+  auto n = 1;
+  auto h = 1;
+  auto w = 1;
+  auto c = 1;
+  if (dims.size() == 4) {
+    h = dims[1];
+    w = dims[2];
+    c = dims[3];
+    if (c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(w == 1, "Softmax input must come from FC op");
+      c = dims[1];
+      h = 1;
+    }
+  } else if (dims.size() == 2) {
+    c = dims[1];
+  }
   if (in_x->type() == type_id()) {
     fpga::PerformBypass(param.FpgaArgs());
     if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) {
@@ -105,8 +126,7 @@ void SoftmaxKernel::Compute(const SoftmaxParam &param) {
   } else {
     if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) {
       Tensor *out = param.Out();
-      out->Resize(
-          {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
+      out->Resize({n, h, w, c});
       math::SoftmaxFuntor()(in_x, out);
     }
   }
diff --git a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
index 8bf1ead85c18bc23c51b528c3729aa702558f3ae..8c65ee0627f2810a198dabdcbca286725595d798 100644
--- a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
@@ -19,13 +19,10 @@ limitations under the License. */
 
 namespace paddle_mobile {
 namespace operators {
-
 template <>
 bool ConvAddBNReluKernel::Init(
     FusionConvAddBNReluParam *param) {
   bool relu_enabled = true;
-  // paddle_mobile::fpga::ActivationType activation_enable =
-  //     paddle_mobile::fpga::LEAKYRELU;
   auto input = const_cast(param->Input());
   auto bias = param->Bias();
   auto bias_ptr = bias->data();
@@ -42,6 +39,7 @@ bool ConvAddBNReluKernel::Init(
   auto bn_scale_ptr = param->InputScale()->data();
   auto bn_bias_ptr = param->InputBias()->data();
   const float epsilon = param->Epsilon();
+
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                             bias->dims()[0] == param->InputBias()->dims()[0],
                         "Output channel should be equal to bias number");
@@ -75,6 +73,7 @@ bool ConvAddBNReluKernel::Init(
                           new_bias_ptr);
     param->SetFpgaArgs(dwconv_arg);
     fpga::fpga_free(bs_ptr);
+    delete new_scale;
   } else {
     fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
     fpga::SplitConvArgs conv_arg = {0};
@@ -82,9 +81,10 @@ bool ConvAddBNReluKernel::Init(
                          param->Groups(), strides[0], strides[1], paddings[0],
                          paddings[1], bs_ptr);
     param->SetFpgaArgs(conv_arg);
+    delete new_scale;
+    delete new_bias;
   }
-  delete new_scale;
-  delete new_bias;
+
   return true;
 }
diff --git a/src/operators/kernel/fpga/V2/reshape2_kernel.cpp b/src/operators/kernel/fpga/V2/reshape2_kernel.cpp
index b1df9372a6b3d7a39d6d5cd7ac9dd48534522f64..ebaf3759400c60c9ecf36467d0eeb7adad140f46 100644
--- a/src/operators/kernel/fpga/V2/reshape2_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/reshape2_kernel.cpp
@@ -114,6 +114,7 @@ void Reshape2Kernel::Compute(const Reshape2Param &param) {
     output->ShareDataWith(*input);
     framework::LoD lod = input->lod();
     output->set_lod(lod);
+    output->scale[0] = input->scale[0];
     return;
   }
 
diff --git a/src/operators/kernel/fpga/V2/sigmoid_kernel.cpp b/src/operators/kernel/fpga/V2/sigmoid_kernel.cpp
index 2171432cba5700844ccd58fbb32ffcf23d3c132d..194fd5a30565b866ca702b296981d0b8302a1c16 100644
--- a/src/operators/kernel/fpga/V2/sigmoid_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/sigmoid_kernel.cpp
@@ -21,11 +21,12 @@ namespace operators {
 
 template <>
 bool SigmoidKernel::Init(SigmoidParam *param) {
-  paddle_mobile::fpga::ActivationType activation_enable =
-      paddle_mobile::fpga::SIGMOID;
-  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast(param->InputX());
   auto input_ptr = input->data();
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::SIGMOID;
+  int16_t leaky_relu_negative_slope =
+      fpga::fp32_2_fp16(input->scale[0] / 127.0);
   auto out = param->Out();
   fpga::format_ofm(out);
 
@@ -47,6 +48,7 @@ bool SigmoidKernel::Init(SigmoidParam *param) {
 template <>
 void SigmoidKernel::Compute(const SigmoidParam &param) {
   fpga::PerformBypass(param.FpgaArgs());
+  param.Out()->scale[0] = 127.0;
 }
 
 }  // namespace operators
diff --git a/src/operators/kernel/fpga/V2/softmax_kernel.cpp b/src/operators/kernel/fpga/V2/softmax_kernel.cpp
index 4f75e0f30b2e9f57d94941e972d012016b55251e..b7615a8891b8292dd4d65c15955a0ee640c2f770 100755
--- a/src/operators/kernel/fpga/V2/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/softmax_kernel.cpp
@@ -28,17 +28,22 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
 
   auto out = param->Out();
   out->Resize(framework::make_ddim(dims));
-  PADDLE_MOBILE_ENFORCE(input->dims().size() == 4,
-                        "Softmax should have 4-order input");
-
-  auto channel = dims[3];
-  if (channel == 1) {  // This input is generated by FC op, dims = [N C 1 1]
-    PADDLE_MOBILE_ENFORCE(dims[2] == 1, "Softmax input must come from FC op");
-    dims[3] = dims[1];
-    dims[1] = 1;
+  int input_c = 1, input_h = 1, input_w = 1;
+  if (dims.size() == 4) {
+    input_h = dims[1];
+    input_w = dims[2];
+    input_c = dims[3];
+    if (input_c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(input_w == 1, "Softmax input must come from FC op");
+      input_c = dims[1];
+      input_h = 1;
+    }
+  } else if (dims.size() == 2) {
+    input_c = dims[1];
   }
+  input->Resize(framework::make_ddim(dims));
 
-  if ((channel == 2) && (input->type() == type_id())) {
+  if ((input_c == 2) && (input->type() == type_id())) {
     auto input_ptr = input->data();
     float Si = input->scale[0];
     int16_t slope = fpga::fp32_2_fp16(Si / 127);
@@ -50,22 +55,14 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) {
     args.input_data_type = fpga::DATA_TYPE_FP16;
     args.output_data_type = fpga::DATA_TYPE_FP16;
     args.image.address = input_ptr;
-    args.image.height = (uint32_t)input->dims()[1];
-    args.image.width = (uint32_t)input->dims()[2];
-    args.image.channels = (uint32_t)input->dims()[3];
+    args.image.height = input_h;
+    args.image.width = input_w;
+    args.image.channels = input_c;
     args.output.address = out->data();
     args.output.scale_address = out->scale;
     args.output.activation.activation_type = fpga::SOFTMAX;
     args.output.activation.leaky_relu_negative_slope = slope;
     param->SetFpgaArgs(args);
-  } else if (input->type() == type_id()) {
-    auto float_input_x = param->float_input_x_;
-    float_input_x = std::make_shared();
-    float_input_x->Resize(input->dims());
-    float_input_x->init(type_id().hash_code());
-    fpga::format_ofm(float_input_x.get());
-    out->mutable_data(framework::make_ddim(dims));
-    fpga::format_ofm(out);
   } else {
     out->mutable_data(framework::make_ddim(dims));
     fpga::format_ofm(out);
@@ -78,36 +75,45 @@ template <>
 void SoftmaxKernel::Compute(const SoftmaxParam &param) {
   auto *in_x = (param.InputX());
   auto dims = in_x->dims();
-  auto n = dims[0];
-  auto h = dims[1];
-  auto w = dims[2];
-  auto c = dims[3];
+
+  auto n = 1;
+  auto h = 1;
+  auto w = 1;
+  auto c = 1;
+  if (dims.size() == 4) {
+    h = dims[1];
+    w = dims[2];
+    c = dims[3];
+    if (c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(w == 1, "Softmax input must come from FC op");
+      c = dims[1];
+      h = 1;
+    }
+  } else if (dims.size() == 2) {
+    c = dims[1];
+  }
   if ((c == 2) && (in_x->type() == type_id())) {
     fpga::PerformBypass(param.FpgaArgs());
   } else if (in_x->type() == type_id()) {
     auto in_data = in_x->data();
     float Si = in_x->scale[0];
     Tensor *out = param.Out();
-    out->Resize(
-        {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
-
+    out->Resize({n, h, w, c});
     auto float_input_x = param.float_input_x_;
+    float_input_x = std::make_shared();
+    float_input_x->Resize(in_x->dims());
+    float_input_x->init(type_id().hash_code());
+    fpga::format_fp32_ofm(float_input_x.get());
     auto float_input_x_data = float_input_x->data();
     int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT);
     for (int i = 0; i < dataNum; i++) {
       float_input_x_data[i] = in_data[i] * Si / 127;
     }
     math::SoftmaxFuntor()(float_input_x.get(), out);
-    auto out_data = out->data();
-    fpga::fpga_flush(out_data, dataNum * sizeof(float));
   } else {
     Tensor *out = param.Out();
-    out->Resize(
-        {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
+    out->Resize({n, h, w, c});
     math::SoftmaxFuntor()(in_x, out);
-    int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT);
-    auto out_data = out->data();
-    fpga::fpga_flush(out_data, dataNum * sizeof(float));
   }
 }
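For readers of this patch, the shape-normalization logic that the change repeats in the V1 and V2 softmax kernels can be summarized by the standalone sketch below. It is illustrative only: the helper name NormalizeSoftmaxDims and the plain std::vector<int64_t> signature are assumptions and not part of the patch, and the PADDLE_MOBILE_ENFORCE check is omitted. The branch structure mirrors what Init() and Compute() now do: a 4-D NHWC input takes H/W/C from dims[1..3], an FC-produced [N C 1 1] input is folded so that C = dims[1] and H = W = 1, and a 2-D [N C] input keeps only the channel count.

// Illustrative sketch only; NormalizeSoftmaxDims is a hypothetical helper
// that mirrors the dims handling added in the softmax kernels above.
#include <cstdint>
#include <vector>

struct HWC {
  int h = 1;
  int w = 1;
  int c = 1;
};

inline HWC NormalizeSoftmaxDims(const std::vector<int64_t> &dims) {
  HWC shape;
  if (dims.size() == 4) {
    // NHWC layout: height, width, channels come from dims[1..3].
    shape.h = static_cast<int>(dims[1]);
    shape.w = static_cast<int>(dims[2]);
    shape.c = static_cast<int>(dims[3]);
    if (shape.c == 1) {
      // Input produced by an FC op, dims = [N C 1 1]: fold it to a
      // channel-only shape, C = dims[1], H = W = 1.
      shape.c = static_cast<int>(dims[1]);
      shape.h = 1;
    }
  } else if (dims.size() == 2) {
    // 2-D [N C] input: only the channel count is meaningful.
    shape.c = static_cast<int>(dims[1]);
  }
  return shape;
}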