add pad2d and deconv related op in FPGA track

1712ac3d · qnqinan · 9cc38d33 · 1712ac3d · 1712ac3d · 1712ac3d
18 changed file
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -105,12 +105,14 @@ const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU = "fusion_deconv_add_relu";
 const char *G_OP_TYPE_SEQUENCE_EXPAND = "sequence_expand";
 const char *G_OP_TYPE_SEQUENCE_POOL = "sequence_pool";
 const char *G_OP_TYPE_SEQUENCE_SOFTMAX = "sequence_softmax";
 const char *G_OP_TYPE_SLICE = "slice";
 const char *G_OP_TYPE_ANCHOR_GENERATOR = "anchor_generator";
 const char *G_OP_TYPE_GENERATE_PROPOSALS = "generate_proposals";
 const char *G_OP_TYPE_PSROI_POOL = "psroi_pool";
 const char *G_OP_TYPE_ROI_PERSPECTIVE = "roi_perspective_transform";
+const char *G_OP_TYPE_PAD2D = "pad2d";
+const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU = "fusion_deconv_add_bn_relu";
+const char *G_OP_TYPE_FUSION_DECONV_ADD_BN = "fusion_deconv_add_bn";
 std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -210,5 +212,8 @@ std::unordered_map<
         {{"Scores", "BboxDeltas", "ImInfo", "Anchors", "Variances"},
          {"RpnRois", "RpnRoiProbs"}}},
        {G_OP_TYPE_PSROI_POOL, {{"X", "ROIs"}, {"Out"}}},
-        {G_OP_TYPE_ROI_PERSPECTIVE, {{"X", "ROIs"}, {"Out"}}}};
+        {G_OP_TYPE_ROI_PERSPECTIVE, {{"X", "ROIs"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_DECONV_ADD_BN, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}}};
 }  // namespace paddle_mobile
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -199,6 +199,9 @@ extern const char *G_OP_TYPE_ANCHOR_GENERATOR;
 extern const char *G_OP_TYPE_GENERATE_PROPOSALS;
 extern const char *G_OP_TYPE_PSROI_POOL;
 extern const char *G_OP_TYPE_ROI_PERSPECTIVE;
+extern const char *G_OP_TYPE_PAD2D;
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU;
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN;
 extern std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>

--- a/src/operators/fusion_deconv_add_bn_op.cpp
+++ b/src/operators/fusion_deconv_add_bn_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBN_OP
+#include "operators/fusion_deconv_add_bn_op.h"
+namespace paddle_mobile {
+namespace operators {}
+}  // namespace paddle_mobile
+namespace ops = paddle_mobile::operators;
+REGISTER_FUSION_MATCHER(fusion_deconv_add_bn, ops::FusionDeconvAddBNMatcher);
+#ifdef PADDLE_MOBILE_CPU
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn, ops::FusionDeconvAddBNOp);
+#endif
+#endif
--- a/src/operators/fusion_deconv_add_bn_op.h
+++ b/src/operators/fusion_deconv_add_bn_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBN_OP
+#pragma once
+#include <string>
+#include <vector>
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/deconv_add_bn_kernel.h"
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+class FusionDeconvAddBNMatcher : public framework::FusionOpMatcher {
+ public:
+  FusionDeconvAddBNMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM);
+  }
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"X", "X"}}},
+                  {G_OP_TYPE_BATCHNORM,
+                   {{"Scale", "Scale"},
+                    {"Mean", "Mean"},
+                    {"Bias", "Bias"},
+                    {"Variance", "Variance"},
+                    {"Y", "BNY"}}}},
+                 removed_nodes);
+  }
+  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD_BN; }
+};
+template <typename DeviceType, typename T>
+class FusionDeconvAddBNOp : public framework::OperatorWithKernel<
+                                DeviceType, FusionDeconvAddBNParam<DeviceType>,
+                                operators::DeconvAddBNKernel<DeviceType, T>> {
+ public:
+  FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs,
+                      const VariableNameMap &outputs,
+                      const framework::AttributeMap &attrs,
+                      std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<
+            DeviceType, FusionDeconvAddBNParam<DeviceType>,
+            operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
+                                                         attrs, scope) {}
+  void InferShape() const {
+    auto input = this->param_.Input();
+    auto in_dims = input->dims();
+    auto filter = this->param_.Filter();
+    auto filter_dims = filter->dims();
+    std::vector<int> strides = this->param_.Strides();
+    std::vector<int> paddings = this->param_.Paddings();
+    std::vector<int> dilations = this->param_.Dilations();
+    int groups = this->param_.Groups();
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == 4 || in_dims.size() == 5,
+        "ConvTransposeOp intput should be 4-D or 5-D tensor.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == filter_dims.size(),
+        "ConvTransposeOp input dimension and filter dimension "
+        "should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() - strides.size() == 2U,
+        "ConvTransposeOp input dimension and strides dimension should "
+        "be consistent.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
+                          "ConvTransposeOp paddings dimension and strides "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
+                          "ConvTransposeOp paddings dimension and dilations "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims[1] == filter_dims[0],
+        "In ConvTransposeOp, The number of input channels should "
+        "be equal to the number of filter's channels.");
+    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
+    for (size_t i = 0; i < strides.size(); ++i) {
+      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
+      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
+                             2 * paddings[i] + filter_extent);
+    }
+    this->param_.Output()->Resize(framework::make_ddim(output_shape));
+  }
+ protected:
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif  // FUSION_DECONV_ADD_BN_OP
--- a/src/operators/fusion_deconv_add_bn_relu_op.cpp
+++ b/src/operators/fusion_deconv_add_bn_relu_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBNRELU_OP
+#include "operators/fusion_deconv_add_bn_relu_op.h"
+namespace paddle_mobile {
+namespace operators {}
+}  // namespace paddle_mobile
+namespace ops = paddle_mobile::operators;
+REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu,
+                        ops::FusionDeconvAddBNReluMatcher);
+#ifdef PADDLE_MOBILE_CPU
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn_relu, ops::FusionDeconvAddBNReluOp);
+#endif
+#endif
--- a/src/operators/fusion_deconv_add_bn_relu_op.h
+++ b/src/operators/fusion_deconv_add_bn_relu_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBNRELU_OP
+#pragma once
+#include <string>
+#include <vector>
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/deconv_add_bn_relu_kernel.h"
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+class FusionDeconvAddBNReluMatcher : public framework::FusionOpMatcher {
+ public:
+  FusionDeconvAddBNReluMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM) >
+        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
+  }
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"X", "X"}}},
+                  {G_OP_TYPE_BATCHNORM,
+                   {{"Scale", "Scale"},
+                    {"Mean", "Mean"},
+                    {"Bias", "Bias"},
+                    {"Variance", "Variance"},
+                    {"Y", "BNY"}}}},
+                 removed_nodes);
+  }
+  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU; }
+};
+template <typename DeviceType, typename T>
+class FusionDeconvAddBNReluOp
+    : public framework::OperatorWithKernel<
+          DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
+          operators::DeconvAddBNReluKernel<DeviceType, T>> {
+ public:
+  FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs,
+                          const VariableNameMap &outputs,
+                          const framework::AttributeMap &attrs,
+                          std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<
+            DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
+            operators::DeconvAddBNReluKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  void InferShape() const {
+    auto input = this->param_.Input();
+    auto in_dims = input->dims();
+    auto filter = this->param_.Filter();
+    auto filter_dims = filter->dims();
+    std::vector<int> strides = this->param_.Strides();
+    std::vector<int> paddings = this->param_.Paddings();
+    std::vector<int> dilations = this->param_.Dilations();
+    int groups = this->param_.Groups();
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == 4 || in_dims.size() == 5,
+        "ConvTransposeOp intput should be 4-D or 5-D tensor.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == filter_dims.size(),
+        "ConvTransposeOp input dimension and filter dimension "
+        "should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() - strides.size() == 2U,
+        "ConvTransposeOp input dimension and strides dimension should "
+        "be consistent.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
+                          "ConvTransposeOp paddings dimension and strides "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
+                          "ConvTransposeOp paddings dimension and dilations "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims[1] == filter_dims[0],
+        "In ConvTransposeOp, The number of input channels should "
+        "be equal to the number of filter's channels.");
+    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
+    for (size_t i = 0; i < strides.size(); ++i) {
+      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
+      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
+                             2 * paddings[i] + filter_extent);
+    }
+    this->param_.Output()->Resize(framework::make_ddim(output_shape));
+  }
+ protected:
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif  // FUSION_DECONV_ADD_BN_RELU_OP
--- a/src/operators/kernel/deconv_add_bn_kernel.h
+++ b/src/operators/kernel/deconv_add_bn_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBN_OP
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using framework::OpKernelBase;
+template <typename DeviceType, typename T>
+class DeconvAddBNKernel
+    : public OpKernelBase<DeviceType, FusionDeconvAddBNParam<DeviceType>> {
+ public:
+  void Compute(const FusionDeconvAddBNParam<DeviceType> &param);
+  bool Init(FusionDeconvAddBNParam<DeviceType> *param);
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/deconv_add_bn_relu_kernel.h
+++ b/src/operators/kernel/deconv_add_bn_relu_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBNRELU_OP
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using framework::OpKernelBase;
+template <typename DeviceType, typename T>
+class DeconvAddBNReluKernel
+    : public OpKernelBase<DeviceType, FusionDeconvAddBNReluParam<DeviceType>> {
+ public:
+  void Compute(const FusionDeconvAddBNReluParam<DeviceType> &param);
+  bool Init(FusionDeconvAddBNReluParam<DeviceType> *param);
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp
--- a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp
@@ -16,13 +16,10 @@ limitations under the License. */
 #include "operators/kernel/conv_bn_relu_kernel.h"
 #include <cmath>
 namespace paddle_mobile {
 namespace operators {
 template <>
 bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
-  // bool relu_enabled = true;
  paddle_mobile::fpga::ActivationType activation_enable =
      paddle_mobile::fpga::LEAKYRELU;
  int16_t leaky_relu_negative_slope = 0;
@@ -43,7 +40,6 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
  auto new_bias = new Tensor();
  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
  for (int i = 0; i < channel; i++) {
    new_scale_ptr[i] = bn_scale_ptr[i] /
                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
@@ -51,7 +47,16 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
    bs_ptr[i + channel] = new_scale_ptr[i];
    bs_ptr[i] = new_bias_ptr[i];
  }
+  const int groups = param->Groups();
+  if (groups == channel) {
+    fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
+    fpga::DWconvArgs dwconv_arg = {0};
+    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Strides()[0],
+                          param->Strides()[1], param->Paddings()[0],
+                          param->Paddings()[1], new_bias_ptr);
+    param->SetFpgaArgs(dwconv_arg);
+  } else {
    fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
    fpga::SplitConvArgs conv_arg = {0};
    fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
@@ -59,16 +64,19 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
                         param->Strides()[0], param->Strides()[1],
                         param->Paddings()[0], param->Paddings()[1], bs_ptr);
    param->SetFpgaArgs(conv_arg);
+  }
  delete new_scale;
  delete new_bias;
  return true;
 }
 template <>
 void ConvBNReluKernel<FPGA, float>::Compute(
    const FusionConvBNReluParam<FPGA> &param) {
+  if (param.Groups() == param.Output()->dims()[1]) {
+    fpga::ComputeDWConv(param.FpgaDwconvArgs());
+  } else {
    fpga::ComputeFpgaConv(param.FpgaArgs());
+  }
 }
 }  // namespace operators

--- a/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef CONV_TRANSPOSE_OP
+#include "operators/kernel/conv_transpose_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+template <>
+bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
+  auto input = const_cast<Tensor *>(param->Input());
+  // const Tensor *bias = param->Bias();
+  // auto bias_ptr = bias->data<float>();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
+  // PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+  //                      "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
+  int sub_conv_n = param->Strides()[0];
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n *  // NOLINT
+                                           sizeof(float));             // NOLINT
+  for (int i = 0; i < channel * sub_conv_n; i++) {
+    bs_ptr[i + sub_conv_n * channel] = 1;
+    // bs_ptr[i] = bias_ptr[i % (channel)];
+  }
+  PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
+                        "stride_width should be equal to stride_height ");
+  PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
+                        "filter width should be equal to filter height ");
+  PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
+                        "filter axis should be the multiple of stride axis ");
+  if (param->Groups() == channel) {
+    fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
+                               sub_conv_n);
+    fpga::DWDeconvArgs DWDeconv_arg = {0};
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(DWDeconv_arg);
+  } else {
+    fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
+    fpga::DeconvArgs deconv_arg = {0};
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(deconv_arg);
+  }
+  return true;
+}
+template <>
+void ConvTransposeKernel<FPGA, float>::Compute(
+    const ConvTransposeParam<FPGA> &param) {
+  if (param.Groups() == param.Output()->dims()[1]) {
+    fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
+  } else {
+    fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  }
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBN_OP
+#include "operators/kernel/deconv_add_bn_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+template <>
+bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
+  auto input = const_cast<Tensor *>(param->Input());
+  const Tensor *bias = param->InputBias();
+  auto bias_ptr = bias->data<float>();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
+  int sub_conv_n = param->Strides()[0];
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n *  // NOLINT
+                                           sizeof(float));             // NOLINT
+  for (int i = 0; i < channel * sub_conv_n; i++) {
+    bs_ptr[i + sub_conv_n * channel] = 1;
+    bs_ptr[i] = bias_ptr[i % (channel)];
+  }
+  PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
+                        "stride_width should be equal to stride_height ");
+  PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
+                        "filter width should be equal to filter height ");
+  PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
+                        "filter axis should be the multiple of stride axis ");
+  if (param->Groups() == channel) {
+    fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
+                               sub_conv_n);
+    fpga::DWDeconvArgs DWDeconv_arg = {0};
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(DWDeconv_arg);
+  } else {
+    fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
+    fpga::DeconvArgs deconv_arg = {0};
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(deconv_arg);
+  }
+  return true;
+}
+template <>
+void DeconvAddBNKernel<FPGA, float>::Compute(
+    const FusionDeconvAddBNParam<FPGA> &param) {
+  // fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  if (param.Groups() == param.Output()->dims()[1]) {
+    fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
+  } else {
+    fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  }
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBNRELU_OP
+#include "operators/kernel/deconv_add_bn_relu_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+template <>
+bool DeconvAddBNReluKernel<FPGA, float>::Init(
+    FusionDeconvAddBNReluParam<FPGA> *param) {
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
+  auto input = const_cast<Tensor *>(param->Input());
+  const Tensor *bias = param->InputBias();
+  auto bias_ptr = bias->data<float>();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
+  int sub_conv_n = param->Strides()[0];
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n *  // NOLINT
+                                           sizeof(float));             // NOLINT
+  for (int i = 0; i < channel * sub_conv_n; i++) {
+    bs_ptr[i + sub_conv_n * channel] = 1;
+    bs_ptr[i] = bias_ptr[i % (channel)];
+  }
+  PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
+                        "stride_width should be equal to stride_height ");
+  PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
+                        "filter width should be equal to filter height ");
+  PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
+                        "filter axis should be the multiple of stride axis ");
+  if (param->Groups() == channel) {
+    fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
+                               sub_conv_n);
+    fpga::DWDeconvArgs DWDeconv_arg = {0};
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(DWDeconv_arg);
+  } else {
+    fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
+    fpga::DeconvArgs deconv_arg = {0};
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(deconv_arg);
+  }
+  return true;
+}
+template <>
+void DeconvAddBNReluKernel<FPGA, float>::Compute(
+    const FusionDeconvAddBNReluParam<FPGA> &param) {
+  // fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  if (param.Groups() == param.Output()->dims()[1]) {
+    fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
+  } else {
+    fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  }
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/pad2d_kernel.h
+++ b/src/operators/kernel/pad2d_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+template <typename DeviceType, typename T>
+class Pad2dKernel
+    : public framework::OpKernelBase<DeviceType, Pad2dParam<DeviceType>> {
+ public:
+  void Compute(const Pad2dParam<DeviceType> &param);
+  bool Init(Pad2dParam<DeviceType> *param);
+};
+}  // namespace operators
+}  // namespace paddle_mobile
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -1221,6 +1221,7 @@ class FetchParam : public OpParam {
  RType *input_x_;
  Tensor *out_;
 #ifdef PADDLE_MOBILE_FPGA
 public:
  fpga::BypassArgs fpga_bypass_args;
@@ -2415,6 +2416,120 @@ class FusionDeconvAddParam : public ConvTransposeParam<Dtype> {
 template <typename Dtype>
 using FusionDeconvAddReluParam = FusionDeconvAddParam<Dtype>;
 #endif
+#ifdef FUSION_DECONVADDBN_OP
+template <typename Dtype>
+class FusionDeconvAddBNParam : public ConvTransposeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+ public:
+  FusionDeconvAddBNParam(const VariableNameMap &inputs,
+                         const VariableNameMap &outputs,
+                         const AttributeMap &attrs, const Scope &scope)
+      : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    //    is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
+  }
+  RType *Output() const { return output_; }
+  const RType *InputBias() const { return input_bias_; }
+  const RType *InputMean() const { return input_mean_; }
+  const RType *InputScale() const { return input_scale_; }
+  const RType *InputVariance() const { return input_variance_; }
+  const float &Epsilon() const { return epsilon_; }
+  const float &Momentum() const { return momentum_; }
+  const bool &IsTest() const { return is_test_; }
+  void SetNewScale(RType *new_scale) { new_scale_ = new_scale; }
+  void SetNewBias(RType *new_bias) { new_bias_ = new_bias; }
+  const RType *NewScale() const { return new_scale_; }
+  const RType *NewBias() const { return new_bias_; }
+ protected:
+  RType *output_;
+  RType *input_bias_;
+  RType *input_mean_;
+  RType *input_scale_;
+  RType *input_variance_;
+  float epsilon_;
+  float momentum_;
+  bool is_test_;
+  RType *new_bias_;
+  RType *new_scale_;
+};
+#endif
+#ifdef FUSION_DECONVADDBNRELU_OP
+template <typename Dtype>
+class FusionDeconvAddBNReluParam : public ConvTransposeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+ public:
+  FusionDeconvAddBNReluParam(const VariableNameMap &inputs,
+                             const VariableNameMap &outputs,
+                             const AttributeMap &attrs, const Scope &scope)
+      : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    //    is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
+  }
+  RType *Output() const { return output_; }
+  const RType *InputBias() const { return input_bias_; }
+  const RType *InputMean() const { return input_mean_; }
+  const RType *InputScale() const { return input_scale_; }
+  const RType *InputVariance() const { return input_variance_; }
+  const float &Epsilon() const { return epsilon_; }
+  const float &Momentum() const { return momentum_; }
+  const bool &IsTest() const { return is_test_; }
+  void SetNewScale(RType *new_scale) { new_scale_ = new_scale; }
+  void SetNewBias(RType *new_bias) { new_bias_ = new_bias; }
+  const RType *NewScale() const { return new_scale_; }
+  const RType *NewBias() const { return new_bias_; }
+ protected:
+  RType *output_;
+  RType *input_bias_;
+  RType *input_mean_;
+  RType *input_scale_;
+  RType *input_variance_;
+  float epsilon_;
+  float momentum_;
+  bool is_test_;
+  RType *new_bias_;
+  RType *new_scale_;
+};
+#endif
 #ifdef FUSION_DECONVRELU_OP
 template <typename Dtype>
@@ -3114,6 +3229,26 @@ class IncrementParam : public OpParam {
  int step_;
 };
 #endif  // INCREMENT_OP
+#ifdef PAD2D_OP
+template <typename Dtype>
+class Pad2dParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+ public:
+  Pad2dParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+             const AttributeMap &attrs, const Scope &scope) {
+    input_x_ = InputXFrom<GType>(inputs, scope);
+    out_ = OutFrom<GType>(outputs, scope);
+  }
+  const RType *InputX() const { return input_x_; }
+  RType *Out() const { return out_; }
+ private:
+  RType *input_x_;
+  RType *out_;
+};
+#endif
 }  // namespace operators
 }  // namespace paddle_mobile
--- a/src/operators/pad2d_op.cpp
+++ b/src/operators/pad2d_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef PAD2D_OP
+#include "operators/pad2d_op.h"
+namespace paddle_mobile {
+namespace operators {
+template <typename Dtype, typename T>
+void Pad2dOp<Dtype, T>::InferShape() const {
+  auto input_dims = this->param_.InputX()->dims();
+  auto input_n = input_dims[0];
+  auto input_c = input_dims[1];
+  auto input_h = input_dims[2];
+  auto input_w = input_dims[3];
+  this->param_.Out()->Resize({input_n, input_c, input_h + 1, input_w + 1});
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(pad2d, ops::Pad2dOp);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(pad2d, ops::Pad2dOp);
+#endif
+#endif
--- a/src/operators/pad2d_op.h
+++ b/src/operators/pad2d_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef PAD2D_OP
+#pragma once
+#include <string>
+#include "framework/operator.h"
+#include "operators/kernel/pad2d_kernel.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using framework::AttributeMap;
+using framework::OperatorWithKernel;
+using framework::Scope;
+using std::string;
+template <typename DeviceType, typename T>
+class Pad2dOp
+    : public OperatorWithKernel<DeviceType, Pad2dParam<DeviceType>,
+                                operators::Pad2dKernel<DeviceType, T>> {
+ public:
+  Pad2dOp(const string &type, const VariableNameMap &inputs,
+          const VariableNameMap &outputs, const AttributeMap &attrs,
+          std::shared_ptr<Scope> scope)
+      : OperatorWithKernel<DeviceType, Pad2dParam<DeviceType>,
+                           operators::Pad2dKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  void InferShape() const override;
+ private:
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -131,7 +131,12 @@ if (CON GREATER -1)
  set(PROPOSAL_OP ON)
  set(ANCHOR_GENERATOR_OP ON)
  set(SLICE_OP ON)
+  set(SIGMOID_OP ON)
+  set(CONCAT_OP ON)
+  set(PAD2D_OP ON)
+  set(CONV_TRANSPOSE_OP ON)
+  set(FUSION_DECONVADDBNRELU_OP ON)
+  set(FUSION_DECONVADDBN_OP ON)
  set(FOUND_MATCH ON)
 endif()
@@ -573,7 +578,6 @@ endif()
 if (FUSION_DECONVADDRELU_OP)
  add_definitions(-DFUSION_DECONVADDRELU_OP)
 endif()
 if (WHILE_OP)
  add_definitions(-DWHILE_OP)
 endif()
@@ -602,3 +606,12 @@ endif()
 if (ROI_PERSPECTIVE_OP)
  add_definitions(-DROI_PERSPECTIVE_OP)
 endif()
+if (FUSION_DECONVADDBNRELU_OP)
+  add_definitions(-DFUSION_DECONVADDBNRELU_OP)
+endif()
+if (FUSION_DECONVADDBN_OP)
+  add_definitions(-DFUSION_DECONVADDBN_OP)
+endif()
+if (PAD2D_OP)
+  add_definitions(-DPAD2D_OP)
+endif()