Merge pull request #1447 from qnqinan/develop

add pad2d and deconv related op in FPGA track fixed#1446

Merge pull request #1447 from qnqinan/develop
add pad2d and deconv related op in FPGA track fixed#1446
8c39086c · qnqinan · GitHub · 1eaca7e5 · 85d6c449 · 8c39086c
20 changed files
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -105,12 +105,14 @@ const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU = "fusion_deconv_add_relu";
 const char *G_OP_TYPE_SEQUENCE_EXPAND = "sequence_expand";
 const char *G_OP_TYPE_SEQUENCE_POOL = "sequence_pool";
 const char *G_OP_TYPE_SEQUENCE_SOFTMAX = "sequence_softmax";
 const char *G_OP_TYPE_SLICE = "slice";
 const char *G_OP_TYPE_ANCHOR_GENERATOR = "anchor_generator";
 const char *G_OP_TYPE_GENERATE_PROPOSALS = "generate_proposals";
 const char *G_OP_TYPE_PSROI_POOL = "psroi_pool";
 const char *G_OP_TYPE_ROI_PERSPECTIVE = "roi_perspective_transform";
+const char *G_OP_TYPE_PAD2D = "pad2d";
+const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU = "fusion_deconv_add_bn_relu";
+const char *G_OP_TYPE_FUSION_DECONV_ADD_BN = "fusion_deconv_add_bn";
 std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -210,5 +212,8 @@ std::unordered_map<
         {{"Scores", "BboxDeltas", "ImInfo", "Anchors", "Variances"},
          {"RpnRois", "RpnRoiProbs"}}},
        {G_OP_TYPE_PSROI_POOL, {{"X", "ROIs"}, {"Out"}}},
-        {G_OP_TYPE_ROI_PERSPECTIVE, {{"X", "ROIs"}, {"Out"}}}};
+        {G_OP_TYPE_ROI_PERSPECTIVE, {{"X", "ROIs"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_DECONV_ADD_BN, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}}};
 }  // namespace paddle_mobile
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -199,6 +199,9 @@ extern const char *G_OP_TYPE_ANCHOR_GENERATOR;
 extern const char *G_OP_TYPE_GENERATE_PROPOSALS;
 extern const char *G_OP_TYPE_PSROI_POOL;
 extern const char *G_OP_TYPE_ROI_PERSPECTIVE;
+extern const char *G_OP_TYPE_PAD2D;
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU;
+extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN;
 extern std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>

--- a/src/fpga/V1/api.cpp
+++ b/src/fpga/V1/api.cpp
@@ -30,9 +30,9 @@ void format_image(framework::Tensor *image_tensor) {
  auto data_ptr = image_tensor->data<float>();
  auto external_ptr = reinterpret_cast<float *>(image_tensor->external_data);
  float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
+  float *old_p = p_data;
  image::format_image(&p_data, channel, height, width);
-  if (p_data != data_ptr) {
+  if (old_p != p_data) {
    image_tensor->reset_data_ptr(p_data);
  }
 }
@@ -48,9 +48,9 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) {
  auto dims = ofm_tensor->dims();
  size_t memory_size = 0;
  if (dims.size() == 4) {
-    auto channel = dims[1], height = dims[2], width = dims[3], num = dims[0];
+    auto channel = dims[1], height = dims[2], width = dims[3];
-    memory_size = num * height * align_to_x(channel * width, IMAGE_ALIGNMENT) *
+    memory_size =
-                  sizeof(half);
+        height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half);
  } else if (dims.size() == 2) {
    memory_size = align_to_x(dims[1], IMAGE_ALIGNMENT) * sizeof(half);
  } else {
@@ -162,7 +162,7 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) {
  fpga_copy(new_data, data_ptr, memory_size);
  filter::format_dwconv_filter(&new_data, num, height, width, scale_ptr);
  filter_tensor->reset_data_ptr(new_data);
-  filter_tensor->set_type(typeid(int8_t));
+  filter_tensor->set_type(typeid(int16_t));
 }
 void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
@@ -396,8 +396,8 @@ void expand_conv_arg(ConvArgs *arg) {
  // auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
  auto cmd = 0UL | USE_BIAS;
-  auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) |
+  auto deconv_param = ((args.deconv_tx_param.deconv_en) << 16) |
-                      ((args.deconv_tx_param.sub_conv_num) << 16) |
+                      ((args.deconv_tx_param.sub_conv_num) << 8) |
                      ((args.deconv_tx_param.omit_size) << 0);
  (*arg).driver.image_address_phy = vaddr_to_paddr(args.image.address);
  (*arg).driver.sb_address_phy = vaddr_to_paddr(args.sb_address);
@@ -623,7 +623,7 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
  fpga::format_fp16_ofm(out, dims_out_new);
  auto out_ptr = out->data<half>();
  arg->output.address =
-      out_ptr +
+      (half *)out_ptr +  // NOLINT
      omit_size * sizeof(half) *
          (align_to_x(real_out_width * arg->filter_num, IMAGE_ALIGNMENT));
  arg->output.scale_address = out->scale;
@@ -713,6 +713,7 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
    }
    for (int j = 0; j < split_num; ++j) {
+      // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
      arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
          activation_enable;
      arg->split_conv_args[i]
@@ -758,9 +759,9 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
          align_to_x(arg->split_conv_args[i]->conv_arg[j].filter_num,
                     FILTER_NUM_ALIGNMENT) *
          sizeof(int8_t);
-      auto filter_head =
+      auto filter_head = &((
-          &filter_ptr[j * element_num * filter_num_per_div +  // NOLINT
+          int8_t *)filter_ptr)[j * element_num * filter_num_per_div +  // NOLINT
-                      i * filter_sub_conv_offset];
+                               i * filter_sub_conv_offset];
      arg->split_conv_args[i]->conv_arg[j].filter_address =
          fpga_malloc(filter_size);
      arg->split_conv_args[i]->vector_conv_space.push_back(
@@ -774,6 +775,19 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
      fpga_flush(arg->split_conv_args[i]->conv_arg[j].filter_address,
                 filter_size);
+      /*{
+      static int cnt = 0;
+      std::string str = "deconv_filter";
+      if(cnt <= 1){
+          cnt++;
+          str += std::to_string(cnt);
+          int8_t result = 0;
+          fpga::savefile<int8_t>(str,
+      arg->split_conv_args[i]->conv_arg[j].filter_address, filter_size, result);
+      }
+      }*/
      size_t bs_align_num = align_to_x(
          arg->split_conv_args[i]->conv_arg[j].filter_num, BS_NUM_ALIGNMENT);
      size_t bs_size = 2 * bs_align_num * sizeof(float);
@@ -789,6 +803,20 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
      memcpy(arg->split_conv_args[i]->conv_arg[j].sb_address, bs_head, bs_size);
      fpga_flush(arg->split_conv_args[i]->conv_arg[j].sb_address, bs_size);
+      /*  {
+            static int cnt = 0;
+            std::string str = "deconv_sb";
+            if(cnt <= 1){
+                cnt++;
+                str += std::to_string(cnt);
+                float result = 0;
+                fpga::savefile<float>(str,
+         arg->split_conv_args[i]->conv_arg[j].sb_address, 2 * bs_align_num,
+         result);
+            }
+            }*/
      if (split_num == 1) {
        arg->split_conv_args[i]->conv_arg[j].output.address =
            arg->split_conv_args[i]->output.address;
@@ -835,13 +863,10 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
                     int16_t leaky_relu_negative_slope, int stride_h,
                     int stride_w, int padding_h, int padding_w,
                     float *bias_ptr) {
-  auto deleter = [](void *p) { fpga_free(p); };
+  auto filter_ptr = filter->data<int16_t>();
-  arg->vector_dwconv_space.push_back(
-      std::shared_ptr<char>(reinterpret_cast<char *>(bias_ptr), deleter));
-  auto filter_ptr = filter->data<uint8_t>();
  auto input_ptr = input->data<half>();
-  auto output_ptr = out->mutable_data<half>();
+  auto output_ptr = out->data<half>();
  arg->sub_conv_num = 1;
  // arg->relu_enabled = relu_enabled;
  arg->output.activation.activation_type = activation_enable;
@@ -960,10 +985,10 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
                    sizeof(int16_t));
    arg->dw_conv_args[i]->output.scale_address =
        static_cast<float *>(fpga_malloc(2 * sizeof(float)));
-    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(  // NOLINT
+    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
        reinterpret_cast<char *>(arg->dw_conv_args[i]->output.address),
        deleter));
-    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(  // NOLINT
+    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
        reinterpret_cast<char *>(arg->dw_conv_args[i]->output.scale_address),
        deleter));
  }

--- a/src/operators/fusion_deconv_add_bn_op.cpp
+++ b/src/operators/fusion_deconv_add_bn_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBN_OP
+#include "operators/fusion_deconv_add_bn_op.h"
+namespace paddle_mobile {
+namespace operators {}
+}  // namespace paddle_mobile
+namespace ops = paddle_mobile::operators;
+REGISTER_FUSION_MATCHER(fusion_deconv_add_bn, ops::FusionDeconvAddBNMatcher);
+#ifdef PADDLE_MOBILE_CPU
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn, ops::FusionDeconvAddBNOp);
+#endif
+#endif
--- a/src/operators/fusion_deconv_add_bn_op.h
+++ b/src/operators/fusion_deconv_add_bn_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBN_OP
+#pragma once
+#include <string>
+#include <vector>
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/deconv_add_bn_kernel.h"
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+class FusionDeconvAddBNMatcher : public framework::FusionOpMatcher {
+ public:  // Matcher whose Type() is G_OP_TYPE_FUSION_DECONV_ADD_BN.
+  FusionDeconvAddBNMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);  // first node
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM);  // add, then BN
+  }
+  void FolderNodes(  // Forwards to node->Folder with node_.Depth() and Type().
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"X", "X"}}},
+                  {G_OP_TYPE_BATCHNORM,
+                   {{"Scale", "Scale"},
+                    {"Mean", "Mean"},
+                    {"Bias", "Bias"},
+                    {"Variance", "Variance"},
+                    {"Y", "BNY"}}}},
+                 removed_nodes);  /* NOTE(review): each {"a", "b"} pair is
+       presumably (key on the folded op, key on the fused op), e.g. the
+       batch-norm "Y" becomes "BNY" - confirm against Node::Folder. */
+  }
+  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD_BN; }  // fused type
+};
+// Operator registered for the fused op type G_OP_TYPE_FUSION_DECONV_ADD_BN
+// (conv_transpose followed by elementwise_add and batch_norm, per the matcher
+// of the same type). Construction is forwarded unchanged to
+// framework::OperatorWithKernel; the only logic defined here is output-shape
+// inference.
+template <typename DeviceType, typename T>
+class FusionDeconvAddBNOp : public framework::OperatorWithKernel<
+                                DeviceType, FusionDeconvAddBNParam<DeviceType>,
+                                operators::DeconvAddBNKernel<DeviceType, T>> {
+ public:
+  // All arguments are passed straight through to the base-class constructor.
+  FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs,
+                      const VariableNameMap &outputs,
+                      const framework::AttributeMap &attrs,
+                      std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<
+            DeviceType, FusionDeconvAddBNParam<DeviceType>,
+            operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
+                                                         attrs, scope) {}
+  // Checks that input, filter, strides, paddings and dilations have
+  // consistent ranks, then resizes the output tensor to
+  //   {in_dims[0], filter_dims[1] * groups, out_0, out_1, ...}
+  // where, for every spatial axis i,
+  //   out_i = (in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i]
+  //           + dilations[i] * (filter_dims[i + 2] - 1) + 1.
+  void InferShape() const {
+    auto input = this->param_.Input();
+    auto in_dims = input->dims();
+    auto filter = this->param_.Filter();
+    auto filter_dims = filter->dims();
+    std::vector<int> strides = this->param_.Strides();
+    std::vector<int> paddings = this->param_.Paddings();
+    std::vector<int> dilations = this->param_.Dilations();
+    int groups = this->param_.Groups();
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == 4 || in_dims.size() == 5,
+        "ConvTransposeOp input should be 4-D or 5-D tensor.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == filter_dims.size(),
+        "ConvTransposeOp input dimension and filter dimension "
+        "should be the same.");
+    // Two leading axes (indices 0 and 1) are not strided; the rest are.
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() - strides.size() == 2U,
+        "ConvTransposeOp input dimension and strides dimension should "
+        "be consistent.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
+                          "ConvTransposeOp paddings dimension and strides "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
+                          "ConvTransposeOp paddings dimension and dilations "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims[1] == filter_dims[0],
+        "In ConvTransposeOp, The number of input channels should "
+        "be equal to the number of filter's channels.");
+    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
+    for (size_t i = 0; i < strides.size(); ++i) {
+      // Extent covered by the dilated filter along spatial axis i.
+      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
+      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
+                             2 * paddings[i] + filter_extent);
+    }
+    this->param_.Output()->Resize(framework::make_ddim(output_shape));
+  }
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif  // FUSION_DECONVADDBN_OP
--- a/src/operators/fusion_deconv_add_bn_relu_op.cpp
+++ b/src/operators/fusion_deconv_add_bn_relu_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBNRELU_OP
+#include "operators/fusion_deconv_add_bn_relu_op.h"
+namespace paddle_mobile {
+namespace operators {}
+}  // namespace paddle_mobile
+namespace ops = paddle_mobile::operators;
+REGISTER_FUSION_MATCHER(fusion_deconv_add_bn_relu,
+                        ops::FusionDeconvAddBNReluMatcher);
+#ifdef PADDLE_MOBILE_CPU
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_deconv_add_bn_relu, ops::FusionDeconvAddBNReluOp);
+#endif
+#endif
--- a/src/operators/fusion_deconv_add_bn_relu_op.h
+++ b/src/operators/fusion_deconv_add_bn_relu_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBNRELU_OP
+#pragma once
+#include <string>
+#include <vector>
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/deconv_add_bn_relu_kernel.h"
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+class FusionDeconvAddBNReluMatcher : public framework::FusionOpMatcher {
+ public:  // Matcher whose Type() is G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU.
+  FusionDeconvAddBNReluMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV_TRANSPOSE);  // first node
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
+        std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM) >
+        std::make_shared<framework::Node>(G_OP_TYPE_RELU);  // add, BN, relu
+  }
+  void FolderNodes(  // Forwards to node->Folder with node_.Depth() and Type().
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),
+                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"X", "X"}}},
+                  {G_OP_TYPE_BATCHNORM,
+                   {{"Scale", "Scale"},
+                    {"Mean", "Mean"},
+                    {"Bias", "Bias"},
+                    {"Variance", "Variance"},
+                    {"Y", "BNY"}}}},
+                 removed_nodes);  /* NOTE(review): each {"a", "b"} pair is
+       presumably (key on the folded op, key on the fused op); the relu node
+       has no entry in this map - confirm against Node::Folder. */
+  }
+  std::string Type() { return G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU; }  // fused
+};
+// Operator registered for the fused op type
+// G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU (conv_transpose followed by
+// elementwise_add, batch_norm and relu, per the matcher of the same type).
+// Construction is forwarded unchanged to framework::OperatorWithKernel; the
+// only logic defined here is output-shape inference.
+template <typename DeviceType, typename T>
+class FusionDeconvAddBNReluOp
+    : public framework::OperatorWithKernel<
+          DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
+          operators::DeconvAddBNReluKernel<DeviceType, T>> {
+ public:
+  // All arguments are passed straight through to the base-class constructor.
+  FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs,
+                          const VariableNameMap &outputs,
+                          const framework::AttributeMap &attrs,
+                          std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<
+            DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
+            operators::DeconvAddBNReluKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  // Checks that input, filter, strides, paddings and dilations have
+  // consistent ranks, then resizes the output tensor to
+  //   {in_dims[0], filter_dims[1] * groups, out_0, out_1, ...}
+  // where, for every spatial axis i,
+  //   out_i = (in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i]
+  //           + dilations[i] * (filter_dims[i + 2] - 1) + 1.
+  void InferShape() const {
+    auto input = this->param_.Input();
+    auto in_dims = input->dims();
+    auto filter = this->param_.Filter();
+    auto filter_dims = filter->dims();
+    std::vector<int> strides = this->param_.Strides();
+    std::vector<int> paddings = this->param_.Paddings();
+    std::vector<int> dilations = this->param_.Dilations();
+    int groups = this->param_.Groups();
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == 4 || in_dims.size() == 5,
+        "ConvTransposeOp input should be 4-D or 5-D tensor.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() == filter_dims.size(),
+        "ConvTransposeOp input dimension and filter dimension "
+        "should be the same.");
+    // Two leading axes (indices 0 and 1) are not strided; the rest are.
+    PADDLE_MOBILE_ENFORCE(
+        in_dims.size() - strides.size() == 2U,
+        "ConvTransposeOp input dimension and strides dimension should "
+        "be consistent.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == strides.size(),
+                          "ConvTransposeOp paddings dimension and strides "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(paddings.size() == dilations.size(),
+                          "ConvTransposeOp paddings dimension and dilations "
+                          "dimension should be the same.");
+    PADDLE_MOBILE_ENFORCE(
+        in_dims[1] == filter_dims[0],
+        "In ConvTransposeOp, The number of input channels should "
+        "be equal to the number of filter's channels.");
+    std::vector<int64_t> output_shape({in_dims[0], filter_dims[1] * groups});
+    for (size_t i = 0; i < strides.size(); ++i) {
+      // Extent covered by the dilated filter along spatial axis i.
+      auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
+      output_shape.push_back((in_dims[i + 2] - 1) * strides[i] -
+                             2 * paddings[i] + filter_extent);
+    }
+    this->param_.Output()->Resize(framework::make_ddim(output_shape));
+  }
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif  // FUSION_DECONVADDBNRELU_OP
--- a/src/operators/kernel/deconv_add_bn_kernel.h
+++ b/src/operators/kernel/deconv_add_bn_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBN_OP
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using framework::OpKernelBase;
+template <typename DeviceType, typename T>
+class DeconvAddBNKernel  // kernel type plugged into FusionDeconvAddBNOp
+    : public OpKernelBase<DeviceType, FusionDeconvAddBNParam<DeviceType>> {
+ public:  // Declared only; an FPGA/float specialization defines both methods.
+  void Compute(const FusionDeconvAddBNParam<DeviceType> &param);
+  bool Init(FusionDeconvAddBNParam<DeviceType> *param);
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/deconv_add_bn_relu_kernel.h
+++ b/src/operators/kernel/deconv_add_bn_relu_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBNRELU_OP
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using framework::OpKernelBase;
+template <typename DeviceType, typename T>
+class DeconvAddBNReluKernel  // kernel type plugged into FusionDeconvAddBNReluOp
+    : public OpKernelBase<DeviceType, FusionDeconvAddBNReluParam<DeviceType>> {
+ public:  // Declared only; the definitions are not part of this header.
+  void Compute(const FusionDeconvAddBNReluParam<DeviceType> &param);
+  bool Init(FusionDeconvAddBNReluParam<DeviceType> *param);
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp
--- a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp
@@ -16,13 +16,10 @@ limitations under the License. */
 #include "operators/kernel/conv_bn_relu_kernel.h"
 #include <cmath>
 namespace paddle_mobile {
 namespace operators {
 template <>
 bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
-  // bool relu_enabled = true;
  paddle_mobile::fpga::ActivationType activation_enable =
      paddle_mobile::fpga::LEAKYRELU;
  int16_t leaky_relu_negative_slope = 0;
@@ -43,7 +40,6 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
  auto new_bias = new Tensor();
  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
  for (int i = 0; i < channel; i++) {
    new_scale_ptr[i] = bn_scale_ptr[i] /
                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
@@ -51,24 +47,36 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
    bs_ptr[i + channel] = new_scale_ptr[i];
    bs_ptr[i] = new_bias_ptr[i];
  }
+  const int groups = param->Groups();
-  fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
+  if (groups == channel) {
-  fpga::SplitConvArgs conv_arg = {0};
+    fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
-  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+    fpga::DWconvArgs dwconv_arg = {0};
-                       leaky_relu_negative_slope, param->Groups(),
+    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
-                       param->Strides()[0], param->Strides()[1],
+                          leaky_relu_negative_slope, param->Strides()[0],
-                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
+                          param->Strides()[1], param->Paddings()[0],
-  param->SetFpgaArgs(conv_arg);
+                          param->Paddings()[1], new_bias_ptr);
+    param->SetFpgaArgs(dwconv_arg);
+  } else {
+    fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
+    fpga::SplitConvArgs conv_arg = {0};
+    fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                         leaky_relu_negative_slope, param->Groups(),
+                         param->Strides()[0], param->Strides()[1],
+                         param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(conv_arg);
+  }
  delete new_scale;
  delete new_bias;
  return true;
 }
 template <>
 void ConvBNReluKernel<FPGA, float>::Compute(
    const FusionConvBNReluParam<FPGA> &param) {
-  fpga::ComputeFpgaConv(param.FpgaArgs());
+  if (param.Groups() == param.Output()->dims()[1]) {
+    fpga::ComputeDWConv(param.FpgaDwconvArgs());
+  } else {
+    fpga::ComputeFpgaConv(param.FpgaArgs());
+  }
 }
 }  // namespace operators

--- a/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef CONV_TRANSPOSE_OP
+#include "operators/kernel/conv_transpose_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+template <>
+bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
+  // Prepares the FPGA arguments for conv_transpose; activation type is NONE.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
+  auto input = const_cast<Tensor *>(param->Input());
+  // No bias tensor is read by this kernel (the lookup is disabled), so the
+  // first half of bs_ptr is zero-filled in the loop below.
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
+  // Likewise the bias-size check against the output channel count is
+  // disabled, since there is no bias tensor to validate.
+  int channel = out->dims()[1];
+  int sub_conv_n = param->Strides()[0];  // both strides are enforced equal below
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n *  // NOLINT
+                                           sizeof(float));             // NOLINT
+  for (int i = 0; i < channel * sub_conv_n; i++) {
+    bs_ptr[i + sub_conv_n * channel] = 1;  // second half <- 1 (presumably scale)
+    bs_ptr[i] = 0;  // first half <- 0 (bias slot; bias lookup disabled)
+  }
+  PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
+                        "stride_width should be equal to stride_height ");
+  PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
+                        "filter width should be equal to filter height ");
+  PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
+                        "filter axis should be the multiple of stride axis ");
+  if (param->Groups() == channel) {  // groups == output channels: DWDeconv path
+    fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
+                               sub_conv_n);
+    fpga::DWDeconvArgs DWDeconv_arg = {0};
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(DWDeconv_arg);
+  } else {  // any other group count: regular deconv path
+    fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
+    fpga::DeconvArgs deconv_arg = {0};
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(deconv_arg);
+  }
+  return true;  // bs_ptr is not freed here; it was handed to the fpga helpers
+}
+template <>
+void ConvTransposeKernel<FPGA, float>::Compute(
+    const ConvTransposeParam<FPGA> &param) {
+  if (param.Groups() == param.Output()->dims()[1]) {  // same test as in Init
+    fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
+  } else {
+    fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  }
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBN_OP
+#include "operators/kernel/deconv_add_bn_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+template <>
+bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
+  // Prepares the FPGA arguments for deconv+add+bn; activation type is NONE.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
+  auto input = const_cast<Tensor *>(param->Input());
+  const Tensor *bias = param->InputBias();
+  auto bias_ptr = bias->data<float>();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
+  int sub_conv_n = param->Strides()[0];  // both strides are enforced equal below
+  auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n *  // NOLINT
+                                           sizeof(float));             // NOLINT
+  for (int i = 0; i < channel * sub_conv_n; i++) {
+    bs_ptr[i + sub_conv_n * channel] = 1;  /* NOTE(review): second half is
+       fixed to 1 and no batch-norm scale/mean/variance is read anywhere in
+       this function - confirm the batch-norm is folded elsewhere. */
+    bs_ptr[i] = bias_ptr[i % (channel)];  // channel biases, repeated per sub-conv
+  }
+  PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
+                        "stride_width should be equal to stride_height ");
+  PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
+                        "filter width should be equal to filter height ");
+  PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
+                        "filter axis should be the multiple of stride axis ");
+  if (param->Groups() == channel) {  // groups == output channels: DWDeconv path
+    fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
+                               sub_conv_n);
+    fpga::DWDeconvArgs DWDeconv_arg = {0};
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(DWDeconv_arg);
+  } else {  // any other group count: regular deconv path
+    fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
+    fpga::DeconvArgs deconv_arg = {0};
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(deconv_arg);
+  }
+  return true;  // bs_ptr is not freed here; it was handed to the fpga helpers
+}
+// Runs the deconvolution prepared by Init(). The same Groups()-versus-output-
+// channel comparison used in Init() selects which stored argument set to run.
+template <>
+void DeconvAddBNKernel<FPGA, float>::Compute(
+    const FusionDeconvAddBNParam<FPGA> &param) {
+  const bool use_dw_path = (param.Groups() == param.Output()->dims()[1]);
+  if (!use_dw_path) {
+    fpga::ComputeFpgaDeconv(param.FpgaArgs());
+    return;
+  }
+  fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_DECONVADDBNRELU_OP
+#include "operators/kernel/deconv_add_bn_relu_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+// Prepares the FPGA arguments for the fused deconv + add + bn + relu operator.
+// The activation is configured as LEAKYRELU with a negative slope of 0.
+// Depending on whether Groups() equals the output channel count, either the
+// DWDeconv or the regular deconv argument structure is filled and stored on
+// `param`. Always returns true; invalid shapes are reported via
+// PADDLE_MOBILE_ENFORCE.
+// NOTE(review): InputMean/InputScale/InputVariance/Epsilon are not read
+// here - only InputBias() is used and the second half of the buffer is set
+// to 1. Presumably the batch-norm is folded into weights/bias offline; confirm.
+template <>
+bool DeconvAddBNReluKernel<FPGA, float>::Init(
+    FusionDeconvAddBNReluParam<FPGA> *param) {
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
+  auto input = const_cast<Tensor *>(param->Input());
+  const Tensor *bias = param->InputBias();
+  auto bias_ptr = bias->data<float>();
+  auto filter = const_cast<Tensor *>(param->Filter());
+  auto out = param->Output();
+  // Run every shape/stride check before allocating FPGA memory, so that a
+  // failing check cannot strand the buffer allocated below.
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
+                        "stride_width should be equal to stride_height ");
+  PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
+                        "filter width should be equal to filter height ");
+  PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
+                        "filter axis should be the multiple of stride axis ");
+  int channel = out->dims()[1];
+  int sub_conv_n = param->Strides()[0];
+  // Buffer layout: [0, bs_len) holds the bias repeated once per
+  // sub-convolution, [bs_len, 2 * bs_len) is filled with 1.
+  const int bs_len = channel * sub_conv_n;
+  auto bs_ptr =
+      (float *)fpga::fpga_malloc(2 * bs_len * sizeof(float));  // NOLINT
+  for (int i = 0; i < bs_len; i++) {
+    bs_ptr[i + bs_len] = 1;
+    bs_ptr[i] = bias_ptr[i % (channel)];
+  }
+  if (param->Groups() == channel) {
+    // bs_ptr is passed by address: the formatter may replace the buffer.
+    fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
+                               sub_conv_n);
+    fpga::DWDeconvArgs DWDeconv_arg = {0};
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(DWDeconv_arg);
+  } else {
+    fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
+    fpga::DeconvArgs deconv_arg = {0};
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(deconv_arg);
+  }
+  return true;
+}
+// Runs the deconvolution prepared by Init(). The same Groups()-versus-output-
+// channel comparison used in Init() selects which stored argument set to run.
+template <>
+void DeconvAddBNReluKernel<FPGA, float>::Compute(
+    const FusionDeconvAddBNReluParam<FPGA> &param) {
+  const bool use_dw_path = (param.Groups() == param.Output()->dims()[1]);
+  if (!use_dw_path) {
+    fpga::ComputeFpgaDeconv(param.FpgaArgs());
+    return;
+  }
+  fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fpga/V1/pad2d_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/pad2d_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+// Guarded like the other op kernels: Pad2dParam is only declared when
+// PAD2D_OP is defined, so this file must compile to nothing otherwise.
+#ifdef PAD2D_OP
+#include "operators/kernel/pad2d_kernel.h"
+#include <cstring>
+namespace paddle_mobile {
+namespace operators {
+// Formats the output tensor as an fp16 FPGA output feature map.
+// Always returns true.
+template <>
+bool Pad2dKernel<FPGA, float>::Init(Pad2dParam<FPGA> *param) {
+  Tensor *output = param->Out();
+  fpga::format_fp16_ofm(output);
+  return true;
+}
+// Copies `input` row by row into the start of the corresponding rows of
+// `output`. Each of the input_h rows is input_c * input_w half values long;
+// output rows are spaced by (output_c * output_w) aligned to IMAGE_ALIGNMENT.
+// Output elements beyond the copied span of each row, and any rows past
+// input_h, are not written by this function.
+// NOTE(review): input rows are addressed with the unaligned stride
+// input_c * input_w while output rows use the aligned stride - confirm the
+// input buffer really is densely packed.
+void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
+  auto input_data = (input->data<half>());
+  auto output_data = (output->data<half>());
+  auto input_c = input->dims()[1];
+  auto input_h = input->dims()[2];
+  auto input_w = input->dims()[3];
+  auto output_c = output->dims()[1];
+  auto output_w = output->dims()[3];
+  auto copysize = input_c * input_w;
+  // The aligned output row stride does not depend on h; compute it once.
+  auto output_stride =
+      paddle_mobile::fpga::align_to_x(output_c * output_w, IMAGE_ALIGNMENT);
+  for (int h = 0; h < input_h; ++h) {
+    auto input_offset = h * input_c * input_w;
+    auto output_offset = h * output_stride;
+    memcpy((output_data + output_offset), (input_data + input_offset),
+           copysize * sizeof(half));
+  }
+}
+// Invalidates the input range, copies it into the output via pad2dFunc,
+// carries both scale entries over from input to output, and flushes the
+// (row-aligned) output range.
+template <>
+void Pad2dKernel<FPGA, float>::Compute(const Pad2dParam<FPGA> &param) {
+  auto in_x = param.InputX();
+  auto out = param.Out();
+  fpga::fpga_invalidate((void *)in_x->data<half>(),  // NOLINT
+                        in_x->numel() * sizeof(half));
+  pad2dFunc(in_x, out);
+  (out->scale)[0] = (in_x->scale)[0];
+  (out->scale)[1] = (in_x->scale)[1];
+  DLOG << (out->scale)[0];
+  DLOG << (out->scale)[1];
+  // Flush size = rows * aligned row length * element size.
+  size_t outputSize =
+      out->dims()[2] *
+      paddle_mobile::fpga::align_to_x((out->dims()[1]) * (out->dims()[3]),
+                                      IMAGE_ALIGNMENT) *
+      sizeof(half);
+  fpga::fpga_flush(out->data<half>(), outputSize);
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/pad2d_kernel.h
+++ b/src/operators/kernel/pad2d_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include "framework/operator.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+// Kernel interface for the pad2d operator. Only the declarations live here;
+// Init/Compute are provided per device through explicit specializations.
+template <typename DeviceType, typename T>
+class Pad2dKernel
+    : public framework::OpKernelBase<DeviceType, Pad2dParam<DeviceType>> {
+ public:
+  // One-time setup for the given parameters; returns the setup status.
+  bool Init(Pad2dParam<DeviceType> *param);
+  // Executes the operator on the tensors referenced by `param`.
+  void Compute(const Pad2dParam<DeviceType> &param);
+};
+}  // namespace operators
+}  // namespace paddle_mobile
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -1221,6 +1221,7 @@ class FetchParam : public OpParam {
  RType *input_x_;
  Tensor *out_;
 #ifdef PADDLE_MOBILE_FPGA
 public:
  fpga::BypassArgs fpga_bypass_args;
@@ -2415,6 +2416,120 @@ class FusionDeconvAddParam : public ConvTransposeParam<Dtype> {
 template <typename Dtype>
 using FusionDeconvAddReluParam = FusionDeconvAddParam<Dtype>;
 #endif
+#ifdef FUSION_DECONVADDBN_OP
+// Parameters of the fused deconv + add + bn operator: everything from
+// ConvTransposeParam plus the output tensor, the bias and batch-norm input
+// tensors, and the "epsilon" / "momentum" attributes.
+template <typename Dtype>
+class FusionDeconvAddBNParam : public ConvTransposeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+ public:
+  // Resolves all tensors from `scope` and reads the float attributes.
+  FusionDeconvAddBNParam(const VariableNameMap &inputs,
+                         const VariableNameMap &outputs,
+                         const AttributeMap &attrs, const Scope &scope)
+      : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    //    is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
+  }
+  RType *Output() const { return output_; }
+  const RType *InputBias() const { return input_bias_; }
+  const RType *InputMean() const { return input_mean_; }
+  const RType *InputScale() const { return input_scale_; }
+  const RType *InputVariance() const { return input_variance_; }
+  const float &Epsilon() const { return epsilon_; }
+  const float &Momentum() const { return momentum_; }
+  // The "is_test" attribute is not read (see constructor); this returns the
+  // member's default value.
+  const bool &IsTest() const { return is_test_; }
+  void SetNewScale(RType *new_scale) { new_scale_ = new_scale; }
+  void SetNewBias(RType *new_bias) { new_bias_ = new_bias; }
+  // nullptr until SetNewScale()/SetNewBias() has been called.
+  const RType *NewScale() const { return new_scale_; }
+  const RType *NewBias() const { return new_bias_; }
+ protected:
+  // Default initializers keep every accessor well-defined even for members
+  // the constructor never assigns (is_test_, new_bias_, new_scale_).
+  RType *output_ = nullptr;
+  RType *input_bias_ = nullptr;
+  RType *input_mean_ = nullptr;
+  RType *input_scale_ = nullptr;
+  RType *input_variance_ = nullptr;
+  float epsilon_ = 0.0f;
+  float momentum_ = 0.0f;
+  bool is_test_ = false;
+  RType *new_bias_ = nullptr;
+  RType *new_scale_ = nullptr;
+};
+#endif
+#ifdef FUSION_DECONVADDBNRELU_OP
+// Parameters of the fused deconv + add + bn + relu operator: everything from
+// ConvTransposeParam plus the output tensor, the bias and batch-norm input
+// tensors, and the "epsilon" / "momentum" attributes.
+template <typename Dtype>
+class FusionDeconvAddBNReluParam : public ConvTransposeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+ public:
+  // Resolves all tensors from `scope` and reads the float attributes.
+  FusionDeconvAddBNReluParam(const VariableNameMap &inputs,
+                             const VariableNameMap &outputs,
+                             const AttributeMap &attrs, const Scope &scope)
+      : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    momentum_ = OpParam::GetAttr<float>("momentum", attrs);
+    //    is_test_ = OpParam::GetAttr<bool>("is_test", attrs);
+  }
+  RType *Output() const { return output_; }
+  const RType *InputBias() const { return input_bias_; }
+  const RType *InputMean() const { return input_mean_; }
+  const RType *InputScale() const { return input_scale_; }
+  const RType *InputVariance() const { return input_variance_; }
+  const float &Epsilon() const { return epsilon_; }
+  const float &Momentum() const { return momentum_; }
+  // The "is_test" attribute is not read (see constructor); this returns the
+  // member's default value.
+  const bool &IsTest() const { return is_test_; }
+  void SetNewScale(RType *new_scale) { new_scale_ = new_scale; }
+  void SetNewBias(RType *new_bias) { new_bias_ = new_bias; }
+  // nullptr until SetNewScale()/SetNewBias() has been called.
+  const RType *NewScale() const { return new_scale_; }
+  const RType *NewBias() const { return new_bias_; }
+ protected:
+  // Default initializers keep every accessor well-defined even for members
+  // the constructor never assigns (is_test_, new_bias_, new_scale_).
+  RType *output_ = nullptr;
+  RType *input_bias_ = nullptr;
+  RType *input_mean_ = nullptr;
+  RType *input_scale_ = nullptr;
+  RType *input_variance_ = nullptr;
+  float epsilon_ = 0.0f;
+  float momentum_ = 0.0f;
+  bool is_test_ = false;
+  RType *new_bias_ = nullptr;
+  RType *new_scale_ = nullptr;
+};
+#endif
 #ifdef FUSION_DECONVRELU_OP
 template <typename Dtype>
@@ -3114,6 +3229,26 @@ class IncrementParam : public OpParam {
  int step_;
 };
 #endif  // INCREMENT_OP
+#ifdef PAD2D_OP
+// Parameter bundle for the pad2d operator: one input tensor and one output
+// tensor looked up in the scope. `attrs` is accepted but not consulted.
+template <typename Dtype>
+class Pad2dParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+ public:
+  Pad2dParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
+             const AttributeMap &attrs, const Scope &scope)
+      : input_x_(InputXFrom<GType>(inputs, scope)),
+        out_(OutFrom<GType>(outputs, scope)) {}
+  // Read-only view of the input tensor.
+  const RType *InputX() const { return input_x_; }
+  // Mutable output tensor.
+  RType *Out() const { return out_; }
+ private:
+  RType *input_x_;
+  RType *out_;
+};
+#endif
 }  // namespace operators
 }  // namespace paddle_mobile
--- a/src/operators/pad2d_op.cpp
+++ b/src/operators/pad2d_op.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef PAD2D_OP
+#include "operators/pad2d_op.h"
+namespace paddle_mobile {
+namespace operators {
+// Derives the output shape from the 4-D input shape: the first two
+// dimensions are kept, the last two each grow by one.
+// NOTE(review): the growth is hard-coded to +1 and no padding attribute is
+// consulted - confirm this matches every model that uses pad2d.
+template <typename Dtype, typename T>
+void Pad2dOp<Dtype, T>::InferShape() const {
+  auto in_dims = this->param_.InputX()->dims();
+  this->param_.Out()->Resize(
+      {in_dims[0], in_dims[1], in_dims[2] + 1, in_dims[3] + 1});
+}
+}  // namespace operators
+}  // namespace paddle_mobile
+// Short alias used by the registration macros below.
+namespace ops = paddle_mobile::operators;
+// Register the pad2d operator for each backend enabled at build time.
+// NOTE(review): only an FPGA Pad2dKernel specialization is visible in this
+// part of the change - confirm a CPU kernel exists for the CPU registration.
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(pad2d, ops::Pad2dOp);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(pad2d, ops::Pad2dOp);
+#endif
+#endif
--- a/src/operators/pad2d_op.h
+++ b/src/operators/pad2d_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef PAD2D_OP
+#pragma once
+#include <string>
+#include "framework/operator.h"
+#include "operators/kernel/pad2d_kernel.h"
+#include "operators/op_param.h"
+namespace paddle_mobile {
+namespace operators {
+using framework::AttributeMap;
+using framework::OperatorWithKernel;
+using framework::Scope;
+using std::string;
+// Operator wrapper for pad2d: binds Pad2dParam and Pad2dKernel for the given
+// device type and supplies the output-shape inference.
+template <typename DeviceType, typename T>
+class Pad2dOp
+    : public OperatorWithKernel<DeviceType, Pad2dParam<DeviceType>,
+                                operators::Pad2dKernel<DeviceType, T>> {
+ public:
+  // Forwards every argument unchanged to the OperatorWithKernel base.
+  Pad2dOp(const string &type, const VariableNameMap &inputs,
+          const VariableNameMap &outputs, const AttributeMap &attrs,
+          std::shared_ptr<Scope> scope)
+      : OperatorWithKernel<DeviceType, Pad2dParam<DeviceType>,
+                           operators::Pad2dKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+  // Sets the output tensor's dimensions from the input's.
+  void InferShape() const override;
+};
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -131,7 +131,12 @@ if (CON GREATER -1)
  set(PROPOSAL_OP ON)
  set(ANCHOR_GENERATOR_OP ON)
  set(SLICE_OP ON)
+  set(SIGMOID_OP ON)
+  set(CONCAT_OP ON)
+  set(PAD2D_OP ON)
+  set(CONV_TRANSPOSE_OP ON)
+  set(FUSION_DECONVADDBNRELU_OP ON)
+  set(FUSION_DECONVADDBN_OP ON)
  set(FOUND_MATCH ON)
 endif()
@@ -573,7 +578,6 @@ endif()
 if (FUSION_DECONVADDRELU_OP)
  add_definitions(-DFUSION_DECONVADDRELU_OP)
 endif()
 if (WHILE_OP)
  add_definitions(-DWHILE_OP)
 endif()
@@ -602,3 +606,12 @@ endif()
 if (ROI_PERSPECTIVE_OP)
  add_definitions(-DROI_PERSPECTIVE_OP)
 endif()
+# Export each enabled op switch as a preprocessor definition so the matching
+# #ifdef-guarded sources are compiled in.
+if (FUSION_DECONVADDBNRELU_OP)
+  add_definitions(-DFUSION_DECONVADDBNRELU_OP)
+endif()
+if (FUSION_DECONVADDBN_OP)
+  add_definitions(-DFUSION_DECONVADDBN_OP)
+endif()
+if (PAD2D_OP)
+  add_definitions(-DPAD2D_OP)
+endif()