Commit bd00d449 authored by Chon, committed by GitHub

Merge pull request #769 from zhangyang0701/develop

Implement conv_bn & conv_bn_relu for the FPGA GoogLeNet; closes #768
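For context: the fused operators fold the batch-norm statistics into a per-channel scale/bias pair that the FPGA conv engine applies after the convolution (the sb buffer in the kernels below). A minimal sketch of that folding, with illustrative names (FoldBatchNorm is not part of this codebase; the kernels inline the same loop):

#include <cmath>
#include <vector>

// BN(conv(x)) == new_scale * conv(x) + new_bias, per output channel.
void FoldBatchNorm(const std::vector<float> &gamma,  // BN scale
                   const std::vector<float> &beta,   // BN bias
                   const std::vector<float> &mean,
                   const std::vector<float> &var, float epsilon,
                   std::vector<float> *new_scale,
                   std::vector<float> *new_bias) {
  new_scale->resize(gamma.size());
  new_bias->resize(gamma.size());
  for (size_t i = 0; i < gamma.size(); ++i) {
    (*new_scale)[i] = gamma[i] / std::sqrt(var[i] + epsilon);
    (*new_bias)[i] = beta[i] - mean[i] * (*new_scale)[i];
  }
}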
......@@ -41,7 +41,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
void RunImpl() const { fpga::PerformBypass(param_.FpgaArgs()); }
void Init() {
const Tensor *input = param_.InputX();
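    // mutable_data<>() is non-const, so constness is cast away on the feed input below.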
-    auto input_ptr = input->mutable_data<float>();
+    auto input_ptr = (const_cast<Tensor *>(input))->mutable_data<float>();
Tensor *output = param_.Out();
auto output_ptr = output->mutable_data<half>();
fpga::BypassArgs args;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBN_OP
#include "operators/fusion_conv_bn_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionConvBNOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
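  // Spatial dims use the usual convolution arithmetic (assuming ConvOutputSize
  // implements it): out = (in + 2 * pad - (dilation * (k - 1) + 1)) / stride + 1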
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_bn, ops::FusionConvBNOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_bn, ops::FusionConvBNOp);
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBN_OP
#pragma once
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_bn_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;
class FusionConvBNMatcher : public framework::FusionOpMatcher {
public:
FusionConvBNMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
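    // The overloaded operator> chains match nodes: a conv followed directly by BN.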
node_ > std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_BATCHNORM,
{{"Scale", "Scale"},
{"Mean", "Mean"},
{"Bias", "Bias"},
{"Variance", "Variance"}}}},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_BN; }
};
template <typename DeviceType, typename T>
class FusionConvBNOp : public framework::OperatorWithKernel<
DeviceType, FusionConvBNParam,
operators::ConvBNKernel<DeviceType, T>> {
public:
FusionConvBNOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, FusionConvBNParam,
operators::ConvBNKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
void InferShape() const override;
protected:
};
#ifdef PADDLE_MOBILE_CPU
#ifndef FUSION_CONV_BN_REGISTER
static framework::FusionOpRegistrar fusion_conv_bn_registrar(
new FusionConvBNMatcher());
#define FUSION_CONV_BN_REGISTER
#endif
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef FUSION_CONV_BN_REGISTER
static framework::FusionOpRegistrar fusion_conv_bn_registrar(
new FusionConvBNMatcher());
#define FUSION_CONV_BN_REGISTER
#endif
#endif
} // namespace operators
} // namespace paddle_mobile
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(fusion_conv_bn);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_bn);
#endif
#endif
......@@ -55,6 +55,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
#endif
#endif
......@@ -87,6 +87,12 @@ static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef FUSION_CONV_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
new FusionConvBNReluMatcher());
#define FUSION_CONV_BN_RELU_REGISTER
#endif
#endif
} // namespace operators
......@@ -98,6 +104,7 @@ USE_OP_CPU(fusion_conv_bn_relu);
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(fusion_conv_bn_relu);
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef FUSION_CONVBN_OP
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using framework::DDim;
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class ConvBNKernel : public OpKernelBase<DeviceType, FusionConvBNParam> {
public:
void Compute(const FusionConvBNParam &param) const;
bool Init(FusionConvBNParam *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -61,8 +61,8 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
param->SetNewBias(new_bias);
+  fpga::quantify_filter(filter);
+  auto filter_ptr = filter->data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = (void *)filter_ptr;
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "memory/t_malloc.h"
#include "fpga/fpga_quantilization.h"
namespace paddle_mobile {
namespace operators {
......@@ -27,8 +27,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) {
auto input_ptr = input->data<half>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
-  auto filter_ptr = filter->data<float>();
+  Tensor *filter = param->Filter();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<half>();
auto bn_mean_ptr = param->InputMean()->data<float>();
......@@ -57,6 +56,8 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(FusionConvAddBNReluParam *param) {
}
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
+  fpga::quantify_filter(filter);
+  auto filter_ptr = filter->data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
......
......@@ -15,7 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDRELU_OP
#include "operators/kernel/conv_add_relu_kernel.h"
#include "common/enforce.h"
#include "fpga/fpga_quantilization.h"
namespace paddle_mobile {
namespace operators {
......@@ -27,8 +27,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
auto input_ptr = input->data<half>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
-  auto filter_ptr = filter->data<float>();
+  Tensor *filter = param->Filter();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<half>();
......@@ -41,6 +40,9 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam *param) {
bs_ptr[i * 2 + 1] = bias_ptr[i];
}
+  fpga::quantify_filter(filter);
+  auto filter_ptr = filter->data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = (void *)filter_ptr;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBN_OP
#include "operators/kernel/conv_bn_kernel.h"
#include "fpga/api/fpga_api.h"
#include "fpga/fpga_quantilization.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam *param) {
bool relu_enabled = false;
const Tensor *input = param->Input();
auto input_ptr = input->data<half>();
Tensor *filter = param->Filter();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<half>();
auto bn_mean_ptr = param->InputMean()->data<float>();
auto bn_var_ptr = param->InputVariance()->data<float>();
auto bn_scale_ptr = param->InputScale()->data<float>();
auto bn_bias_ptr = param->InputBias()->data<float>();
const float epsilon = param->Epsilon();
PADDLE_MOBILE_ENFORCE(input->dims()[1] == param->InputBias()->dims()[0],
"Image channel should be equal to bias number");
const int channel = input->dims()[1];
float *bs_ptr =
reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
Tensor *new_scale = new Tensor();
Tensor *new_bias = new Tensor();
auto new_scale_ptr = new_scale->mutable_data<float>({channel});
auto new_bias_ptr = new_bias->mutable_data<float>({channel});
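  // Fold BN into a per-channel affine transform:
  //   new_scale = gamma / sqrt(var + epsilon), new_bias = beta - mean * new_scale,
  // and interleave {scale, bias} pairs into bs_ptr for the FPGA sb buffer.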
for (int i = 0; i < channel; i++) {
new_scale_ptr[i] = bn_scale_ptr[i] /
static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
new_bias_ptr[i] = bn_bias_ptr[i] + (0 - bn_mean_ptr[i]) * new_scale_ptr[i];
bs_ptr[i * 2] = new_scale_ptr[i];
bs_ptr[i * 2 + 1] = new_bias_ptr[i];
}
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
fpga::quantify_filter(filter);
auto filter_ptr = filter->data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = (void *)filter_ptr;
convArgs.filter_num = filter->dims()[0];
convArgs.group_num = param->Groups();
convArgs.sb_address = (void *)bs_ptr;
convArgs.kernel.stride_h = param->Strides()[0];
convArgs.kernel.stride_w = param->Strides()[1];
convArgs.kernel.height = filter->dims()[2];
convArgs.kernel.width = filter->dims()[3];
convArgs.image.address = (void *)input_ptr;
convArgs.image.channels = input->dims()[1];
convArgs.image.height = input->dims()[2];
convArgs.image.width = input->dims()[3];
convArgs.image.pad_height = param->Paddings()[0];
convArgs.image.pad_width = param->Paddings()[1];
convArgs.image.scale_address = input->fpga_args().scale_pointer();
convArgs.output.address = (void *)out_ptr;
convArgs.output.scale_address = out->fpga_args().scale_pointer();
param->SetFpgaArgs(convArgs);
return true;
}
template <>
void ConvBNKernel<FPGA, float>::Compute(const FusionConvBNParam &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvBNKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#include "operators/kernel/conv_bn_relu_kernel.h"
#include "fpga/fpga_quantilization.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam *param) {
bool relu_enabled = true;
const Tensor *input = param->Input();
auto input_ptr = input->data<half>();
Tensor *filter = param->Filter();
Tensor *out = param->Output();
auto out_ptr = out->mutable_data<half>();
auto bn_mean_ptr = param->InputMean()->data<float>();
auto bn_var_ptr = param->InputVariance()->data<float>();
auto bn_scale_ptr = param->InputScale()->data<float>();
auto bn_bias_ptr = param->InputBias()->data<float>();
const float epsilon = param->Epsilon();
PADDLE_MOBILE_ENFORCE(input->dims()[1] == param->InputBias()->dims()[0],
"Image channel should be equal to bias number");
const int channel = input->dims()[1];
float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
Tensor *new_scale = new Tensor();
Tensor *new_bias = new Tensor();
auto new_scale_ptr = new_scale->mutable_data<float>({channel});
auto new_bias_ptr = new_bias->mutable_data<float>({channel});
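  // Same BN folding as in ConvBNKernel: interleaved per-channel {scale, bias}.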
for (int i = 0; i < channel; i++) {
new_scale_ptr[i] = bn_scale_ptr[i] /
static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
new_bias_ptr[i] = bn_bias_ptr[i] + (0 - bn_mean_ptr[i]) * new_scale_ptr[i];
bs_ptr[i * 2] = new_scale_ptr[i];
bs_ptr[i * 2 + 1] = new_bias_ptr[i];
}
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
fpga::quantify_filter(filter);
auto filter_ptr = filter->data<float>();
fpga::ConvArgs convArgs;
convArgs.relu_enabled = relu_enabled;
convArgs.filter_address = (void *)filter_ptr;
convArgs.filter_num = filter->dims()[0];
convArgs.group_num = param->Groups();
convArgs.sb_address = (void *)bs_ptr;
convArgs.kernel.stride_h = param->Strides()[0];
convArgs.kernel.stride_w = param->Strides()[1];
convArgs.kernel.height = filter->dims()[2];
convArgs.kernel.width = filter->dims()[3];
convArgs.image.address = (void *)input_ptr;
convArgs.image.channels = input->dims()[1];
convArgs.image.height = input->dims()[2];
convArgs.image.width = input->dims()[3];
convArgs.image.pad_height = param->Paddings()[0];
convArgs.image.pad_width = param->Paddings()[1];
convArgs.image.scale_address = input->fpga_args().scale_pointer();
convArgs.output.address = (void *)out_ptr;
convArgs.output.scale_address = out->fpga_args().scale_pointer();
param->SetFpgaArgs(convArgs);
return true;
}
template <>
void ConvBNReluKernel<FPGA, float>::Compute(
const FusionConvBNReluParam &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvBNReluKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -1127,6 +1127,100 @@ class FusionConvAddBNReluParam : public OpParam {
};
#endif
#ifdef FUSION_CONVBN_OP
class FusionConvBNParam : public OpParam {
public:
FusionConvBNParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
axis_ = GetAttr<int>("axis", attrs);
filter_ = FilterFrom<LoDTensor>(inputs, scope);
input_ = InputFrom<LoDTensor>(inputs, scope);
output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
strides_ = GetAttr<vector<int>>("strides", attrs);
paddings_ = GetAttr<vector<int>>("paddings", attrs);
dilations_ = GetAttr<vector<int>>("dilations", attrs);
groups = GetAttr<int>("groups", attrs);
input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs);
// is_test_ = GetAttr<bool>("is_test", attrs);
}
const int &Axis() const { return axis_; }
const Tensor *Input() const { return input_; }
#ifdef PADDLE_MOBILE_FPGA
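  // The FPGA path needs a mutable filter: quantify_filter() rewrites the
  // weights in place.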
Tensor *Filter() const { return filter_; }
#else
const Tensor *Filter() const { return filter_; }
#endif
Tensor *Output() const { return output_y_; }
const vector<int> &Strides() const { return strides_; }
const vector<int> &Paddings() const { return paddings_; }
const vector<int> &Dilations() const { return dilations_; }
const int &Groups() const { return groups; }
const Tensor *InputBias() const { return input_bias_; }
const Tensor *InputMean() const { return input_mean_; }
const Tensor *InputScale() const { return input_scale_; }
const Tensor *InputVariance() const { return input_variance_; }
const float &Epsilon() const { return epsilon_; }
const float &Momentum() const { return momentum_; }
const bool &IsTest() const { return is_test_; }
void SetNewScale(Tensor *new_scale) { new_scale_ = new_scale; }
void SetNewBias(Tensor *new_bias) { new_bias_ = new_bias; }
const Tensor *NewScale() const { return new_scale_; }
const Tensor *NewBias() const { return new_bias_; }
protected:
int axis_;
Tensor *input_;
Tensor *output_y_;
Tensor *filter_;
vector<int> strides_;
vector<int> paddings_;
vector<int> dilations_;
int groups;
Tensor *input_bias_;
Tensor *input_mean_;
Tensor *input_scale_;
Tensor *input_variance_;
float epsilon_;
float momentum_;
bool is_test_;
Tensor *new_bias_;
Tensor *new_scale_;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::ConvArgs fpga_conv_args;
public:
const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
#ifdef FUSION_CONVADDBN_OP
class FusionConvAddBNParam : public OpParam {
public:
......@@ -1329,7 +1423,11 @@ class FusionConvBNReluParam : public OpParam {
const Tensor *Input() const { return input_; }
#ifdef PADDLE_MOBILE_FPGA
Tensor *Filter() const { return filter_; }
#else
const Tensor *Filter() const { return filter_; }
#endif
Tensor *Output() const { return output_; }
......@@ -1380,6 +1478,15 @@ class FusionConvBNReluParam : public OpParam {
bool is_test_;
Tensor *new_bias_;
Tensor *new_scale_;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::ConvArgs fpga_conv_args;
public:
const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
#endif
};
#endif
......
......@@ -17,20 +17,27 @@ limitations under the License. */
#include "../test_include.h"
int main() {
#ifdef PADDLE_MOBILE_FPGA
paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
#endif
#ifdef PADDLE_MOBILE_CPU
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
#endif
paddle_mobile.SetThreadNum(4);
bool optimize = true;
auto time1 = time();
-  if (paddle_mobile.Load(g_googlenet, optimize)) {
+  if (paddle_mobile.Load(g_resnet, optimize)) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
-  GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+  // GetInput<float>(g_test_image_1x3x224x224, &input, dims);
  // Warm up once
auto vec_result = paddle_mobile.Predict(input, dims);
auto time3 = time();
-  for (int i = 0; i < 10; ++i) {
+  for (int i = 0; i < 1; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time4 = time();
......
......@@ -82,7 +82,8 @@ if ("FPGAnets" IN_LIST NET)
set(CONCAT_OP ON)
set(SOFTMAX_OP ON)
set(DROPOUT_OP ON)
-  set(CONV_OP ON)
+  # set(CONV_OP ON)
  set(FUSION_CONVBNRELU_OP ON)
+  set(FUSION_CONVBN_OP ON)
set(FOUND_MATCH ON)
endif()
......@@ -241,8 +242,8 @@ endif()
if (FUSION_ELEMENTWISEADDRELU_OP)
add_definitions(-DFUSION_ELEMENTWISEADDRELU_OP)
endif()
-if (REGION_OP)
-  add_definitions(-DREGION_OP)
+if (FUSION_CONVBN_OP)
+  add_definitions(-DFUSION_CONVBN_OP)
endif()
if (CONV_TRANSPOSE_OP)
......