Merge pull request #1303 from zhangyang0701/develop

add kernels for V1 for FPGA track

Merge pull request #1303 from zhangyang0701/develop
add kernels for V1 for FPGA track
76c60710 · qnqinan · GitHub · a6cc7b0a · d662c4ac · 76c60710
9 changed files
--- a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp
@@ -12,49 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#ifdef FUSION_CONVADDBNRELU_OP
+#ifdef FUSION_CONVADD_OP

-#include "operators/kernel/conv_add_bn_relu_kernel.h"
+#include "operators/kernel/conv_add_kernel.h"

 namespace paddle_mobile {
 namespace operators {

 template <>
-bool ConvAddBNReluKernel<FPGA, float>::Init(
-    FusionConvAddBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
+  bool relu_enabled = false;
  auto input = const_cast<Tensor *>(param->Input());
  const Tensor *bias = param->Bias();
  auto bias_ptr = bias->data<float>();
  auto filter = const_cast<Tensor *>(param->Filter());
  auto out = param->Output();
-  auto bn_mean_ptr = param->InputMean()->data<float>();
-  auto bn_var_ptr = param->InputVariance()->data<float>();
-  auto bn_scale_ptr = param->InputScale()->data<float>();
-  auto bn_bias_ptr = param->InputBias()->data<float>();
-  const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
-                            bias->dims()[0] == param->InputBias()->dims()[0],
-                        "Output channel should be equal to bias number");

-  const int channel = out->dims()[1];
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
  auto bs_ptr =
      (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
-  auto new_scale = new Tensor();
-  auto new_bias = new Tensor();
-  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
-  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
-
  for (int i = 0; i < channel; i++) {
-    new_scale_ptr[i] = bn_scale_ptr[i] /
-                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
-    new_bias_ptr[i] =
-        bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
-    bs_ptr[i + 2] = new_scale_ptr[i];
-    bs_ptr[i] = new_bias_ptr[i];
+    bs_ptr[i + channel] = 1;
+    bs_ptr[i] = bias_ptr[i];
  }
-  param->SetNewScale(new_scale);
-  param->SetNewBias(new_bias);

  float max_value = fpga::filter_find_max(filter);
  fpga::format_filter(filter, max_value, param->Groups());
@@ -75,8 +57,8 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
 }

 template <>
-void ConvAddBNReluKernel<FPGA, float>::Compute(
-    const FusionConvAddBNReluParam<FPGA> &param) {
+void ConvAddKernel<FPGA, float>::Compute(
+    const FusionConvAddParam<FPGA> &param) {
  fpga::ComputeFpgaConv(param.FpgaArgs());
 }


--- a/src/operators/kernel/fpga/V2/fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/fc_relu_kernel.cpp
@@ -11,55 +11,26 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef FUSION_FCRELU_OP
-#include "operators/kernel/fc_relu_kernel.h"
+
+#ifdef FUSION_DECONVADD_OP
+
+#include "operators/kernel/deconv_add_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"

 namespace paddle_mobile {
 namespace operators {

 template <>
-bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
-  bool relu_enabled = true;
-  auto input_x = const_cast<LoDTensor *>(param->InputX());
-  auto filter = const_cast<Tensor *>(param->InputY());
-  auto input_z = param->InputZ();
-  auto input_z_ptr = input_z->data<float>();
-  auto out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
-                        "Image channel should be equal to weight number");
-  int channel = (uint32_t)out->dims()[1];
-  auto bs_ptr =
-      (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
-  for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
-  }
-
-  int num = (uint32_t)filter->dims()[1];
-  int chw = (uint32_t)filter->dims()[0];
-  PADDLE_MOBILE_ENFORCE(
-      chw == input_x->numel(),
-      "Filter element num should be equal to IFM element num");
-  int height = (uint32_t)input_x->dims()[2];
-  int width = (uint32_t)input_x->dims()[3];
-  int filter_channel = chw / height / width;
-
-  out->Resize(framework::make_ddim({1, channel, 1, 1}));
-  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  fpga::format_fc_data(filter, out, bs_ptr);
-
-  fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
-  param->SetFpgaArgs(conv_arg);
+bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
  return true;
 }
+
 template <>
-void FusionFcReluKernel<FPGA, float>::Compute(
-    const FusionFcReluParam<FPGA> &param) {
-  fpga::ComputeFpgaConv(param.FpgaArgs());
-}
+void DeconvAddKernel<FPGA, float>::Compute(
+    const FusionDeconvAddParam<FPGA> &param) {}

 }  // namespace operators
 }  // namespace paddle_mobile
+
 #endif
--- a/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_DECONVADDRELU_OP
+
+#include "operators/kernel/deconv_add_relu_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool DeconvAddReluKernel<FPGA, float>::Init(
+    FusionDeconvAddReluParam<FPGA> *param) {
+  return true;  // stub: param is not read, nothing is set up; always true
+}
+
+template <>
+void DeconvAddReluKernel<FPGA, float>::Compute(
+    const FusionDeconvAddReluParam<FPGA> &param) {}  // stub: empty body
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/src/operators/kernel/fpga/V1/fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/fc_relu_kernel.cpp
@@ -11,60 +11,54 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef FUSION_FCRELU_OP
-#include "operators/kernel/fc_relu_kernel.h"
+#ifdef ELEMENTWISEADD_OP
+
+#include "operators/kernel/elementwise_add_kernel.h"

 namespace paddle_mobile {
 namespace operators {

 template <>
-bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
-  bool relu_enabled = true;
-  auto input_x = const_cast<LoDTensor *>(param->InputX());
-  auto filter = const_cast<Tensor *>(param->InputY());
-  auto input_z = param->InputZ();
-  auto input_z_ptr = input_z->data<float>();
-  auto out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
-                        "Image channel should be equal to weight number");
-  int channel = (uint32_t)out->dims()[1];
-  auto bs_ptr =
-      (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
-  for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
-  }
-
-  int num = (uint32_t)filter->dims()[1];
-  int chw = (uint32_t)filter->dims()[0];
-  PADDLE_MOBILE_ENFORCE(
-      chw == input_x->numel(),
-      "Filter element num should be equal to IFM element num");
-  int height = (uint32_t)input_x->dims()[2];
-  int width = (uint32_t)input_x->dims()[3];
-  int filter_channel = chw / height / width;
-
-  out->Resize(framework::make_ddim({1, channel, 1, 1}));
-  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  float max_value = fpga::filter_find_max(filter);
-  fpga::format_fc_filter(filter, max_value);
-
-  int element_num_per_div = fpga::get_filter_num_per_div(filter, 1);
-  fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
+bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
+  bool relu_enabled = false;
+  auto *input_x = const_cast<LoDTensor *>(param->InputX());
+  auto *input_y = const_cast<LoDTensor *>(param->InputY());
+  auto *out = param->Out();
+  auto input_x_ptr = input_x->data<float>();
+  auto input_y_ptr = input_y->data<float>();
  fpga::format_fp16_ofm(out);
+  auto out_ptr = out->mutable_data<float>();

-  fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
-  param->SetFpgaArgs(conv_arg);
+  fpga::EWAddArgs ewaddArgs = {0};
+  ewaddArgs.relu_enabled = relu_enabled;
+  ewaddArgs.const0 = 0x3c00;  // =1
+  ewaddArgs.const1 = 0x3c00;  // =1
+  ewaddArgs.image0.address = input_x_ptr;
+  ewaddArgs.image0.channels = (uint32_t)input_x->dims()[1];
+  ewaddArgs.image0.scale_address = input_x->scale;
+  ewaddArgs.image0.height = (uint32_t)input_x->dims()[2];
+  ewaddArgs.image0.width = (uint32_t)input_x->dims()[3];
+  ewaddArgs.image0.pad_height = 0;
+  ewaddArgs.image0.pad_width = 0;
+  ewaddArgs.image1.address = input_y_ptr;
+  ewaddArgs.image1.channels = (uint32_t)input_y->dims()[1];
+  ewaddArgs.image1.scale_address = input_y->scale;
+  ewaddArgs.image1.height = (uint32_t)input_y->dims()[2];
+  ewaddArgs.image1.width = (uint32_t)input_y->dims()[3];
+  ewaddArgs.image1.pad_height = 0;
+  ewaddArgs.image1.pad_width = 0;
+  ewaddArgs.output.scale_address = out->scale;
+  ewaddArgs.output.address = out_ptr;
+  param->SetFpgaArgs(ewaddArgs);
  return true;
 }
+
 template <>
-void FusionFcReluKernel<FPGA, float>::Compute(
-    const FusionFcReluParam<FPGA> &param) {
-  fpga::ComputeFpgaConv(param.FpgaArgs());
+void ElementwiseAddKernel<FPGA, float>::Compute(
+    const ElementwiseAddParam<FPGA> &param) {
+  fpga::ComputeFpgaEWAdd(param.FpgaArgs());
 }
-
 }  // namespace operators
 }  // namespace paddle_mobile
+
 #endif
--- a/src/operators/kernel/fpga/V1/split_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/split_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef SPLIT_OP
+
+#include "operators/kernel/split_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+template <>
+bool SplitKernel<FPGA, float>::Init(SplitParam<FPGA>* param) {
+  return true;  // stub: param is not read, nothing is set up; always true
+}
+template <>
+void SplitKernel<FPGA, float>::Compute(const SplitParam<FPGA>& param) {}  // stub: empty body
+
+}  // namespace operators
+}  // namespace paddle_mobile
+#endif
--- a/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
@@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#ifdef CONV_TRANSPOSE_OP
+#ifdef TANH_OP

-#include "operators/kernel/conv_transpose_kernel.h"
+#include "operators/kernel/tanh_kernel.h"

 namespace paddle_mobile {
 namespace operators {

 template <>
-bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
+bool TanhKernel<FPGA, float>::Init(TanhParam<FPGA> *param) {
  return true;
 }

 template <>
-void ConvTransposeKernel<FPGA, float>::Compute(
-    const ConvTransposeParam<FPGA> &param) {}
+void TanhKernel<FPGA, float>::Compute(const TanhParam<FPGA> &param) {}

 }  // namespace operators
 }  // namespace paddle_mobile

--- a/src/operators/kernel/fpga/V1/transpose2_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/transpose2_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef TRANSPOSE2_OP
+
+#include "operators/kernel/transpose2_kernel.h"
+#include "operators/kernel/central-arm-func/transpose2_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool Transpose2Kernel<FPGA, float>::Init(Transpose2Param<FPGA> *param) {
+  return true;  // stub: param is not read, nothing is set up; always true
+}
+
+template <>
+void Transpose2Kernel<FPGA, float>::Compute(
+    const Transpose2Param<FPGA> &param) {
+  // Call below is commented out, so this body currently does nothing:
+  // Transpose2Compute<float>(param);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -2211,7 +2211,6 @@ class DropoutParam : public OpParam {
 };
 #endif

-#ifdef CONV_TRANSPOSE_OP
 template <typename Dtype>
 class ConvTransposeParam : public OpParam {
  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
@@ -2266,7 +2265,7 @@ class ConvTransposeParam : public OpParam {
  void SetFpgaArgs(const fpga::DeconvArgs &args) { fpga_conv_args = args; }
 #endif
 };
-#endif
+
 #ifdef FUSION_DECONVADD_OP
 template <typename Dtype>
 class FusionDeconvAddParam : public ConvTransposeParam<Dtype> {

--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -109,16 +109,19 @@ list(FIND NET "FPGA_NET_V1" CON)
 if (CON GREATER -1)
  message("FPGA_NET_V1 enabled")
  set(FUSION_CONVADDRELU_OP ON)
-  set(FUSION_CONVADDBNRELU_OP ON)
-  set(FUSION_CONVADDBN_OP ON)
  set(FUSION_ELEMENTWISEADDRELU_OP ON)
  set(FUSION_FC_OP ON)
-  set(FUSION_FCRELU_OP ON)
  set(POOL_OP ON)
-  set(CONCAT_OP ON)
  set(SOFTMAX_OP ON)
  set(FUSION_CONVBNRELU_OP ON)
  set(FUSION_CONVBN_OP ON)
+  set(TANH_OP ON)
+  set(ELEMENTWISEADD_OP ON)
+  set(TRANSPOSE2_OP ON)
+  set(FUSION_CONVADD_OP ON)
+  set(SPLIT_OP ON)
+  set(FUSION_DECONVADD_OP ON)
+  set(FUSION_DECONVADDRELU_OP ON)
  set(FOUND_MATCH ON)
 endif()

@@ -132,7 +135,6 @@ if (CON GREATER -1)
  set(SOFTMAX_OP ON)
  set(FUSION_CONVBNRELU_OP ON)
  set(FUSION_CONVBN_OP ON)
-  set(CONV_TRANSPOSE_OP ON)
  set(TANH_OP ON)
  set(ELEMENTWISEADD_OP ON)
  set(TRANSPOSE2_OP ON)
@@ -140,8 +142,6 @@ if (CON GREATER -1)
  set(SPLIT_OP ON)
  set(FUSION_DECONVADD_OP ON)
  set(FUSION_DECONVADDRELU_OP ON)
-
-
  set(FOUND_MATCH ON)
 endif()