Merge pull request #400 from cocodark/develop

add conv_add op & param

Merge pull request #400 from cocodark/develop
add conv_add op & param
422bddc4 · WangLiu · GitHub · 09df644f · db9c183d · 422bddc4
8 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,6 +36,7 @@ else()
 endif()

 if (USE_EXCEPTION)
+    message(STATUS "use exception")
    add_definitions(-DENABLE_EXCEPTION)
    add_definitions(-fexceptions)
 else()
@@ -86,6 +87,7 @@ if (googlenet)
    add_definitions(-DFUSION_FC_OP)
    add_definitions(-DPOOL_OP)
    add_definitions(-DRELU_OP)
+    add_definitions(-DFUSION_CONVADD_OP)
 elseif (mobilenet)
    add_definitions(-DCONV_OP)
    add_definitions(-DELEMENTWISEADD_OP)

--- a/src/common/types.h
+++ b/src/common/types.h
@@ -99,6 +99,7 @@ static std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
    op_input_output_key = {
        {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
+        {G_OP_TYPE_CONV_ADD, {{"Input"}, {"Out"}}},
        {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
        {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
        {G_OP_TYPE_MUL, {{"X"}, {"Out"}}},

--- a/src/operators/fusion_conv_add.cpp
+++ b/src/operators/fusion_conv_add.cpp
@@ -12,14 +12,37 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#ifdef FUSIONCONVADD_OP
+#ifdef FUSION_CONVADD_OP

 #include "operators/fusion_conv_add.h"
+
 namespace paddle_mobile {
 namespace operators {

 template <typename Dtype, typename T>
-void FushionConvAddOp<Dtype, T>::InferShape() const {}
+void FushionConvAddOp<Dtype, T>::InferShape() const {
+  // Derives the output tensor's dims from the input dims, the filter dims and
+  // the stride/padding/dilation attributes, then resizes param_.Output().
+  auto in_dims = param_.Input()->dims();
+  auto filter_dims = param_.Filter()->dims();
+  // Bind by reference: the accessors hand back references to the vectors held
+  // by param_, so nothing needs to be copied just to read them.
+  const std::vector<int> &strides = param_.Strides();
+  const std::vector<int> &paddings = param_.Paddings();
+  const std::vector<int> &dilations = param_.Dilations();
+
+  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
+                         dilations.size() == paddings.size() &&
+                         paddings.size() == strides.size()),
+                        "ConvParam is not suitable");
+
+  // The first two output dims are taken from in_dims[0] and filter_dims[0];
+  // the remaining ones are computed per axis from dims at index i + 2.
+  // NOTE(review): presumably NCHW-style layout (batch, channels, spatial...)
+  // -- confirm against the callers.
+  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
+  for (size_t i = 0; i < strides.size(); ++i) {
+    output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
+                                          dilations[i], paddings[i],
+                                          strides[i]));
+  }
+
+  framework::DDim ddim = framework::make_ddim(output_shape);
+  param_.Output()->Resize(ddim);
+}
 template class FushionConvAddOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile

--- a/src/operators/fusion_conv_add.h
+++ b/src/operators/fusion_conv_add.h
@@ -12,15 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#ifdef FUSIONCONVADD_OP
+#ifdef FUSION_CONVADD_OP

 #pragma once

 #include <string>
 #include <vector>
-
 #include "framework/operator.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
+#include "op_param.h"
+#include "operators/kernel/conv_add_kernel.h"

 namespace paddle_mobile {
 namespace operators {
@@ -53,19 +54,32 @@ class FushionConvAddOp : public framework::OperatorWithKernel<DeviceType> {
                   const framework::AttributeMap &attrs,
                   std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
-                                                  scope) {}
+                                                  scope),
+        param_(inputs, outputs, attrs, *scope) {}

-  void RunImpl() const {}
+  void RunImpl() const {
+    operators::ConvAddKernel<DeviceType, T> kernel;
+    kernel.Compute(param_);
+    this->ClearVariables({"Filter", "Input", "Y"});
+  }

  using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
  void InferShape() const override;

 protected:
-  //  FushionFcParam param_;
+  FushionConvAddParam param_;
 };

+// Output extent of a convolution along a single axis:
+//   (input_size + 2 * padding - dilated_kernel) / stride + 1
+// where dilated_kernel = dilation * (filter_size - 1) + 1. The division is
+// plain int division, so any remainder is discarded.
+inline int ConvOutputSize(int input_size, int filter_size, int dilation,
+                          int padding, int stride) {
+  // Span covered by the filter once the dilation gaps are counted in.
+  const int dilated_kernel = dilation * (filter_size - 1) + 1;
+  return (input_size + 2 * padding - dilated_kernel) / stride + 1;
+}
+
 #ifdef PADDLE_MOBILE_CPU
-static framework::FusionOpRegistrar fc_registrar(new FusionConvAddMatcher());
+static framework::FusionOpRegistrar convadd_registrar(
+    new FusionConvAddMatcher());
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif

--- a/src/operators/kernel/arm/conv_add_kernel.cpp
+++ b/src/operators/kernel/arm/conv_add_kernel.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef FUSION_CONVADD_OP
+
+#include "operators/kernel/conv_add_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// CPU/float specialization of ConvAddKernel::Compute.
+//
+// Runs a grouped convolution of param.Input() with param.Filter() into
+// param.Output(): for every batch element and every group, the input slice is
+// turned into a column buffer (im2col for 2 spatial dims, vol2col for 3) and
+// multiplied with the matching rows of the 2-D reshaped filter.
+//
+// NOTE(review): apart from the DLOG dump, nothing in this function reads
+// param.Bias() or param.Axis(), and the last scalar passed to math::matmul is
+// 0 (presumably beta). No add step is visible here -- confirm whether the
+// bias addition of the fused conv+add happens somewhere else.
+template <>
+void ConvAddKernel<CPU, float>::Compute(
+    const FushionConvAddParam &param) const {
+  DLOG << param;
+
+  const Tensor *input = param.Input();
+  // Local Tensor object; it is reshaped to 2-D further down via Resize().
+  Tensor filter = *param.Filter();
+  Tensor *output = param.Output();
+  output->mutable_data<float>();
+  int groups = param.Groups();
+  std::vector<int> strides = param.Strides();
+  std::vector<int> paddings = param.Paddings();
+  std::vector<int> dilations = param.Dilations();
+
+  //  DLOG << " compute end get Attrs " << strides[0];
+
+  const int batch_size = static_cast<int>(input->dims()[0]);
+
+  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
+
+  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
+  // Number of spatial dims: filter rank minus its two leading dims.
+  size_t data_dim = filter_shape_vec.size() - 2;
+  // Column buffer shape:
+  //   [input dim 1 / groups, filter spatial dims..., output spatial dims...]
+  // (input dim 1 is presumably the channel count -- confirm the layout).
+  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
+  col_shape_vec[0] = input->dims()[1] / groups;
+  for (size_t j = 0; j < data_dim; ++j) {
+    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
+    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
+  }
+  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
+
+  // 2-D view of the column shape, split after the first data_dim + 1 dims
+  // (going by the name flatten_to_2d -- helper is not visible here).
+  framework::DDim col_matrix_shape =
+      framework::flatten_to_2d(col_shape, data_dim + 1);
+
+  // IsExpand is false only when every filter spatial extent and stride is 1,
+  // every padding is 0 and every dilation is 1; in that case no column buffer
+  // is allocated and the input slice is used directly in the loop below.
+  bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
+  Tensor col;
+  Tensor col_matrix;
+  if (is_expand) {
+    col.mutable_data<float>(col_shape);
+    col_matrix.ShareDataWith(col);
+    col_matrix.Resize(col_matrix_shape);
+  }
+
+  // Dims of one batch element: input dims with the leading dim removed.
+  framework::DDim input_shape = framework::slice_ddim(
+      input->dims(), 1, static_cast<int>(input->dims().size()));
+
+  // Filter as a matrix: [filter dim 0, everything else flattened].
+  framework::DDim filter_matrix_shape = {filter.dims()[0],
+                                         filter.numel() / filter.dims()[0]};
+  filter.Resize(filter_matrix_shape);
+  DLOG << " filter.dims() = " << filter.dims();
+  // One batch element of the output as a matrix:
+  //   [output dim 1, remaining elements per batch element].
+  framework::DDim output_matrix_shape = {
+      output->dims()[1],
+      output->numel() / (output->dims()[0] * output->dims()[1])};
+
+  // convolution operator: im2col(or vol2col) + gemm
+  // Rows of input dim 1 / output dim 1 handled by each group.
+  int in_step = static_cast<int>(input->dims()[1]) / groups;
+  int out_step = static_cast<int>(output->dims()[1]) / groups;
+
+  math::Vol2ColFunctor<CPU, float> vol2col;
+  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
+
+  for (int i = 0; i < batch_size; i++) {
+    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
+
+    for (int g = 0; g < groups; g++) {
+      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
+
+      // NOTE(review): when is_expand is true but data_dim is neither 2 nor 3,
+      // none of the branches below writes into col before the matmul.
+      if (!is_expand) {
+        // No expansion needed: the column matrix is the input slice itself,
+        // only viewed with the 2-D column shape.
+        col.ShareDataWith(in_slice);
+        col_matrix.ShareDataWith(col);
+        col_matrix.Resize(col_matrix_shape);
+      } else if (data_dim == 2U) {
+        // im2col
+        // The two paddings are passed twice, as {p0, p1, p0, p1}.
+        im2col(in_slice, dilations, strides,
+               std::vector<int>{paddings[0], paddings[1], paddings[0],
+                                paddings[1]},
+               &col);
+      } else if (data_dim == 3U) {
+        // vol2col
+        vol2col(in_slice, dilations, strides, paddings, &col);
+      }
+
+      // gemm
+      // Both the output and the filter are sliced with out_step, i.e. by the
+      // rows that belong to group g.
+      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
+      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+      math::matmul<float>(filter_slice, false, col_matrix, false,
+                          static_cast<float>(1), &out_slice,
+                          static_cast<float>(0));
+    }
+  }
+}
+template class ConvAddKernel<CPU, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/src/operators/kernel/conv_add_kernel.h
+++ b/src/operators/kernel/conv_add_kernel.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVADD_OP
+
+#pragma once
+
+#include <vector>
+#include "framework/operator.h"
+#include "operators/math/im2col.h"
+#include "operators/math/math_function.h"
+#include "operators/math/vol2col.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+using framework::OpKernelBase;
+
+// Kernel of the fused conv+add operator, parameterised on the device type and
+// the element type. Compute is only declared here; its definition has to be
+// supplied for each <DeviceType, T> combination that is instantiated.
+template <typename DeviceType, typename T>
+class ConvAddKernel : public OpKernelBase<DeviceType, FushionConvAddParam> {
+ public:
+  // Executes the kernel on the tensors and attributes held by `param`.
+  void Compute(const FushionConvAddParam &param) const;
+};
+
+// Tells whether the input has to be expanded into a column buffer before the
+// matrix multiply. Returns false only if, for every axis j in
+// [0, strides.size()), filter_dim[j + 2] == 1, strides[j] == 1,
+// paddings[j] == 0 and dilations[j] == 1; returns true otherwise.
+inline bool IsExpand(const std::vector<int64_t> &filter_dim,
+                     const std::vector<int> &strides,
+                     const std::vector<int> &paddings,
+                     const std::vector<int> &dilations) {
+  const size_t axis_count = strides.size();
+  for (size_t axis = 0; axis < axis_count; ++axis) {
+    const bool trivial_axis = static_cast<int>(filter_dim[axis + 2]) == 1 &&
+                              strides[axis] == 1 && paddings[axis] == 0 &&
+                              dilations[axis] == 1;
+    if (!trivial_axis) {
+      // A single non-trivial axis is enough to require the expansion.
+      return true;
+    }
+  }
+  return false;
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/src/operators/op_param.cpp
+++ b/src/operators/op_param.cpp
@@ -39,5 +39,31 @@ Print &operator<<(Print &printer, const ConvParam &conv_param) {
 }
 #endif

+#ifdef FUSION_CONVADD_OP
+
+// Streams a human-readable dump of a FushionConvAddParam into `printer`:
+// strides, paddings, dilations, groups and the dims of the input, filter,
+// bias and output tensors. Returns `printer` so calls can be chained.
+Print &operator<<(Print &printer, const FushionConvAddParam &conv_param) {
+  // Prints "<label> (v0, v1, ...) \n". Every element is printed and
+  // separated, so values such as (1, 2) and (12) can be told apart and
+  // vectors that do not hold exactly two entries are handled as well.
+  auto print_ints = [&printer](const char *label,
+                               const std::vector<int> &values) {
+    printer << label << " (";
+    for (size_t i = 0; i < values.size(); ++i) {
+      if (i != 0) {
+        printer << ", ";
+      }
+      printer << values[i];
+    }
+    printer << ") "
+            << "\n";
+  };
+
+  printer << "parameter of conv_add: "
+          << "\n";
+  print_ints("  stride: ", conv_param.Strides());
+  print_ints("  paddings: ", conv_param.Paddings());
+  print_ints("  dilations: ", conv_param.Dilations());
+  printer << "  groups: " << conv_param.Groups() << "\n";
+  printer << "  input  dims: " << conv_param.Input()->dims() << "\n";
+  printer << "  filter dims: " << conv_param.Filter()->dims() << "\n";
+  printer << "  bias dims: " << conv_param.Bias()->dims() << "\n";
+  printer << "  output dims: " << conv_param.Output()->dims();
+  return printer;
+}
+
+#endif
+
 }  // namespace operators
 }  // namespace paddle_mobile
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -165,6 +165,8 @@ class OpParam {
  template <typename T>
  static T *GetVarValue(const string &key, const VariableNameMap &var_map,
                        const Scope &scope) {
+    PADDLE_MOBILE_ENFORCE(var_map.count(key) > 0,
+                          "%s is not contained in var_map", key.c_str())
    auto var_vec = var_map.at(key);
    if (!var_vec.empty()) {
      auto var = scope.FindVar(var_vec[0]);
@@ -787,5 +789,54 @@ class FushionFcParam : public OpParam {
 };
 #endif

+#ifdef FUSION_CONVADD_OP
+// Parameters of the fused conv+add operator. The constructor looks up the
+// bias, filter, input and output tensors in `scope` and reads the "axis",
+// "strides", "paddings", "dilations" and "groups" attributes from `attrs`;
+// afterwards the object only exposes accessors.
+class FushionConvAddParam : public OpParam {
+ public:
+  FushionConvAddParam(const VariableNameMap &inputs,
+                      const VariableNameMap &outputs, const AttributeMap &attrs,
+                      const Scope &scope) {
+    // Resolved through InputYFrom -- presumably the "Y" input of the op.
+    bias_ = InputYFrom<LoDTensor>(inputs, scope);
+    axis_ = GetAttr<int>("axis", attrs);
+    filter_ = FilterFrom<LoDTensor>(inputs, scope);
+    input_ = InputFrom<LoDTensor>(inputs, scope);
+    output_ = OutFrom<LoDTensor>(outputs, scope);
+    strides_ = GetAttr<vector<int>>("strides", attrs);
+    paddings_ = GetAttr<vector<int>>("paddings", attrs);
+    dilations_ = GetAttr<vector<int>>("dilations", attrs);
+    groups_ = GetAttr<int>("groups", attrs);
+  }
+
+  // Tensor obtained via InputYFrom.
+  Tensor *Bias() const { return bias_; }
+
+  // Value of the "axis" attribute.
+  const int &Axis() const { return axis_; }
+
+  // Tensor obtained via InputFrom.
+  const Tensor *Input() const { return input_; }
+
+  // Tensor obtained via FilterFrom.
+  const Tensor *Filter() const { return filter_; }
+
+  // Tensor obtained via OutFrom; returned as a mutable pointer.
+  Tensor *Output() const { return output_; }
+
+  // Value of the "strides" attribute.
+  const vector<int> &Strides() const { return strides_; }
+
+  // Value of the "paddings" attribute.
+  const vector<int> &Paddings() const { return paddings_; }
+
+  // Value of the "dilations" attribute.
+  const vector<int> &Dilations() const { return dilations_; }
+
+  // Value of the "groups" attribute.
+  const int &Groups() const { return groups_; }
+
+ private:
+  Tensor *bias_;
+  int axis_;
+  Tensor *input_;
+  Tensor *output_;
+  Tensor *filter_;
+  vector<int> strides_;
+  vector<int> paddings_;
+  vector<int> dilations_;
+  // Named with a trailing underscore like every other member.
+  int groups_;
+};
+
+Print &operator<<(Print &printer, const FushionConvAddParam &conv_param);
+#endif
+
 }  // namespace operators
 }  // namespace paddle_mobile