Merge pull request #476 from smilejames/develop

modify files and code structure about central-arm-func

Merge pull request #476 from smilejames/develop
modify files and code structure about central-arm-func
641d9510 · Ruilong Liu · GitHub · c71c2f88 · 1cd5ce84 · 641d9510
9 changed file
--- a/src/operators/kernel/arm/batchnorm_kernel.cpp
+++ b/src/operators/kernel/arm/batchnorm_kernel.cpp
@@ -17,7 +17,7 @@ limitations under the License. */
 #pragma once

 #include "operators/kernel/batchnorm_kernel.h"
-#include "operators/kernel/central-arm-func/batchnorm_func.h"
+#include "operators/kernel/central-arm-func/batchnorm_arm_func.h"

 namespace paddle_mobile {
 namespace operators {

--- a/src/operators/kernel/arm/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/arm/conv_add_relu_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADD_RELU_OP

 #include "operators/kernel/conv_add_relu_kernel.h"
-#include "operators/kernel/central-arm-func/conv_add_relu_func.h"
+#include "operators/kernel/central-arm-func/conv_add_relu_arm_func.h"

 namespace paddle_mobile {
 namespace operators {

--- a/src/operators/kernel/arm/conv_kernel.cpp
+++ b/src/operators/kernel/arm/conv_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef CONV_OP

 #include "operators/kernel/conv_kernel.h"
-#include "operators/kernel/central-arm-func/conv_func.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"

 namespace paddle_mobile {
 namespace operators {

--- a/src/operators/kernel/arm/depthwise_conv_kernel.cpp
+++ b/src/operators/kernel/arm/depthwise_conv_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef DEPTHWISECONV_OP

 #include "operators/kernel/depthwise_conv_kernel.h"
-#include "operators/kernel/conv_kernel.h"
+#include "operators/kernel/central-arm-func/depthwise_conv_arm_func.h"

 namespace paddle_mobile {
 namespace operators {
@@ -27,91 +27,7 @@ bool DepthwiseConvKernel<CPU, float>::Init(const ConvParam &para) const {

 template <>
 void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam &param) const {
-  LOG(kLOG_DEBUG) << param;
-
-  const Tensor *input = param.Input();
-  Tensor filter = *param.Filter();
-  Tensor *output = param.Output();
-  output->mutable_data<float>();
-  int groups = param.Groups();
-  std::vector<int> strides = param.Strides();
-  std::vector<int> paddings = param.Paddings();
-  std::vector<int> dilations = param.Dilations();
-
-  //  DLOG << " compute end get Attrs " << strides[0];
-
-  const int batch_size = static_cast<int>(input->dims()[0]);
-
-  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
-  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
-  size_t data_dim = filter_shape_vec.size() - 2;
-  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
-  col_shape_vec[0] = input->dims()[1] / groups;
-  for (size_t j = 0; j < data_dim; ++j) {
-    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
-    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
-  }
-  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
-
-  framework::DDim col_matrix_shape =
-      framework::flatten_to_2d(col_shape, data_dim + 1);
-
-  bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
-  Tensor col;
-  Tensor col_matrix;
-  if (is_expand) {
-    col.mutable_data<float>(col_shape);
-    col_matrix.ShareDataWith(col);
-    col_matrix.Resize(col_matrix_shape);
-  }
-
-  framework::DDim input_shape = framework::slice_ddim(
-      input->dims(), 1, static_cast<int>(input->dims().size()));
-
-  framework::DDim filter_matrix_shape = {filter.dims()[0],
-                                         filter.numel() / filter.dims()[0]};
-  filter.Resize(filter_matrix_shape);
-  framework::DDim output_matrix_shape = {
-      output->dims()[1],
-      output->numel() / (output->dims()[0] * output->dims()[1])};
-
-  // convolution operator: im2col(or vol2col) + gemm
-  int in_step = static_cast<int>(input->dims()[1]) / groups;
-  int out_step = static_cast<int>(output->dims()[1]) / groups;
-
-  math::Vol2ColFunctor<CPU, float> vol2col;
-  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
-
-  for (int i = 0; i < batch_size; i++) {
-    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
-    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
-
-    for (int g = 0; g < groups; g++) {
-      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
-
-      if (!is_expand) {
-        col.ShareDataWith(in_slice);
-        col_matrix.ShareDataWith(col);
-        col_matrix.Resize(col_matrix_shape);
-      } else if (data_dim == 2U) {
-        // im2col
-        im2col(in_slice, dilations, strides,
-               std::vector<int>{paddings[0], paddings[1], paddings[0],
-                                paddings[1]},
-               &col);
-      } else if (data_dim == 3U) {
-        // vol2col
-        vol2col(in_slice, dilations, strides, paddings, &col);
-      }
-
-      // gemm
-      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
-      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
-      math::matmul<float>(filter_slice, false, col_matrix, false,
-                          static_cast<float>(1), &out_slice,
-                          static_cast<float>(0));
-    }
-  }
+  DepthwiseConvCompute<float>(param);
 }

 template class DepthwiseConvKernel<CPU, float>;

--- a/src/operators/kernel/central-arm-func/batchnorm_func.h
+++ b/src/operators/kernel/central-arm-func/batchnorm_func.h
--- a/src/operators/kernel/central-arm-func/conv_add_relu_func.h
+++ b/src/operators/kernel/central-arm-func/conv_add_relu_func.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADD_RELU_OP

 #pragma once
+#include <vector>
 #include "operators/op_param.h"

 namespace paddle_mobile {

--- a/src/operators/kernel/central-arm-func/conv_func.h
+++ b/src/operators/kernel/central-arm-func/conv_func.h
@@ -15,6 +15,8 @@ limitations under the License. */
 #ifdef CONV_OP

 #pragma once
+#include <vector>
+#include "operators/math/conv_func.h"
 #include "operators/op_param.h"

 namespace paddle_mobile {
@@ -48,7 +50,8 @@ void ConvCompute(const ConvParam &param) {
  framework::DDim col_matrix_shape =
      framework::flatten_to_2d(col_shape, data_dim + 1);

-  bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
+  bool is_expand =
+      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
  Tensor col;
  Tensor col_matrix;
  if (is_expand) {

--- a/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef DEPTHWISECONV_OP
+
+#pragma once
+#include <vector>
+#include "operators/math/conv_func.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <typename P>
+void DepthwiseConvCompute(const ConvParam &param) {
+  const Tensor *input = param.Input();
+  Tensor filter = *param.Filter();
+  Tensor *output = param.Output();
+  output->mutable_data<float>();
+  int groups = param.Groups();
+  std::vector<int> strides = param.Strides();
+  std::vector<int> paddings = param.Paddings();
+  std::vector<int> dilations = param.Dilations();
+
+  //  DLOG << " compute end get Attrs " << strides[0];
+
+  const int batch_size = static_cast<int>(input->dims()[0]);
+
+  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
+  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
+  size_t data_dim = filter_shape_vec.size() - 2;
+  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
+  col_shape_vec[0] = input->dims()[1] / groups;
+  for (size_t j = 0; j < data_dim; ++j) {
+    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
+    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
+  }
+  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
+
+  framework::DDim col_matrix_shape =
+      framework::flatten_to_2d(col_shape, data_dim + 1);
+
+  bool is_expand =
+      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
+  Tensor col;
+  Tensor col_matrix;
+  if (is_expand) {
+    col.mutable_data<float>(col_shape);
+    col_matrix.ShareDataWith(col);
+    col_matrix.Resize(col_matrix_shape);
+  }
+
+  framework::DDim input_shape = framework::slice_ddim(
+      input->dims(), 1, static_cast<int>(input->dims().size()));
+
+  framework::DDim filter_matrix_shape = {filter.dims()[0],
+                                         filter.numel() / filter.dims()[0]};
+  filter.Resize(filter_matrix_shape);
+  framework::DDim output_matrix_shape = {
+      output->dims()[1],
+      output->numel() / (output->dims()[0] * output->dims()[1])};
+
+  // convolution operator: im2col(or vol2col) + gemm
+  int in_step = static_cast<int>(input->dims()[1]) / groups;
+  int out_step = static_cast<int>(output->dims()[1]) / groups;
+
+  math::Vol2ColFunctor<CPU, float> vol2col;
+  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
+
+  for (int i = 0; i < batch_size; i++) {
+    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
+
+    for (int g = 0; g < groups; g++) {
+      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
+
+      if (!is_expand) {
+        col.ShareDataWith(in_slice);
+        col_matrix.ShareDataWith(col);
+        col_matrix.Resize(col_matrix_shape);
+      } else if (data_dim == 2U) {
+        // im2col
+        im2col(in_slice, dilations, strides,
+               std::vector<int>{paddings[0], paddings[1], paddings[0],
+                                paddings[1]},
+               &col);
+      } else if (data_dim == 3U) {
+        // vol2col
+        vol2col(in_slice, dilations, strides, paddings, &col);
+      }
+
+      // gemm
+      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
+      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+      math::matmul<float>(filter_slice, false, col_matrix, false,
+                          static_cast<float>(1), &out_slice,
+                          static_cast<float>(0));
+    }
+  }
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
--- a/src/operators/kernel/conv_kernel.h
+++ b/src/operators/kernel/conv_kernel.h
@@ -35,21 +35,6 @@ class ConvKernel : public OpKernelBase<DeviceType, ConvParam> {
  bool Init(const ConvParam &para) const;
 };

-inline bool IsExpand(const std::vector<int64_t> &filter_dim,
-                     const std::vector<int> &strides,
-                     const std::vector<int> &paddings,
-                     const std::vector<int> &dilations) {
-  bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;
-  for (size_t j = 0; j < strides.size(); ++j) {
-    filter_1 = filter_1 && (static_cast<int>(filter_dim[j + 2]) == 1);
-    strides_1 = strides_1 && (strides[j] == 1);
-    padding_0 = padding_0 && (paddings[j] == 0);
-    dilation_1 = dilation_1 && (dilations[j] == 1);
-  }
-
-  return !(filter_1 && strides_1 && padding_0 && dilation_1);
-}
-
 }  // namespace operators
 }  // namespace paddle_mobile