fix result error

075a6c0c · eclipsess · 6b22a7b9 · 075a6c0c · 075a6c0c · 075a6c0c
4 changed files
--- a/src/operators/kernel/central-arm-func/conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.h
@@ -15,19 +15,17 @@ limitations under the License. */
 #ifdef CONV_OP

 #pragma once
+#include <operators/math/depthwise_conv_3x3.h>
 #include <vector>
-#include "operators/math/conv_func.h"
 #include "operators/op_param.h"

 namespace paddle_mobile {
 namespace operators {

-template <typename P>
-void ConvCompute(const ConvParam &param) {
+inline void ConvBasic(const ConvParam &param) {
  const Tensor *input = param.Input();
  Tensor filter = *param.Filter();
  Tensor *output = param.Output();
-  output->mutable_data<float>();
  int groups = param.Groups();
  std::vector<int> strides = param.Strides();
  std::vector<int> paddings = param.Paddings();
@@ -98,17 +96,37 @@ void ConvCompute(const ConvParam &param) {
        // vol2col
        vol2col(in_slice, dilations, strides, paddings, &col);
      }
-
      // gemm
      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
      math::matmul<float>(filter_slice, false, col_matrix, false,
                          static_cast<float>(1), &out_slice,
-                          static_cast<float>(0));
+                          static_cast<float>(1));
    }
  }
 }

+template <typename P>
+void ConvCompute(const ConvParam &param) {
+  Tensor Bias;
+  Bias.mutable_data<float>({param.Groups()});
+  if (param.Groups() == param.Input()->dims()[1] &&
+      param.Input()->dims()[1] == param.Output()->dims()[1] &&
+      param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
+      param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
+    math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
+                               &Bias, false);
+  } else if (param.Groups() == param.Input()->dims()[1] &&
+             param.Input()->dims()[1] == param.Output()->dims()[1] &&
+             param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
+             param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) {
+    math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
+                           param.Filter(), &Bias, param.Output(), false);
+  } else {
+    ConvBasic(param);
+  }
+}
+
 }  // namespace operators
 }  // namespace paddle_mobile


--- a/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
@@ -15,8 +15,9 @@ limitations under the License. */
 #ifdef DEPTHWISECONV_OP

 #pragma once
+#include <operators/math/depthwise_conv_3x3.h>
 #include <vector>
-#include "operators/math/conv_func.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
 #include "operators/op_param.h"

 namespace paddle_mobile {
@@ -24,89 +25,21 @@ namespace operators {

 template <typename P>
 void DepthwiseConvCompute(const ConvParam &param) {
-  const Tensor *input = param.Input();
-  Tensor filter = *param.Filter();
-  Tensor *output = param.Output();
-  output->mutable_data<float>();
-  int groups = param.Groups();
-  std::vector<int> strides = param.Strides();
-  std::vector<int> paddings = param.Paddings();
-  std::vector<int> dilations = param.Dilations();
-
-  //  DLOG << " compute end get Attrs " << strides[0];
-
-  const int batch_size = static_cast<int>(input->dims()[0]);
-
-  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
-  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
-  size_t data_dim = filter_shape_vec.size() - 2;
-  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
-  col_shape_vec[0] = input->dims()[1] / groups;
-  for (size_t j = 0; j < data_dim; ++j) {
-    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
-    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
-  }
-  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
-
-  framework::DDim col_matrix_shape =
-      framework::flatten_to_2d(col_shape, data_dim + 1);
-
-  bool is_expand =
-      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
-  Tensor col;
-  Tensor col_matrix;
-  if (is_expand) {
-    col.mutable_data<float>(col_shape);
-    col_matrix.ShareDataWith(col);
-    col_matrix.Resize(col_matrix_shape);
-  }
-
-  framework::DDim input_shape = framework::slice_ddim(
-      input->dims(), 1, static_cast<int>(input->dims().size()));
-
-  framework::DDim filter_matrix_shape = {filter.dims()[0],
-                                         filter.numel() / filter.dims()[0]};
-  filter.Resize(filter_matrix_shape);
-  framework::DDim output_matrix_shape = {
-      output->dims()[1],
-      output->numel() / (output->dims()[0] * output->dims()[1])};
-
-  // convolution operator: im2col(or vol2col) + gemm
-  int in_step = static_cast<int>(input->dims()[1]) / groups;
-  int out_step = static_cast<int>(output->dims()[1]) / groups;
-
-  math::Vol2ColFunctor<CPU, float> vol2col;
-  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
-
-  for (int i = 0; i < batch_size; i++) {
-    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
-    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
-
-    for (int g = 0; g < groups; g++) {
-      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
-
-      if (!is_expand) {
-        col.ShareDataWith(in_slice);
-        col_matrix.ShareDataWith(col);
-        col_matrix.Resize(col_matrix_shape);
-      } else if (data_dim == 2U) {
-        // im2col
-        im2col(in_slice, dilations, strides,
-               std::vector<int>{paddings[0], paddings[1], paddings[0],
-                                paddings[1]},
-               &col);
-      } else if (data_dim == 3U) {
-        // vol2col
-        vol2col(in_slice, dilations, strides, paddings, &col);
-      }
-
-      // gemm
-      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
-      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
-      math::matmul<float>(filter_slice, false, col_matrix, false,
-                          static_cast<float>(1), &out_slice,
-                          static_cast<float>(0));
-    }
+  Tensor Bias;
+  Bias.mutable_data<float>({param.Groups()});
+  if (param.Groups() == param.Input()->dims()[1] &&
+      param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
+      param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
+    math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
+                               &Bias, false);
+  } else if (param.Groups() == param.Input()->dims()[1] &&
+             param.Input()->dims()[1] == param.Output()->dims()[1] &&
+             param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
+             param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) {
+    math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
+                           param.Filter(), &Bias, param.Output(), false);
+  } else {
+    ConvBasic(param);
  }
 }


--- a/src/operators/math/depthwise_conv_3x3.cpp
+++ b/src/operators/math/depthwise_conv_3x3.cpp
@@ -275,33 +275,38 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
      float w22 = filter_data_tmp[8];

      output_data[0] = w11 * input_data[0] + w12 * input_data[1] +
-                       w21 * input_data[l] + w22 * input_data[l + 1] +
-                       bias_data[j];
+                       w21 * input_data[l] + w22 * input_data[l + 1];
      output_data[l - 1] = w10 * input_data[l - 2] + w11 * input_data[l - 1] +
                           w20 * input_data[2 * l - 2] +
-                           w21 * input_data[2 * l - 1] + bias_data[j];
+                           w21 * input_data[2 * l - 1];
      output_data[(l - 1) * l] =
          w01 * input_data[(l - 2) * l] + w02 * input_data[(l - 2) * l + 1] +
-          w11 * input_data[(l - 1) * l] + w12 * input_data[(l - 1) * l + 1] +
-          bias_data[j];
+          w11 * input_data[(l - 1) * l] + w12 * input_data[(l - 1) * l + 1];
      output_data[l * l - 1] = w00 * input_data[(l - 2) * (l + 1)] +
                               w01 * input_data[(l - 2) * (l + 1) + 1] +
                               w10 * input_data[l * l - 2] +
-                               w11 * input_data[l * l - 1] + bias_data[j];
-
+                               w11 * input_data[l * l - 1];
+      if (if_bias) {
+        output_data[0] += bias_data[j];
+        output_data[l - 1] += bias_data[j];
+        output_data[(l - 1) * l] += bias_data[j];
+        output_data[l * l - 1] += bias_data[j];
+      }
      for (int i = 1; i < l - 1; ++i) {
        output_data[i * l] =
            w01 * input_data[i * l - l] + w02 * input_data[i * l - l + 1] +
            w11 * input_data[i * l] + w12 * input_data[i * l + 1] +
-            w21 * input_data[i * l + l] + w22 * input_data[i * l + l + 1] +
-            bias_data[j];
+            w21 * input_data[i * l + l] + w22 * input_data[i * l + l + 1];
        output_data[i * l + l - 1] = w00 * input_data[i * l + l - 1 - l - 1] +
                                     w01 * input_data[i * l + l - 1 - l] +
                                     w10 * input_data[i * l + l - 1 - 1] +
                                     w11 * input_data[i * l + l - 1] +
                                     w20 * input_data[i * l + l - 1 + l - 1] +
-                                     w21 * input_data[i * l + l - 1 + l] +
-                                     bias_data[j];
+                                     w21 * input_data[i * l + l - 1 + l];
+        if (if_bias) {
+          output_data[i * l] += bias_data[j];
+          output_data[i * l + l - 1] += bias_data[j];
+        }
      }

      // top 1 row and bottom 1 row
@@ -502,6 +507,7 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
    }
  }
 }
+
 void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, Tensor filter,
                                   Tensor *output, Tensor *bias, bool if_bias,
                                   Tensor *new_scale, Tensor *new_bias,

--- a/tools/push2android.sh
+++ b/tools/push2android.sh
+#!/usr/bin/env sh
+
+push_fn () {
+sh build.sh android
+MODELS_PATH="../test/models/*"
+MODELS_SRC="../test/models"
+IMAGE_PATH="../test/images/*"
+EXE_FILE="../test/build/*"
+EXE_DIR="data/local/tmp/bin"
+adb shell mkdir ${EXE_DIR}
+MODELS_DIR="data/local/tmp/models"
+adb shell mkdir ${MODELS_DIR}
+for file in `ls ${MODELS_SRC}`
+do 
+    adb shell mkdir ${MODELS_DIR}"/"${file}
+done
+
+if [[ -d "../src/operators/kernel/mali/ACL_Android/build" ]]; then
+ACL_BUILD_PATH="../src/operators/kernel/mali/ACL_Android/build/*"
+adb push ${ACL_BUILD_PATH} ${EXE_DIR}
+fi
+
+IMAGES_DIR="data/local/tmp/images"
+adb shell mkdir ${IMAGES_DIR}
+LIB_PATH="../build/release/arm-v7a/build/*"
+adb push ${EXE_FILE} ${EXE_DIR}
+adb push ${LIB_PATH} ${EXE_DIR}
+if [[ $1 != "npm" ]]; then
+adb push ${IMAGE_PATH} ${IMAGES_DIR}
+adb push ${MODELS_PATH} ${MODELS_DIR}
+fi
+}
+
+# Entry point: forward the "npm" flag to push_fn when it is the first argument;
+# otherwise call push_fn with no arguments. Uses the POSIX `[ ... = ... ]` form
+# because `[[` and `==` are bash extensions and this script runs under `sh`.
+if [ "$1" = "npm" ]; then
+push_fn "$1"
+else
+push_fn
+fi