Merge pull request #1195 from Eclipsess/develop

fix #1194 add test-eng(not all) and fix some op

Merge pull request #1195 from Eclipsess/develop
fix #1194 add test-eng(not all) and fix some op
a8f851af · Ray Liu · GitHub · 9e72b6e7 · ddbcabed · a8f851af
5 changed files
--- a/src/operators/kernel/arm/im2sequence_kernel.cpp
+++ b/src/operators/kernel/arm/im2sequence_kernel.cpp
@@ -35,7 +35,7 @@ template <>
 void Im2SequenceKernel<CPU, float>::Compute(
    const Im2SequenceParam<CPU> &param) const {
  const Tensor *in_x = param.Input();
-  Tensor *out = param.Output();
+  framework::LoDTensor *out = param.Output();
  out->mutable_data<float>();

  std::vector<int> kernels = param.Kernels();
@@ -52,22 +52,31 @@ void Im2SequenceKernel<CPU, float>::Compute(
                                       paddings[2], strides[0]);
  int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
                                      paddings[3], strides[1]);
-  const std::vector<int> dilations({1, 1});

+  out->mutable_data<float>({batch_size * output_height * output_width,
+                            img_channels * kernels[0] * kernels[1]});
+  const std::vector<int> dilations({1, 1});
  // TODO: verify
  auto out_dims = out->dims();
  out->Resize({batch_size, out->numel() / batch_size});
-
  for (int i = 0; i < batch_size; i++) {
    const Tensor src =
        in_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
    Tensor dst = out->Slice(i, i + 1).Resize(
        {output_height, output_width, img_channels, kernels[0], kernels[1]});
-
    math::Im2ColFunctor<math::ColFormat::kOCF, CPU, float> f;
    f(src, dilations, strides, paddings, &dst);
  }
  out->Resize(out_dims);
+  framework::LoD lod(1);
+  lod[0].reserve(batch_size + 1);
+  int offset = 0;
+  lod[0].push_back(offset);
+  for (int i = 0; i < batch_size; ++i) {
+    offset += output_height * output_width;
+    lod[0].push_back(offset);
+  }
+  out->set_lod(lod);
 }

 template class Im2SequenceKernel<CPU, float>;

--- a/src/operators/kernel/central-arm-func/pool_arm_func.h
+++ b/src/operators/kernel/central-arm-func/pool_arm_func.h
@@ -76,7 +76,7 @@ void PoolCompute(const PoolParam<CPU> &param) {
      }
    }

-  } else if (ksize[0] == 2 && ksize[0] == ksize[1] && strides[0] == 2 &&
+  } else if (0 && ksize[0] == 2 && ksize[0] == ksize[1] && strides[0] == 2 &&
             strides[0] == strides[1] && paddings[0] == paddings[1] &&
             paddings[1] == 0) {
 #if __ARM_NEON

--- a/src/operators/math/im2col.cpp
+++ b/src/operators/math/im2col.cpp
@@ -53,7 +53,7 @@ void Im2ColFunctor<ColFormat::kCFO, CPU, float>::operator()(
       (((isize - 2 * padding[0] + filter_height) % stride[0] == 0) ? 1 : 0));
  int fill = isize % 2;
  if (stride[0] == 1 && filter_height == 3 && pad1 && pad2 &&
-      dilation[0] == 1 && im_height > 2) {
+      dilation[0] == 1 && im_height > 2 && im_height == im_width) {
    for (int c = 0; c < im_channels; ++c) {
      int oosize = osize * osize;
      int nk4 = osize / 4;
@@ -225,7 +225,7 @@ void Im2ColFunctor<ColFormat::kCFO, CPU, float>::operator()(
      im_data += isize * isize;
    }
  } else if (stride[0] == 2 && filter_height == 3 && pad1 && dilation[0] == 1 &&
-             im_height > 2) {
+             im_height > 2 && im_height == im_width) {
    for (int c = 0; c < im_channels; ++c) {
      int oosize = osize * osize;
      int nk4 = osize / 4;
@@ -605,7 +605,6 @@ class Im2ColFunctor<ColFormat::kOCF, CPU, T> {

    const T *im_data = im.data<T>();
    T *col_data = col->data<T>();
-
    for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
      for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
        for (int channel = 0; channel < im_channels; ++channel) {
@@ -617,7 +616,6 @@ class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
                 ++filter_col_idx) {
              int im_col_offset =
                  col_col_idx * stride[1] + filter_col_idx - padding[1];
-
              int col_offset =
                  ((((col_row_idx)*col_width + col_col_idx) * im_channels +
                    channel) *
@@ -625,7 +623,6 @@ class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
                   filter_row_idx) *
                      filter_width +
                  filter_col_idx;
-
              int im_offset = (channel * im_height + im_row_offset) * im_width +
                              im_col_offset;
              col_data[col_offset] =

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -347,6 +347,10 @@ if (NOT FOUND_MATCH)
    ADD_EXECUTABLE(test-multi-process net/test_multi_inference_predict.cpp test_helper.h test_include.h)
    target_link_libraries(test-multi-process paddle-mobile)

+    # gen test
+    ADD_EXECUTABLE(test-eng net/test_eng.cpp test_helper.h test_include.h)
+    target_link_libraries(test-eng paddle-mobile)
+

    #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
 endif ()
--- a/test/net/test_eng.cpp
+++ b/test/net/test_eng.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+// Load/predict timing test for the model located under g_eng.
+//
+// Loads "<g_eng>/model" and "<g_eng>/params", runs one warm-up PredictLod call
+// and one timed PredictLod call on a {1, 1, 48, 512} float tensor prepared by
+// SetupTensor (called with bounds 0 and 1), and prints both costs to stdout.
+// Always returns 0; if Load() fails nothing is printed.
+//
+// The whole body is guarded by PADDLE_MOBILE_CPU because the PaddleMobile
+// instance is only declared for that backend; without the guard the file
+// would not compile when the macro is undefined.
+int main() {
+#ifdef PADDLE_MOBILE_CPU
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  //    paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_eng) + "/model",
+                         std::string(g_eng) + "/params", false, false, 1,
+                         true)) {
+    auto time2 = time();
+    // Measure from just before Load() (time1) to just after it (time2).
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+    LoDTensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 1, 48, 512}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    // Warm-up pass (currently a single iteration) so the timed run below
+    // does not include first-call overhead.
+    for (int i = 0; i < 1; ++i) {
+      paddle_mobile.PredictLod(input_tensor);
+    }
+    // Timed pass (currently a single iteration).
+    auto time3 = time();
+    for (int i = 0; i < 1; ++i) {
+      paddle_mobile.PredictLod(input_tensor);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
+  }
+#endif
+  return 0;
+}