diff --git a/src/operators/kernel/arm/im2sequence_kernel.cpp b/src/operators/kernel/arm/im2sequence_kernel.cpp index 8295fd94a31db2ad1c10d32a8c639b067e422f45..cc6ae2ae8bc7cde9b365817ba9cafc19776da913 100644 --- a/src/operators/kernel/arm/im2sequence_kernel.cpp +++ b/src/operators/kernel/arm/im2sequence_kernel.cpp @@ -35,7 +35,7 @@ template <> void Im2SequenceKernel::Compute( const Im2SequenceParam ¶m) const { const Tensor *in_x = param.Input(); - Tensor *out = param.Output(); + framework::LoDTensor *out = param.Output(); out->mutable_data(); std::vector kernels = param.Kernels(); @@ -52,22 +52,31 @@ void Im2SequenceKernel::Compute( paddings[2], strides[0]); int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]); - const std::vector dilations({1, 1}); + out->mutable_data({batch_size * output_height * output_width, + img_channels * kernels[0] * kernels[1]}); + const std::vector dilations({1, 1}); // TODO: verify auto out_dims = out->dims(); out->Resize({batch_size, out->numel() / batch_size}); - for (int i = 0; i < batch_size; i++) { const Tensor src = in_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); Tensor dst = out->Slice(i, i + 1).Resize( {output_height, output_width, img_channels, kernels[0], kernels[1]}); - math::Im2ColFunctor f; f(src, dilations, strides, paddings, &dst); } out->Resize(out_dims); + framework::LoD lod(1); + lod[0].reserve(batch_size + 1); + int offset = 0; + lod[0].push_back(offset); + for (int i = 0; i < batch_size; ++i) { + offset += output_height * output_width; + lod[0].push_back(offset); + } + out->set_lod(lod); } template class Im2SequenceKernel; diff --git a/src/operators/kernel/central-arm-func/pool_arm_func.h b/src/operators/kernel/central-arm-func/pool_arm_func.h index 37479c22efe95b6506054cf3ded5855aa766c34c..1c22a2646ea4efd91e6c73333c21e0d0c56fcb17 100644 --- a/src/operators/kernel/central-arm-func/pool_arm_func.h +++ b/src/operators/kernel/central-arm-func/pool_arm_func.h @@ -76,7 +76,7 @@ void PoolCompute(const PoolParam ¶m) { } } - } else if (ksize[0] == 2 && ksize[0] == ksize[1] && strides[0] == 2 && + } else if (0 && ksize[0] == 2 && ksize[0] == ksize[1] && strides[0] == 2 && strides[0] == strides[1] && paddings[0] == paddings[1] && paddings[1] == 0) { #if __ARM_NEON diff --git a/src/operators/math/im2col.cpp b/src/operators/math/im2col.cpp index 4c81e7fa3bd4e5ea36f04b453d4f84468745f919..47055ec4f24e5b5b226c1f084bb2253d2ebb77c7 100644 --- a/src/operators/math/im2col.cpp +++ b/src/operators/math/im2col.cpp @@ -53,7 +53,7 @@ void Im2ColFunctor::operator()( (((isize - 2 * padding[0] + filter_height) % stride[0] == 0) ? 1 : 0)); int fill = isize % 2; if (stride[0] == 1 && filter_height == 3 && pad1 && pad2 && - dilation[0] == 1 && im_height > 2) { + dilation[0] == 1 && im_height > 2 && im_height == im_width) { for (int c = 0; c < im_channels; ++c) { int oosize = osize * osize; int nk4 = osize / 4; @@ -225,7 +225,7 @@ void Im2ColFunctor::operator()( im_data += isize * isize; } } else if (stride[0] == 2 && filter_height == 3 && pad1 && dilation[0] == 1 && - im_height > 2) { + im_height > 2 && im_height == im_width) { for (int c = 0; c < im_channels; ++c) { int oosize = osize * osize; int nk4 = osize / 4; @@ -605,7 +605,6 @@ class Im2ColFunctor { const T *im_data = im.data(); T *col_data = col->data(); - for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) { for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) { for (int channel = 0; channel < im_channels; ++channel) { @@ -617,7 +616,6 @@ class Im2ColFunctor { ++filter_col_idx) { int im_col_offset = col_col_idx * stride[1] + filter_col_idx - padding[1]; - int col_offset = ((((col_row_idx)*col_width + col_col_idx) * im_channels + channel) * @@ -625,7 +623,6 @@ class Im2ColFunctor { filter_row_idx) * filter_width + filter_col_idx; - int im_offset = (channel * im_height + im_row_offset) * im_width + im_col_offset; col_data[col_offset] = diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a47bb0c9e7e988fe11fb79c9e4384f0488f19ceb..2bd7169533f637add2a752feaceca8df132cb262 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -347,6 +347,10 @@ if (NOT FOUND_MATCH) ADD_EXECUTABLE(test-multi-process net/test_multi_inference_predict.cpp test_helper.h test_include.h) target_link_libraries(test-multi-process paddle-mobile) + # gen test + ADD_EXECUTABLE(test-eng net/test_eng.cpp test_helper.h test_include.h) + target_link_libraries(test-eng paddle-mobile) + #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp) endif () diff --git a/test/net/test_eng.cpp b/test/net/test_eng.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4a78af8310cf7f1db976fbc344a21dd0bb4b25a6 --- /dev/null +++ b/test/net/test_eng.cpp @@ -0,0 +1,50 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "../test_helper.h" +#include "../test_include.h" + +int main() { +#ifdef PADDLE_MOBILE_CPU + paddle_mobile::PaddleMobile paddle_mobile; +#endif + // paddle_mobile.SetThreadNum(4); + auto time1 = time(); + if (paddle_mobile.Load(std::string(g_eng) + "/model", + std::string(g_eng) + "/params", false, false, 1, + true)) { + auto time2 = time(); + std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; + std::vector dims{1, 1, 48, 512}; + LoDTensor input_tensor; + SetupTensor(&input_tensor, {1, 1, 48, 512}, static_cast(0), + static_cast(1)); + + std::vector input(input_tensor.data(), + input_tensor.data() + input_tensor.numel()); + // 预热十次 + for (int i = 0; i < 1; ++i) { + paddle_mobile.PredictLod(input_tensor); + } + auto time3 = time(); + for (int i = 0; i < 1; ++i) { + paddle_mobile.PredictLod(input_tensor); + } + auto time4 = time(); + std::cout << "predict cost :" << time_diff(time3, time4) << "ms" + << std::endl; + } + return 0; +}