diff --git a/src/operators/kernel/arm/im2sequence_kernel.cpp b/src/operators/kernel/arm/im2sequence_kernel.cpp index 8295fd94a31db2ad1c10d32a8c639b067e422f45..cc6ae2ae8bc7cde9b365817ba9cafc19776da913 100644 --- a/src/operators/kernel/arm/im2sequence_kernel.cpp +++ b/src/operators/kernel/arm/im2sequence_kernel.cpp @@ -35,7 +35,7 @@ template <> void Im2SequenceKernel::Compute( const Im2SequenceParam &param) const { const Tensor *in_x = param.Input(); - Tensor *out = param.Output(); + framework::LoDTensor *out = param.Output(); out->mutable_data(); std::vector kernels = param.Kernels(); @@ -52,22 +52,31 @@ void Im2SequenceKernel::Compute( paddings[2], strides[0]); int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]); - const std::vector dilations({1, 1}); + out->mutable_data({batch_size * output_height * output_width, + img_channels * kernels[0] * kernels[1]}); + const std::vector dilations({1, 1}); // TODO: verify auto out_dims = out->dims(); out->Resize({batch_size, out->numel() / batch_size}); - for (int i = 0; i < batch_size; i++) { const Tensor src = in_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); Tensor dst = out->Slice(i, i + 1).Resize( {output_height, output_width, img_channels, kernels[0], kernels[1]}); - math::Im2ColFunctor f; f(src, dilations, strides, paddings, &dst); } out->Resize(out_dims); + framework::LoD lod(1); + lod[0].reserve(batch_size + 1); + int offset = 0; + lod[0].push_back(offset); + for (int i = 0; i < batch_size; ++i) { + offset += output_height * output_width; + lod[0].push_back(offset); + } + out->set_lod(lod); } template class Im2SequenceKernel; diff --git a/test/net/test_eng.cpp b/test/net/test_eng.cpp index d10cb33a6922f4a60b798211ab4415312b86796c..4a78af8310cf7f1db976fbc344a21dd0bb4b25a6 100644 --- a/test/net/test_eng.cpp +++ b/test/net/test_eng.cpp @@ -27,9 +27,9 @@ int main() { true)) { auto time2 = time(); std::cout << "load cost :" << time_diff(time1, time1) << "ms" << 
std::endl; - std::vector dims{1, 1, 48, 48}; + std::vector dims{1, 1, 48, 512}; LoDTensor input_tensor; - SetupTensor(&input_tensor, {1, 1, 48, 48}, static_cast(0), + SetupTensor(&input_tensor, {1, 1, 48, 512}, static_cast(0), static_cast(1)); std::vector input(input_tensor.data(), @@ -39,9 +39,9 @@ int main() { paddle_mobile.PredictLod(input_tensor); } auto time3 = time(); - // for (int i = 0; i < 10; ++i) { - // paddle_mobile.Predict(input, dims); - // } + for (int i = 0; i < 1; ++i) { + paddle_mobile.PredictLod(input_tensor); + } auto time4 = time(); std::cout << "predict cost :" << time_diff(time3, time4) << "ms" << std::endl;