From 4727af0d4eccb0e5472d0f74db0249b79a566000 Mon Sep 17 00:00:00 2001 From: liuqi Date: Thu, 14 Sep 2017 19:28:01 +0800 Subject: [PATCH] Fix bug: construct padding before neon kernel. --- mace/kernels/neon/conv_2d_neon.cc | 6 +++--- mace/ops/conv_2d_test.cc | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/mace/kernels/neon/conv_2d_neon.cc b/mace/kernels/neon/conv_2d_neon.cc index 9891eae9..06d3b3e7 100644 --- a/mace/kernels/neon/conv_2d_neon.cc +++ b/mace/kernels/neon/conv_2d_neon.cc @@ -31,9 +31,9 @@ static inline void ConstructInputWithPadding(const float* input, // Skip the padded top rows output_ptr += padded_top * output_width; - for (; batch > 0; --batch) { - for (; channels > 0; --channels) { - for(; height > 0; --height) { + for (int i = 0; i < batch; ++i) { + for (int j = 0; j < channels; ++j) { + for (int k = 0; k < height; ++k) { memcpy(output_ptr + padded_left, input, width * sizeof(float)); input += width; output_ptr += output_width; diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 32a4e50d..66c3e220 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -199,11 +199,11 @@ TEST_F(Conv2dOpTest, ConvNxNS12) { srand(time(NULL)); // generate random input - index_t batch = 1 + rand() % 5; - index_t input_channels = 3 + rand() % 50; - index_t height = 10 + rand() % 100; - index_t width = 10 + rand() % 100; - index_t output_channels = 3 + rand() % 50; + index_t batch = 1 + rand() % 10; + index_t input_channels = 1 + rand() % 50; + index_t height = 7 + rand() % 100; + index_t width = 7 + rand() % 100; + index_t output_channels = 1 + rand() % 50; // Construct graph auto& net = test_net(); OpDefBuilder("Conv2d", "Conv2dTest") @@ -236,12 +236,12 @@ TEST_F(Conv2dOpTest, ConvNxNS12) { // Run NEON net.RunOp(DeviceType::NEON); - ExpectTensorNear(expected, *net.GetOutput("Output"), 1e-5); + ExpectTensorNear(expected, *net.GetOutput("Output"), 1e-3); }; - for (int kernel_size : {1}) { // TODO(liu1i10) 3x3 - for (int stride : {1, 2}) { + for (int kernel_size : {3}) { // TODO(liu1i10) 3x3 + for (int stride : {1}) { func(kernel_size, kernel_size, stride, stride, VALID); func(kernel_size, kernel_size, stride, stride, SAME); } -- GitLab