diff --git a/mace/kernels/neon/conv_2d_neon.cc b/mace/kernels/neon/conv_2d_neon.cc index 9891eae992993d0230d8d9b933920b51292f104e..06d3b3e75487b5c478343d3457ad55912febb153 100644 --- a/mace/kernels/neon/conv_2d_neon.cc +++ b/mace/kernels/neon/conv_2d_neon.cc @@ -31,9 +31,9 @@ static inline void ConstructInputWithPadding(const float* input, // Skip the padded top rows output_ptr += padded_top * output_width; - for (; batch > 0; --batch) { - for (; channels > 0; --channels) { - for(; height > 0; --height) { + for (int i = 0; i < batch; ++i) { + for (int j = 0; j < channels; ++j) { + for (int k = 0; k < height; ++k) { memcpy(output_ptr + padded_left, input, width * sizeof(float)); input += width; output_ptr += output_width; diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 32a4e50db53c9e0d6c0e80a09505e01de0fc5f19..40d6e3a68f2f2756527a42ea1eaf1f4d5b858e58 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -199,11 +199,11 @@ TEST_F(Conv2dOpTest, ConvNxNS12) { srand(time(NULL)); // generate random input - index_t batch = 1 + rand() % 5; - index_t input_channels = 3 + rand() % 50; - index_t height = 10 + rand() % 100; - index_t width = 10 + rand() % 100; - index_t output_channels = 3 + rand() % 50; + index_t batch = 1 + rand() % 10; + index_t input_channels = 1 + rand() % 50; + index_t height = 7 + rand() % 100; + index_t width = 7 + rand() % 100; + index_t output_channels = 1 + rand() % 50; // Construct graph auto& net = test_net(); OpDefBuilder("Conv2d", "Conv2dTest") @@ -236,11 +236,11 @@ TEST_F(Conv2dOpTest, ConvNxNS12) { // Run NEON net.RunOp(DeviceType::NEON); - ExpectTensorNear(expected, *net.GetOutput("Output"), 1e-5); + ExpectTensorNear(expected, *net.GetOutput("Output"), 1e-3); }; - for (int kernel_size : {1}) { // TODO(liu1i10) 3x3 + for (int kernel_size : {1, 3}) { for (int stride : {1, 2}) { func(kernel_size, kernel_size, stride, stride, VALID); func(kernel_size, kernel_size, stride, stride, SAME);