diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index f20c427b26e8e2d3718bd6184087e5fc20f21157..2e1a145a3aec2c37e77eecb7f853db4fd99f75e0 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -22,7 +22,7 @@ class Conv2dFunctor { void operator()(const T* input, // NCHW const index_t* input_shape, - const T* filter, // kernel_h, kernel_w, c_in, c_out + const T* filter, // c_out, c_in, kernel_h, kernel_w const index_t* filter_shape, const T* bias, // c_out T* output, // NCHW @@ -39,8 +39,8 @@ class Conv2dFunctor { index_t input_height = input_shape[2]; index_t input_width = input_shape[3]; - int kernel_h = filter_shape[0]; - int kernel_w = filter_shape[1]; + index_t kernel_h = filter_shape[2]; + index_t kernel_w = filter_shape[3]; int stride_h = strides_[0]; int stride_w = strides_[1]; @@ -53,10 +53,12 @@ class Conv2dFunctor { // The left-upper most offset of the padded input int padded_h_start = 0 - paddings_[0] / 2; int padded_w_start = 0 - paddings_[1] / 2; - int padded_h_stop = input_height + paddings_[0] - paddings_[0] / 2; - int padded_w_stop = input_width + paddings_[1] - paddings_[1] / 2; + index_t padded_h_stop = input_height + paddings_[0] - paddings_[0] / 2; + index_t padded_w_stop = input_width + paddings_[1] - paddings_[1] / 2; -#pragma omp parallel for collpse(2) + index_t kernel_size = input_channels * kernel_h * kernel_w; + +#pragma omp parallel for collapse(2) for (int n = 0; n < batch; ++n) { for (int c = 0; c < channels; ++c) { for (int h = 0; h < height; ++h) { @@ -65,17 +67,10 @@ class Conv2dFunctor { c * height * width + h * width + w; T sum = 0; + const T* filter_ptr = filter + c * kernel_size; for (int inc = 0; inc < input_channels; ++inc) { for (int kh = 0; kh < kernel_h; ++kh) { for (int kw = 0; kw < kernel_w; ++kw) { - /* - * TODO The tensorflow filter order is HWCiCo. - * We should consider other order for different - * implementaion to optimize memory access. - */ - int filter_offset = kh * kernel_w * input_channels * channels + - kw * input_channels * channels + - inc * channels + c; int inh = padded_h_start + h * stride_h + dilation_h * kh; int inw = padded_w_start + w * stride_w + dilation_w * kw; @@ -94,8 +89,9 @@ class Conv2dFunctor { n * input_channels * input_height * input_width + inc * input_height * input_width + inh * input_width + inw; - sum += input[input_offset] * filter[filter_offset]; + sum += input[input_offset] * *filter_ptr; } + ++filter_ptr; } } output[offset] = sum + bias[c]; diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h index 8a35919489cd124edc6919ad4fa998d5ee91b754..52956159dea0b25a63d0d90f02f6e2c2bf2672b7 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -56,10 +56,8 @@ public: // The left-upper most offset of the padded input int padded_h_start = 0 - paddings_[0] / 2; int padded_w_start = 0 - paddings_[1] / 2; - int padded_h_stop = input_height + paddings_[0] - paddings_[0] / 2; - int padded_w_stop = input_width + paddings_[1] - paddings_[0] / 2; -#pragma omp parallel for collpse(2) +#pragma omp parallel for collapse(2) for (int n = 0; n < batch; ++n) { for (int c = 0; c < channels; ++c) { index_t out_offset = n * channels * height * width + diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 6ab3de8da6c349477341154f7f31043c28f276ca..c95f3dc6e4317c3596034bf2b65d9210258c1481 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -32,7 +32,7 @@ TEST_F(Conv2dOpTest, Simple_VALID) { 1, 1, 1, 1, 1, 1, 1, 1, 1}); - AddInputFromArray("Filter", {3, 3, 2, 1}, + AddInputFromArray("Filter", {1, 2, 3, 3}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); @@ -69,7 +69,7 @@ TEST_F(Conv2dOpTest, Simple_SAME) { 1, 1, 1, 1, 1, 1, 1, 1, 1}); - AddInputFromArray("Filter", {3, 3, 2, 1}, + AddInputFromArray("Filter", {1, 2, 3, 3}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); @@ -113,16 +113,11 @@ TEST_F(Conv2dOpTest, Combined) { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - AddInputFromArray("Filter", {3, 3, 2, 2}, - {1.0f, 0.5f, 1.0f, 0.5f, - 1.0f, 0.5f, 1.0f, 0.5f, - 1.0f, 0.5f, 1.0f, 0.5f, - 1.0f, 0.5f, 1.0f, 0.5f, - 1.0f, 0.5f, 1.0f, 0.5f, - 1.0f, 0.5f, 1.0f, 0.5f, - 1.0f, 0.5f, 1.0f, 0.5f, - 1.0f, 0.5f, 1.0f, 0.5f, - 1.0f, 0.5f, 1.0f, 0.5f}); + AddInputFromArray("Filter", {2, 2, 3, 3}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}); AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); // Run diff --git a/mace/ops/conv_pool_2d_base.h b/mace/ops/conv_pool_2d_base.h index 36939e31c63cb116b776e4784f011672c57b3fa2..d95668ec44b4726ec987a8ad045c30d3e2af20d8 100644 --- a/mace/ops/conv_pool_2d_base.h +++ b/mace/ops/conv_pool_2d_base.h @@ -27,7 +27,7 @@ class ConvPool2dOpBase : public Operator { dilations_(OperatorBase::GetRepeatedArgument("dilations")) {} void CalcPaddingAndOutputSize(const index_t* input_shape, // NCHW - const index_t* filter_shape, // HWIO + const index_t* filter_shape, // OIHW std::vector* output_shape, std::vector* padding_size) { MACE_CHECK(dilations_[0] > 0 && dilations_[1] > 0, @@ -44,12 +44,12 @@ class ConvPool2dOpBase : public Operator { *padding_size = {0, 0}; index_t output_height, output_width; - index_t kernel_height = filter_shape[0]; - index_t kernel_width = filter_shape[1]; - index_t output_channels = filter_shape[3]; + index_t kernel_height = filter_shape[2]; + index_t kernel_width = filter_shape[3]; + index_t output_channels = filter_shape[0]; - int k_extent_height = (kernel_height - 1) * dilations_[0] + 1; - int k_extent_width = (kernel_width - 1) * dilations_[1] + 1; + index_t k_extent_height = (kernel_height - 1) * dilations_[0] + 1; + index_t k_extent_width = (kernel_width - 1) * dilations_[1] + 1; switch (padding_) { case VALID: diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index 915035c858bb66d5f1aa7f61f4717187b2f11b5e..58ced63e941e7d1f0f602d4de2191aa40fcb2d0e 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -4,8 +4,6 @@ #include "mace/ops/pooling.h" -#include "mace/proto/mace.pb.h" -#include "mace/kernels/pooling.h" namespace mace { diff --git a/mace/ops/pooling.h b/mace/ops/pooling.h index bc62a075a3b6fe75b40caa57d31cb7f29a6ad9a7..042eb389c11d2d0c09c72530ead8f3b7c98da5ef 100644 --- a/mace/ops/pooling.h +++ b/mace/ops/pooling.h @@ -29,10 +29,10 @@ public: std::vector output_shape; std::vector paddings; std::vector filter_shape = std::vector(4); - filter_shape[0] = kernels_[0]; - filter_shape[1] = kernels_[1]; - filter_shape[2] = in_shape[0]; - filter_shape[3] = in_shape[1]; + filter_shape[0] = in_shape[1]; + filter_shape[1] = in_shape[0]; + filter_shape[2] = kernels_[0]; + filter_shape[3] = kernels_[1]; this->CalcPaddingAndOutputSize(in_shape.data(), filter_shape.data(), &output_shape, &paddings); output->Resize(output_shape); @@ -50,8 +50,8 @@ public: }; protected: - PoolingType pooling_type_; std::vector kernels_; + PoolingType pooling_type_; OP_INPUT_TAGS(INPUT); OP_OUTPUT_TAGS(OUTPUT); diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index b2484677a3b1b4bddc6ab4c72839a528eb0714fa..f56bff61c3620cdae9638c0d3f6166f5aed64aa4 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -5,7 +5,6 @@ #include "gtest/gtest.h" #include "mace/core/operator.h" -#include "mace/core/net.h" #include "mace/ops/ops_test_util.h" #include "mace/ops/conv_pool_2d_base.h" #include "mace/kernels/pooling.h"