diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h
index 2e1a145a3aec2c37e77eecb7f853db4fd99f75e0..a102ccef4075ec133158725b42a83ceb3b5a4411 100644
--- a/mace/kernels/conv_2d.h
+++ b/mace/kernels/conv_2d.h
@@ -108,6 +108,18 @@ class Conv2dFunctor {
   const int* dilations_; // [dilation_h, dilation_w]
 };
 
+// NEON specialization, defined in mace/kernels/neon/conv_2d_neon.cc.
+// NOTE(review): the template arguments were missing from the extracted patch;
+// <DeviceType::NEON, float> is assumed -- confirm against the primary template.
+template<>
+void Conv2dFunctor<DeviceType::NEON, float>::operator()(const float* input, // NCHW
+                  const index_t* input_shape,
+                  const float* filter, // c_out, c_in, kernel_h, kernel_w
+                  const index_t* filter_shape,
+                  const float* bias, // c_out
+                  float* output, // NCHW
+                  const index_t* output_shape);
+
 } // namespace kernels
 } // namespace mace
 
diff --git a/mace/kernels/neon/conv_2d_neon.cc b/mace/kernels/neon/conv_2d_neon.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8d45861a1b17ab2e1c59b217723aa6d30d962d63
--- /dev/null
+++ b/mace/kernels/neon/conv_2d_neon.cc
@@ -0,0 +1,144 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+// NOTE(review): the text between angle brackets (the system header name and
+// the template arguments used below) was missing from the extracted patch and
+// has been filled in with the most plausible values -- confirm them against
+// conv_2d.h and the Tensor class before applying.
+#include <arm_neon.h>
+#include <cstring>
+#include <vector>
+
+#include "mace/kernels/conv_2d.h"
+#include "mace/kernels/neon/conv_2d_neon_3x3.h"
+
+namespace mace {
+namespace kernels {
+
+// Copies an NCHW float input into `output_tensor`, surrounding every
+// height x width plane with zeros.
+//
+//   input         source data, laid out as input_shape describes
+//   input_shape   [batch, channels, height, width]
+//   paddings      [pad_h, pad_w]: total rows / columns added per plane; half
+//                 (rounded down) goes before the data, the rest after it
+//   output_tensor resized to output_shape and filled by this function
+//   output_shape  4-element vector, overwritten with the padded shape
+static inline void ConstructInputWithPadding(const float* input,
+                                             const index_t* input_shape,
+                                             const int* paddings,
+                                             Tensor& output_tensor,
+                                             std::vector<index_t>& output_shape) {
+  const index_t batch = input_shape[0];
+  const index_t channels = input_shape[1];
+  const index_t height = input_shape[2];
+  const index_t width = input_shape[3];
+  output_shape[0] = batch;
+  output_shape[1] = channels;
+  output_shape[2] = paddings[0] + height;
+  output_shape[3] = paddings[1] + width;
+  const index_t output_width = output_shape[3];
+
+  const int padded_top = paddings[0] / 2;
+  const int padded_left = paddings[1] / 2;
+
+  output_tensor.Resize(output_shape);
+  float* output_ptr = output_tensor.mutable_data<float>();
+  memset(output_ptr, 0, output_tensor.size() * sizeof(float));
+  // Start at the first data row of the first plane.
+  output_ptr += padded_top * output_width;
+
+  // Every loop gets its own counter. Counting the shape variables themselves
+  // down to zero left the inner loops exhausted after the first plane, so
+  // only that plane was ever copied.
+  for (index_t n = 0; n < batch; ++n) {
+    for (index_t c = 0; c < channels; ++c) {
+      for (index_t h = 0; h < height; ++h) {
+        memcpy(output_ptr + padded_left, input, width * sizeof(float));
+        input += width;
+        output_ptr += output_width;
+      }
+      // Skip this plane's bottom padding plus the next plane's top padding,
+      // which together are paddings[0] rows.
+      output_ptr += paddings[0] * output_width;
+    }
+  }
+}
+
+// NEON 2-D convolution. Dispatches to a hand-written kernel chosen by
+// (kernel size, stride); every other configuration is forwarded to a
+// Conv2dFunctor built from the same strides, paddings and dilations.
+//
+//   input / input_shape    NCHW data and its 4 dimensions
+//   filter / filter_shape  weights and [c_out, c_in, kernel_h, kernel_w]
+//   bias                   c_out values
+//   output / output_shape  NCHW destination and its 4 dimensions
+template<>
+void Conv2dFunctor<DeviceType::NEON, float>::operator()(const float* input, // NCHW
+                        const index_t* input_shape,
+                        const float* filter, // c_out, c_in, kernel_h, kernel_w
+                        const index_t* filter_shape,
+                        const float* bias, // c_out
+                        float* output, // NCHW
+                        const index_t* output_shape) {
+  typedef void (*Conv2dNeonFunction)(const float* input, // NCHW
+                                     const index_t* input_shape,
+                                     const float* filter, // c_out, c_in, kernel_h, kernel_w
+                                     const float* bias, // c_out
+                                     float* output, // NCHW
+                                     const index_t* output_shape);
+  const int kMaxKernelSize = 5;
+  const int kMaxStride = 2;
+  // selector[kernel_size - 1][stride - 1]; nullptr marks a missing kernel.
+  static const Conv2dNeonFunction selector[kMaxKernelSize][kMaxStride] = {
+      {nullptr, nullptr},
+      {nullptr, nullptr},
+      {Conv2dNeonK3x3S1, nullptr},
+      {nullptr, nullptr},
+      {nullptr, nullptr}
+  };
+
+  const index_t kernel_h = filter_shape[2];
+  const index_t kernel_w = filter_shape[3];
+  // The range checks run before the table lookup so an unsupported kernel
+  // size or stride can never index outside `selector`. Padding plays no part
+  // in the choice: it is materialised by ConstructInputWithPadding below.
+  const bool has_neon_kernel =
+      kernel_h == kernel_w && kernel_h >= 1 && kernel_h <= kMaxKernelSize &&
+      strides_[0] == strides_[1] &&
+      strides_[0] >= 1 && strides_[0] <= kMaxStride &&
+      dilations_[0] == 1 && dilations_[1] == 1 &&
+      selector[kernel_h - 1][strides_[0] - 1] != nullptr;
+  if (!has_neon_kernel) {
+    // No NEON kernel for this configuration yet: use the fallback functor.
+    Conv2dFunctor<DeviceType::CPU, float>(strides_, paddings_, dilations_)(
+        input,
+        input_shape,
+        filter,
+        filter_shape,
+        bias,
+        output,
+        output_shape
+    );
+    // The fallback call handled the request. Without this return, execution
+    // would continue and call a null or out-of-range table entry.
+    return;
+  }
+
+  Tensor padded_input;
+  std::vector<index_t> padded_input_shape(4);
+  ConstructInputWithPadding(input, input_shape, paddings_, padded_input, padded_input_shape);
+  const Conv2dNeonFunction conv2d_neon_func = selector[kernel_h - 1][strides_[0] - 1];
+  conv2d_neon_func(
+      padded_input.data<float>(),
+      padded_input_shape.data(),
+      filter,
+      bias,
+      output,
+      output_shape
+  );
+}
+
+} // namespace kernels
+} // namespace mace
\ No newline at end of file
diff --git a/mace/kernels/neon/conv_2d_neon_3x3.h b/mace/kernels/neon/conv_2d_neon_3x3.h
new file mode 100644
index 0000000000000000000000000000000000000000..9916e3e03dd6bf4139aa32dbc487c7447119f425
--- /dev/null
+++ b/mace/kernels/neon/conv_2d_neon_3x3.h
@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+#ifndef MACE_KERNELS_NEON_CONV_2D_NEON_3X3_H_
+#define MACE_KERNELS_NEON_CONV_2D_NEON_3X3_H_
+
+#include <arm_neon.h>  // NOTE(review): header name was missing in the extracted patch; assumed -- confirm
+#include "mace/core/common.h"
+
+namespace mace {
+namespace kernels {
+// NEON kernel stub (3x3, stride 1 judging by the name). `inline` because it is defined in a header: avoids duplicate-symbol link errors.
+inline void Conv2dNeonK3x3S1(const float* input, // NCHW
+                      const index_t* input_shape,
+                      const float* filter, // c_out, c_in, kernel_h, kernel_w
+                      const float* bias, // c_out
+                      float* output, // NCHW
+                      const index_t* output_shape) {
+  // TODO: not implemented yet; the body is empty, so `output` is left untouched.
+}
+
+} // namespace kernels
+} // namespace mace
+
+#endif // MACE_KERNELS_NEON_CONV_2D_NEON_3X3_H_