diff --git a/mace/kernels/global_avg_pooling.h b/mace/kernels/global_avg_pooling.h new file mode 100644 index 0000000000000000000000000000000000000000..c339fd41ff3cca59b5af2dea59142bbd9d212ace --- /dev/null +++ b/mace/kernels/global_avg_pooling.h @@ -0,0 +1,48 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#ifndef MACE_KERNELS_GLOBAL_AVG_POOLING_H_ +#define MACE_KERNELS_GLOBAL_AVG_POOLING_H_ + +#include "mace/core/tensor.h" + +namespace mace { +namespace kernels { + +template +class GlobalAvgPoolingFunctor { + public: + GlobalAvgPoolingFunctor() {} + + void operator()(const T *input, const index_t *input_shape, T *output) { + index_t batch = input_shape[0]; + index_t channels = input_shape[1]; + index_t height = input_shape[2]; + index_t width = input_shape[3]; + + index_t image_size = height * width; + index_t input_offset = 0; + index_t total_channels = batch * channels; + + for (int c = 0; c < total_channels; ++c) { + T sum = 0; + for (int i = 0; i < image_size; ++i) { + sum += input[input_offset + i]; + } + output[c] = sum / image_size; + input_offset += image_size; + } + } +}; + +template <> +void GlobalAvgPoolingFunctor::operator()( + const float *input, + const index_t *input_shape, + float *output); + +} // namespace kernels +} // namespace mace + +#endif // MACE_KERNELS_GLOBAL_AVG_POOLING_H_ \ No newline at end of file diff --git a/mace/kernels/neon/global_avg_pooling_neon.cc b/mace/kernels/neon/global_avg_pooling_neon.cc new file mode 100644 index 0000000000000000000000000000000000000000..2980afec2c415fe5c645d8d38620371216d8bd63 --- /dev/null +++ b/mace/kernels/neon/global_avg_pooling_neon.cc @@ -0,0 +1,57 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#include "mace/kernels/global_avg_pooling.h" +#include + +namespace mace { +namespace kernels { + +template<> +void GlobalAvgPoolingFunctor::operator()( + const float *input, + const index_t *input_shape, + float *output) { + index_t batch = input_shape[0]; + index_t channels = input_shape[1]; + index_t height = input_shape[2]; + index_t width = input_shape[3]; + + index_t image_size = height * width; + index_t input_offset = 0; + index_t total_channels = batch * channels; + +#pragma omp parallel for + for (int c = 0; c < total_channels; ++c) { + const float *inptr = input + c * image_size; + float sum = 0.0; + + int num_vectors = image_size >> 2; + int remain = image_size - (num_vectors << 2); + + if (num_vectors > 0) { + float sum_out[4] = {0.0, 0.0, 0.0, 0.0}; + + float32x4_t sum_vector = vld1q_f32(inptr); + inptr += 4; + for (int n = 1; n < num_vectors; ++n) { + float32x4_t vector = vld1q_f32(inptr); + sum_vector = vaddq_f32(sum_vector, vector); + inptr += 4; + } + vst1q_f32(sum_out, sum_vector); + + sum = sum_out[0] + sum_out[1] + sum_out[2] + sum_out[3]; + } + + for (int i = 0; i < remain; ++i) { + sum += *inptr; + ++inptr; + } + output[c] = sum / image_size; + } +}; + +} // namespace kernels +} // namespace mace \ No newline at end of file diff --git a/mace/ops/global_avg_pooling.cc b/mace/ops/global_avg_pooling.cc new file mode 100644 index 0000000000000000000000000000000000000000..d507d76fa63ed34c02761c551142faa6a9886a0d --- /dev/null +++ b/mace/ops/global_avg_pooling.cc @@ -0,0 +1,17 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#include "mace/ops/global_avg_pooling.h" + +namespace mace { + +REGISTER_CPU_OPERATOR(GlobalAvgPooling, + GlobalAvgPoolingOp); + +#if __ARM_NEON +REGISTER_NEON_OPERATOR(GlobalAvgPooling, + GlobalAvgPoolingOp); +#endif // __ARM_NEON + +} // namespace mace diff --git a/mace/ops/global_avg_pooling.h b/mace/ops/global_avg_pooling.h new file mode 100644 index 0000000000000000000000000000000000000000..151e791ea87434dc35c416f33eb7f93606deea5f --- /dev/null +++ b/mace/ops/global_avg_pooling.h @@ -0,0 +1,43 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#ifndef MACE_OPS_GLOBAL_AVG_POOLING_H_ +#define MACE_OPS_GLOBAL_AVG_POOLING_H_ + +#include "mace/core/operator.h" +#include "mace/kernels/global_avg_pooling.h" + +namespace mace { + +template +class GlobalAvgPoolingOp : public Operator { + public: + GlobalAvgPoolingOp(const OperatorDef &operator_def, Workspace *ws) + : Operator(operator_def, ws) {} + + bool Run() override { + const Tensor *input = this->Input(INPUT); + Tensor *output = this->Output(OUTPUT); + + std::vector output_shape(4); + output_shape[0] = input->shape()[0]; + output_shape[1] = input->shape()[1]; + output_shape[2] = output_shape[3] = 1; + + output->Resize(output_shape); + + auto pooling_func = kernels::GlobalAvgPoolingFunctor(); + pooling_func(input->data(), input->shape().data(), + output->mutable_data()); + return true; + } + + protected: + OP_INPUT_TAGS(INPUT); + OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace mace + +#endif // MACE_OPS_GLOBAL_AVG_POOLING_H_ diff --git a/mace/ops/global_avg_pooling_benchmark.cc b/mace/ops/global_avg_pooling_benchmark.cc new file mode 100644 index 0000000000000000000000000000000000000000..7097a2ae6a4041531ce0c0e36f2b6fe59490a9b3 --- /dev/null +++ b/mace/ops/global_avg_pooling_benchmark.cc @@ -0,0 +1,58 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#include "mace/kernels/global_avg_pooling.h" +#include "mace/core/operator.h" +#include "mace/core/testing/test_benchmark.h" +#include "mace/ops/ops_test_util.h" + +using namespace mace; +using namespace mace::kernels; + +template +static void GlobalAvgPooling(int iters, + int batch, + int channels, + int height, + int width) { + mace::testing::StopTiming(); + + OpsTestNet net; + OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") + .Input("Input") + .Output("Output") + .Finalize(net.operator_def()); + + // Add input data + net.AddRandomInput("Input", {batch, channels, height, width}); + + // Warm-up + for (int i = 0; i < 5; ++i) { + net.RunOp(D); + } + + mace::testing::StartTiming(); + while (iters--) { + net.RunOp(D); + } +} + +#define BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, DEVICE) \ + static void \ + BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot*(sizeof(float))); \ + GlobalAvgPooling(iters, N, C, H, W); \ + } \ + BENCHMARK(BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE) + +#define BM_GLOBAL_AVG_POOLING(N, C, H, W) \ + BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, CPU); \ + BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, NEON); + +BM_GLOBAL_AVG_POOLING(1, 3, 7, 7); +BM_GLOBAL_AVG_POOLING(1, 3, 64, 64); +BM_GLOBAL_AVG_POOLING(1, 3, 256, 256); \ No newline at end of file diff --git a/mace/ops/global_avg_pooling_test.cc b/mace/ops/global_avg_pooling_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..d5d99330883cceccc3c7d3266186fd3d832b0ca9 --- /dev/null +++ b/mace/ops/global_avg_pooling_test.cc @@ -0,0 +1,61 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// +#include "mace/core/operator.h" +#include "mace/ops/ops_test_util.h" + +using namespace mace; + +class GlobalAvgPoolingOpTest : public OpsTestBase {}; + +TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) { + // Construct graph + auto& net = test_net(); + OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") + .Input("Input") + .Output("Output") + .Finalize(net.operator_def()); + + // Add input data + std::vector input(147); + for (int i = 0; i < 147; ++i) { + input[i] = i/49 + 1; + } + net.AddInputFromArray( + "Input", {1, 3, 7, 7}, input); + + // Run + net.RunOp(); + + // Check + auto expected = + CreateTensor({1, 3, 1, 1}, {1, 2, 3}); + + ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); +} + +TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) { + // Construct graph + auto& net = test_net(); + OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") + .Input("Input") + .Output("Output") + .Finalize(net.operator_def()); + + // Add input data + std::vector input(147); + for (int i = 0; i < 147; ++i) { + input[i] = i/49 + 1; + } + net.AddInputFromArray( + "Input", {1, 3, 7, 7}, input); + + // Run + net.RunOp(DeviceType::NEON); + + // Check + auto expected = + CreateTensor({1, 3, 1, 1}, {1, 2, 3}); + + ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); +}