提交 7b6c3241 编写于 作者: W wuchenghui

global avg pooling

上级 5b21653b
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_KERNELS_GLOBAL_AVG_POOLING_H_
#define MACE_KERNELS_GLOBAL_AVG_POOLING_H_
#include "mace/core/tensor.h"
namespace mace {
namespace kernels {
template <DeviceType D, typename T>
class GlobalAvgPoolingFunctor {
public:
GlobalAvgPoolingFunctor() {}
void operator()(const T *input, const index_t *input_shape, T *output) {
index_t batch = input_shape[0];
index_t channels = input_shape[1];
index_t height = input_shape[2];
index_t width = input_shape[3];
index_t image_size = height * width;
index_t input_offset = 0;
index_t total_channels = batch * channels;
for (int c = 0; c < total_channels; ++c) {
T sum = 0;
for (int i = 0; i < image_size; ++i) {
sum += input[input_offset + i];
}
output[c] = sum / image_size;
input_offset += image_size;
}
}
};
template <>
void GlobalAvgPoolingFunctor<DeviceType::NEON, float>::operator()(
const float *input,
const index_t *input_shape,
float *output);
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_GLOBAL_AVG_POOLING_H_
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/global_avg_pooling.h"
#include <arm_neon.h>
namespace mace {
namespace kernels {
template<>
void GlobalAvgPoolingFunctor<DeviceType::NEON, float>::operator()(
const float *input,
const index_t *input_shape,
float *output) {
index_t batch = input_shape[0];
index_t channels = input_shape[1];
index_t height = input_shape[2];
index_t width = input_shape[3];
index_t image_size = height * width;
index_t input_offset = 0;
index_t total_channels = batch * channels;
#pragma omp parallel for
for (int c = 0; c < total_channels; ++c) {
const float *inptr = input + c * image_size;
float sum = 0.0;
int num_vectors = image_size >> 2;
int remain = image_size - (num_vectors << 2);
if (num_vectors > 0) {
float sum_out[4] = {0.0, 0.0, 0.0, 0.0};
float32x4_t sum_vector = vld1q_f32(inptr);
inptr += 4;
for (int n = 1; n < num_vectors; ++n) {
float32x4_t vector = vld1q_f32(inptr);
sum_vector = vaddq_f32(sum_vector, vector);
inptr += 4;
}
vst1q_f32(sum_out, sum_vector);
sum = sum_out[0] + sum_out[1] + sum_out[2] + sum_out[3];
}
for (int i = 0; i < remain; ++i) {
sum += *inptr;
++inptr;
}
output[c] = sum / image_size;
}
};
} // namespace kernels
} // namespace mace
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/global_avg_pooling.h"
namespace mace {
REGISTER_CPU_OPERATOR(GlobalAvgPooling,
GlobalAvgPoolingOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR(GlobalAvgPooling,
GlobalAvgPoolingOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
} // namespace mace
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_OPS_GLOBAL_AVG_POOLING_H_
#define MACE_OPS_GLOBAL_AVG_POOLING_H_
#include "mace/core/operator.h"
#include "mace/kernels/global_avg_pooling.h"
namespace mace {
template<DeviceType D, class T>
class GlobalAvgPoolingOp : public Operator<D, T> {
public:
GlobalAvgPoolingOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws) {}
bool Run() override {
const Tensor *input = this->Input(INPUT);
Tensor *output = this->Output(OUTPUT);
std::vector<index_t> output_shape(4);
output_shape[0] = input->shape()[0];
output_shape[1] = input->shape()[1];
output_shape[2] = output_shape[3] = 1;
output->Resize(output_shape);
auto pooling_func = kernels::GlobalAvgPoolingFunctor<D, T>();
pooling_func(input->data<float>(), input->shape().data(),
output->mutable_data<float>());
return true;
}
protected:
OP_INPUT_TAGS(INPUT);
OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace mace
#endif // MACE_OPS_GLOBAL_AVG_POOLING_H_
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/global_avg_pooling.h"
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
using namespace mace;
using namespace mace::kernels;
template <DeviceType D>
static void GlobalAvgPooling(int iters,
int batch,
int channels,
int height,
int width) {
mace::testing::StopTiming();
OpsTestNet net;
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add input data
net.AddRandomInput<float>("Input", {batch, channels, height, width});
// Warm-up
for (int i = 0; i < 5; ++i) {
net.RunOp(D);
}
mace::testing::StartTiming();
while (iters--) {
net.RunOp(D);
}
}
#define BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, DEVICE) \
static void \
BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot*(sizeof(float))); \
GlobalAvgPooling<DEVICE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_GLOBAL_AVG_POOLING_##N##_##C##_##H##_##W##_##DEVICE)
#define BM_GLOBAL_AVG_POOLING(N, C, H, W) \
BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, CPU); \
BM_GLOBAL_AVG_POOLING_MACRO(N, C, H, W, NEON);
BM_GLOBAL_AVG_POOLING(1, 3, 7, 7);
BM_GLOBAL_AVG_POOLING(1, 3, 64, 64);
BM_GLOBAL_AVG_POOLING(1, 3, 256, 256);
\ No newline at end of file
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
using namespace mace;
class GlobalAvgPoolingOpTest : public OpsTestBase {};
TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) {
// Construct graph
auto& net = test_net();
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add input data
std::vector<float> input(147);
for (int i = 0; i < 147; ++i) {
input[i] = i/49 + 1;
}
net.AddInputFromArray<float>(
"Input", {1, 3, 7, 7}, input);
// Run
net.RunOp();
// Check
auto expected =
CreateTensor<float>({1, 3, 1, 1}, {1, 2, 3});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) {
// Construct graph
auto& net = test_net();
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add input data
std::vector<float> input(147);
for (int i = 0; i < 147; ++i) {
input[i] = i/49 + 1;
}
net.AddInputFromArray<float>(
"Input", {1, 3, 7, 7}, input);
// Run
net.RunOp(DeviceType::NEON);
// Check
auto expected =
CreateTensor<float>({1, 3, 1, 1}, {1, 2, 3});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册