From 97addf5af0839ba747591beb6ebea2d6b65ecd75 Mon Sep 17 00:00:00 2001
From: wuchenghui
Date: Wed, 6 Sep 2017 16:10:03 +0800
Subject: [PATCH] add pooling op

---
 mace/kernels/pooling.h   | 133 +++++++++++++++++++++++++++++++++++++
 mace/ops/BUILD           |  39 +++++++++--
 mace/ops/pooling.cc      |  25 +++++++
 mace/ops/pooling.h       |  22 ++++++
 mace/ops/pooling_test.cc |  65 ++++++++++++++++++
 5 files changed, 280 insertions(+), 4 deletions(-)
 create mode 100644 mace/kernels/pooling.h
 create mode 100644 mace/ops/pooling.cc
 create mode 100644 mace/ops/pooling.h
 create mode 100644 mace/ops/pooling_test.cc

diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h
new file mode 100644
index 00000000..60666ffe
--- /dev/null
+++ b/mace/kernels/pooling.h
@@ -0,0 +1,133 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_KERNELS_POOLING_H
+#define MACE_KERNELS_POOLING_H
+
+#include <algorithm>
+#include <limits>
+
+#include "mace/core/tensor.h"
+
+namespace mace {
+namespace kernels {
+
+enum { PoolMethod_MAX = 0, PoolMethod_AVE = 1 };
+enum { PADDING_DROP = 0, PADDING_ZERO = 1 };
+
+// Pools every (batch, channel) plane of a 4-D NCHW float tensor with a square
+// kernel_size x kernel_size window that moves by `stride` in both directions.
+// output_tensor is resized to {batch, channels, out_h, out_w} and filled.
+//
+// pooling_type: PoolMethod_MAX stores the largest value of each window;
+//               PoolMethod_AVE stores the window sum divided by
+//               kernel_size * kernel_size.
+// padding:      PADDING_DROP only evaluates windows that lie completely inside
+//               the input, so bottom-most rows and right-most columns that do
+//               not fill a window are dropped. PADDING_ZERO surrounds the
+//               input with a border of zeros; border cells take part in the
+//               max / average as 0.
+//
+// Unsupported pooling_type / padding values, a non-positive kernel_size or
+// stride, an empty input plane and (for PADDING_DROP) a kernel larger than the
+// input are rejected through REQUIRE.
+template <typename T>
+void PoolingFunction(const Tensor *input_tensor, Tensor *output_tensor, int pooling_type,
+                     int kernel_size, int stride, int padding) {
+  const auto &in_shape = input_tensor->shape();
+  REQUIRE(in_shape.size() == 4, "The input tensor shape should specify 4 dimensions(NCHW)");
+  REQUIRE(pooling_type == PoolMethod_MAX || pooling_type == PoolMethod_AVE,
+          "pooling_type should be PoolMethod_MAX or PoolMethod_AVE");
+  REQUIRE(padding == PADDING_DROP || padding == PADDING_ZERO,
+          "padding should be PADDING_DROP or PADDING_ZERO");
+  REQUIRE(kernel_size > 0 && stride > 0, "kernel_size and stride should be positive");
+
+  const int64 batch_size = in_shape[0];
+  const int64 channels = in_shape[1];
+  const int64 h = in_shape[2];
+  const int64 w = in_shape[3];
+  REQUIRE(h > 0 && w > 0, "The input height and width should be positive");
+
+  // Total zero padding along each axis; stays 0 for PADDING_DROP.
+  int64 hpad = 0;
+  int64 wpad = 0;
+  if (padding == PADDING_ZERO) {
+    // Pad so that the last window starts at the last stride-aligned input
+    // position. NOTE: the amount is negative when kernel_size is smaller than
+    // the input left over from that position; windows then start further in.
+    hpad = kernel_size + (h - 1) / stride * stride - h;
+    wpad = kernel_size + (w - 1) / stride * stride - w;
+  } else {
+    REQUIRE(kernel_size <= h && kernel_size <= w,
+            "kernel_size should not exceed the input size for PADDING_DROP");
+  }
+  // Half of the padding (rounded toward zero) goes before the data.
+  const int64 pad_top = hpad / 2;
+  const int64 pad_left = wpad / 2;
+  const int64 outh = (h + hpad - kernel_size) / stride + 1;
+  const int64 outw = (w + wpad - kernel_size) / stride + 1;
+
+  output_tensor->Resize({batch_size, channels, outh, outw});
+
+  const float *input = input_tensor->data<float>();
+  float *output = output_tensor->mutable_data<float>();
+  const bool is_max = (pooling_type == PoolMethod_MAX);
+  const int64 window_area = static_cast<int64>(kernel_size) * kernel_size;
+  const int64 plane_count = batch_size * channels;
+
+  // Each (batch, channel) plane is independent, so planes are shared out
+  // between threads. Offsets are 64-bit so large tensors do not overflow.
+#pragma omp parallel for
+  for (int64 plane = 0; plane < plane_count; ++plane) {
+    const float *in_plane = input + plane * h * w;
+    float *out_plane = output + plane * outh * outw;
+
+    for (int64 i = 0; i < outh; ++i) {
+      // Rows of this window that lie inside the input.
+      const int64 h_start = i * stride - pad_top;
+      const int64 h_begin = std::max<int64>(h_start, 0);
+      const int64 h_end = std::min<int64>(h_start + kernel_size, h);
+
+      for (int64 j = 0; j < outw; ++j) {
+        // Columns of this window that lie inside the input.
+        const int64 w_start = j * stride - pad_left;
+        const int64 w_begin = std::max<int64>(w_start, 0);
+        const int64 w_end = std::min<int64>(w_start + kernel_size, w);
+
+        float result;
+        if (is_max) {
+          // Seed with the lowest float rather than 0 so that a window holding
+          // only negative values is not reported as 0.
+          result = std::numeric_limits<float>::lowest();
+          for (int64 y = h_begin; y < h_end; ++y) {
+            for (int64 x = w_begin; x < w_end; ++x) {
+              result = std::max(result, in_plane[y * w + x]);
+            }
+          }
+          // Border cells are zeros, so they only matter when the window
+          // actually reaches outside the input.
+          if ((h_end - h_begin) * (w_end - w_begin) < window_area) {
+            result = std::max(result, 0.0f);
+          }
+        } else {
+          // Border cells are zeros and add nothing to the sum, but they still
+          // count in the divisor.
+          float sum = 0.0f;
+          for (int64 y = h_begin; y < h_end; ++y) {
+            for (int64 x = w_begin; x < w_end; ++x) {
+              sum += in_plane[y * w + x];
+            }
+          }
+          result = sum / (kernel_size * kernel_size);
+        }
+        out_plane[i * outw + j] = result;
+      }
+    }
+  }
+}
+
+}  // namespace kernels
+}  // namespace mace
+
+#endif  // MACE_KERNELS_POOLING_H
diff --git a/mace/ops/BUILD b/mace/ops/BUILD
index 1cd2f904..09a00ab3 100644
--- a/mace/ops/BUILD
+++ b/mace/ops/BUILD
@@ -5,7 +5,6 @@ package(
     default_visibility = ["//visibility:public"],
 )
 
-
 licenses(["notice"])  # Apache 2.0
 
 load("//mace:mace.bzl", "if_android")
@@ -14,11 +13,43 @@ cc_library(
     name = "ops",
     srcs = glob(["*.cc"]),
     hdrs = glob(["*.h"]),
+    copts = ["-std=c++11"],
     deps = [
+        "//mace/core",
+        "//mace/kernels",
         "//mace/proto:cc_proto",
-        "//mace/core:core",
-        "//mace/kernels:kernels",
     ],
-    copts = ['-std=c++11'],
     alwayslink = 1,
 )
+
+cc_test(
+    name = "relu_test",
+    srcs = ["relu_test.cc"],
+    copts = ["-std=c++11"],
+    linkopts = if_android([
+        "-pie",
+        "-llog",
+        "-latomic",
+    ]),
+    linkstatic = 1,
+    deps = [
+        ":ops",
+        "@gtest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "pooling_test",
+    srcs = ["pooling_test.cc"],
+    copts = ["-std=c++11"],
+    linkopts = if_android([
+        "-pie",
+        "-llog",
+        "-latomic",
+    ]),
+    linkstatic = 1,
+    deps = [
+        ":ops",
+        "@gtest//:gtest_main",
+    ],
+)
diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc
new file mode 100644
index 00000000..0804b043
--- /dev/null
+++ b/mace/ops/pooling.cc
@@ -0,0 +1,25 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/ops/pooling.h"
+#include "mace/proto/mace.pb.h"
+#include "mace/kernels/pooling.h"
+
+namespace mace {
+
+// CPU/float pooling: reads the operator arguments and pools Input(0) into Output(0).
+template <>
+bool PoolingOp<DeviceType::CPU, float>::Run() {
+  const Tensor* input_tensor = Input(0);
+  Tensor* output_tensor = Output(0);
+  int pooling_type = this->GetSingleArgument<int>("pooling_type", 0);
+  int kernel_size = this->GetSingleArgument<int>("kernel_size", 1);
+  int stride = this->GetSingleArgument<int>("stride", 1);
+  int padding = this->GetSingleArgument<int>("padding", 0);
+  kernels::PoolingFunction<float>(input_tensor, output_tensor, pooling_type, kernel_size, stride, padding);
+  return true;
+}
+REGISTER_CPU_OPERATOR(Pooling, PoolingOp<DeviceType::CPU, float>);
+
+}  // namespace mace
diff --git a/mace/ops/pooling.h b/mace/ops/pooling.h
new file mode 100644
index 00000000..26415a49
--- /dev/null
+++ b/mace/ops/pooling.h
@@ -0,0 +1,22 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_OPS_POOLING_H_
+#define MACE_OPS_POOLING_H_
+
+#include "mace/core/operator.h"
+
+namespace mace {
+
+// Pooling operator; Run() is specialized per device / data type in pooling.cc.
+template <DeviceType D, class T>
+class PoolingOp : public Operator<D, T> {
+ public:
+  PoolingOp(const OperatorDef &operator_def, Workspace *ws) : Operator<D, T>(operator_def, ws) {}
+  bool Run() override;
+};
+
+}  // namespace mace
+
+#endif  // MACE_OPS_POOLING_H_
diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc
new file mode 100644
index 00000000..d782e6b9
--- /dev/null
+++ b/mace/ops/pooling_test.cc
@@ -0,0 +1,65 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "gtest/gtest.h"
+
+#include "mace/core/operator.h"
+#include "mace/core/net.h"
+
+using namespace mace;
+
+TEST(PoolingTest, Pooling) {
+  OperatorRegistry* registry = gDeviceTypeRegistry()->at(DeviceType::CPU);
+  const auto registry_keys = registry->Keys();
+  for (const auto& key : registry_keys) {
+    VLOG(0) << "registry_op: " << key;
+  }
+
+  // Construct graph: one Pooling op, 2x2 max pooling with stride 2
+  OperatorDef op_def;
+  op_def.add_input("Input0");
+  op_def.add_output("Output0");
+  op_def.set_name("PoolingTest");
+  op_def.set_type("Pooling");
+  auto pooling_type = op_def.add_arg();
+  pooling_type->set_name("pooling_type");
+  pooling_type->set_i(0);  // PoolMethod_MAX
+  auto kernel_size = op_def.add_arg();
+  kernel_size->set_name("kernel_size");
+  kernel_size->set_i(2);
+  auto stride = op_def.add_arg();
+  stride->set_name("stride");
+  stride->set_i(2);
+  auto padding = op_def.add_arg();
+  padding->set_name("padding");
+  padding->set_i(0);  // PADDING_DROP
+
+  NetDef net_def;
+  net_def.set_name("NetTest");
+  net_def.add_op()->CopyFrom(op_def);
+  VLOG(0) << net_def.DebugString();
+
+  // Create workspace and input tensor: a 2x2x4x4 ramp holding 0..63
+  Workspace ws;
+  Tensor* input = ws.CreateTensor("Input0", cpu_allocator(), DataType::DT_FLOAT);
+  Tensor* output = ws.CreateTensor("Output0", cpu_allocator(), DataType::DT_FLOAT);
+  input->Resize({2, 2, 4, 4});
+  float* input_data = input->mutable_data<float>();
+  for (int i = 0; i < 64; ++i) {
+    input_data[i] = i;
+  }
+
+  // Create Net & run
+  auto net = CreateNet(net_def, &ws, DeviceType::CPU);
+  net->Run();
+
+  ASSERT_EQ(4u, output->shape().size());  // the loop below would pass on an empty shape
+  for (auto d : output->shape()) {
+    ASSERT_EQ(d, 2);
+  }
+
+  ASSERT_FLOAT_EQ(output->data<float>()[0], 5);
+  ASSERT_FLOAT_EQ(output->data<float>()[3], 15);
+  ASSERT_FLOAT_EQ(output->data<float>()[15], 63);
+}
-- 
GitLab