提交 048f7a8c 编写于 作者: L Liangliang He

Merge branch 'feature_wuch' into 'master'

fix pooling op

See merge request !27
......@@ -51,13 +51,13 @@ class Conv2dFunctor {
MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch");
// The left-upper most offset of the padded input
int padded_h_start = 0 - paddings_[0];
int padded_w_start = 0 - paddings_[1];
int padded_h_stop = input_height + paddings_[0];
int padded_w_stop = input_width + paddings_[1];
int padded_h_start = 0 - paddings_[0] / 2;
int padded_w_start = 0 - paddings_[1] / 2;
int padded_h_stop = input_height + paddings_[0] - paddings_[0] / 2;
int padded_w_stop = input_width + paddings_[1] - paddings_[1] / 2;
#pragma omp parallel for collpse(2)
for (int n = 0; n < batch; ++n) {
#pragma omp parallel for
for (int c = 0; c < channels; ++c) {
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_KERNELS_POOLING_H
#define MACE_KERNELS_POOLING_H
#include <algorithm>
#include <limits>
#include "mace/core/tensor.h"
namespace mace {
// Reduction applied over each pooling window by kernels::PoolingFunctor.
// The numeric values are what the "pooling_type" operator argument carries
// (PoolingOp casts the integer argument back to this enum).
enum PoolingType {
AVG = 1, // avg_pool: window sum divided by kernel_h * kernel_w
MAX = 2, // max_pool: largest in-bounds value of the window
};
namespace kernels {
/**
 * Straightforward reference implementation of 2-D pooling (average or max).
 *
 * Shape arrays are indexed [0..3] and used as (batch, channels, height,
 * width). The functor only stores the pointers handed to the constructor,
 * so those arrays must stay alive for every call to operator().
 */
template<DeviceType D, typename T>
class PoolingFunctor {
 public:
  /**
   * @param pooling_type AVG or MAX.
   * @param kernels      {kernel_h, kernel_w}.
   * @param strides      {stride_h, stride_w}.
   * @param paddings     {pad_h, pad_w}; half of each value (integer division)
   *                     is applied before the top/left edge of the input.
   * @param dilations    {dilation_h, dilation_w}.
   */
  PoolingFunctor(const PoolingType pooling_type,
                 const int* kernels,
                 const int* strides,
                 const int* paddings,
                 const int* dilations)
      : pooling_type_(pooling_type),
        kernels_(kernels),
        strides_(strides),
        paddings_(paddings),
        dilations_(dilations) {}

  /**
   * Pools `input` into `output`.
   *
   * @param input        Source values, laid out contiguously per the indexing
   *                     n * C * H * W + c * H * W + h * W + w.
   * @param input_shape  4-element shape of `input`.
   * @param output       Destination buffer, same layout, sized by
   *                     `output_shape`.
   * @param output_shape 4-element shape of `output`; batch and channel counts
   *                     are taken from here.
   */
  void operator()(const T* input,
                  const index_t* input_shape,
                  T* output,
                  const index_t* output_shape) {
    // The pooling type never changes during a call, so validate it once
    // instead of inside every window iteration.
    MACE_CHECK(pooling_type_ == AVG || pooling_type_ == MAX,
               "Unsupported pooling type: ", pooling_type_);
    const bool is_max = (pooling_type_ == MAX);

    const index_t batch = output_shape[0];
    const index_t channels = output_shape[1];
    const index_t height = output_shape[2];
    const index_t width = output_shape[3];

    const index_t input_channels = input_shape[1];
    const index_t input_height = input_shape[2];
    const index_t input_width = input_shape[3];

    const int kernel_h = kernels_[0];
    const int kernel_w = kernels_[1];

    const int stride_h = strides_[0];
    const int stride_w = strides_[1];

    const int dilation_h = dilations_[0];
    const int dilation_w = dilations_[1];

    // The left-upper most offset of the padded input. Positions that fall
    // outside the real input are skipped below rather than read as zeros.
    const int padded_h_start = 0 - paddings_[0] / 2;
    const int padded_w_start = 0 - paddings_[1] / 2;

    // AVG always divides by the full window size, including positions that
    // fell into the padding.
    // NOTE(review): TensorFlow's avg_pool divides by the number of in-bounds
    // elements instead; confirm which behaviour is wanted for SAME padding.
    const int window_size = kernel_h * kernel_w;

#pragma omp parallel for collapse(2)
    for (int n = 0; n < batch; ++n) {
      for (int c = 0; c < channels; ++c) {
        index_t out_offset = n * channels * height * width +
                             c * height * width;
        const index_t in_offset =
            n * input_channels * input_height * input_width +
            c * input_height * input_width;

        for (int h = 0; h < height; ++h) {
          for (int w = 0; w < width; ++w) {
            // Running sum for AVG, running maximum for MAX.
            T sum_or_max = is_max ? std::numeric_limits<T>::lowest()
                                  : static_cast<T>(0);

            for (int kh = 0; kh < kernel_h; ++kh) {
              for (int kw = 0; kw < kernel_w; ++kw) {
                const int inh = padded_h_start + h * stride_h + dilation_h * kh;
                const int inw = padded_w_start + w * stride_w + dilation_w * kw;
                if (inh >= 0 && inh < input_height &&
                    inw >= 0 && inw < input_width) {
                  const index_t input_offset = in_offset +
                                               inh * input_width + inw;
                  if (is_max) {
                    sum_or_max = std::max(sum_or_max, input[input_offset]);
                  } else {
                    sum_or_max += input[input_offset];
                  }
                }
              }
            }

            output[out_offset] = is_max ? sum_or_max
                                        : sum_or_max / window_size;
            // Output elements of one (n, c) plane are written in row-major
            // order, so the offset simply advances by one.
            out_offset += 1;
          }
        }
      }
    }
  }

 private:
  const PoolingType pooling_type_;
  const int* kernels_;
  const int* strides_;
  const int* paddings_;
  const int* dilations_;
};
} // namespace kernels
} // namespace mace
#endif //MACE_KERNELS_POOLING_H
......@@ -41,9 +41,16 @@ cc_library(
)
cc_test(
name = "batch_norm_test",
srcs = ["batch_norm_test.cc"],
name = "ops_test",
srcs = glob(
["*_test.cc"],
),
copts = ["-std=c++11"],
linkopts = if_android([
"-pie",
"-llog",
"-latomic",
]),
linkstatic = 1,
deps = [
":ops",
......
......@@ -16,7 +16,7 @@ namespace mace {
template<DeviceType D, typename T>
class Conv2dOp : public ConvPool2dOpBase<D, T> {
public:
Conv2dOp(const OperatorDef &op_def, Workspace *ws)
Conv2dOp(const OperatorDef& op_def, Workspace* ws)
: ConvPool2dOpBase<D, T>(op_def, ws) {};
bool Run() override {
......@@ -27,7 +27,10 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
std::vector<index_t> output_shape;
std::vector<int> paddings;
this->CalcPaddingAndOutputSize(input, filter, &output_shape, &paddings);
this->CalcPaddingAndOutputSize(input->shape().data(),
filter->shape().data(),
&output_shape,
&paddings);
output->Resize(output_shape);
auto conv2d = kernels::Conv2dFunctor<D, T>(this->strides_.data(),
......
......@@ -21,7 +21,7 @@ TEST_F(Conv2dOpTest, Simple_VALID) {
// Add args
AddIntsArg("strides", {1, 1});
AddIntArg("padding", static_cast<int>(Conv2dOp<DeviceType::CPU, float>::Padding::VALID));
AddIntArg("padding", Padding::VALID);
AddIntsArg("dilations", {1, 1});
// Add input data
......@@ -58,7 +58,7 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
// Add args
AddIntsArg("strides", {1, 1});
AddIntArg("padding", static_cast<int>(Conv2dOp<DeviceType::CPU, float>::Padding::SAME));
AddIntArg("padding", Padding::SAME);
AddIntsArg("dilations", {1, 1});
// Add input data
......@@ -98,7 +98,7 @@ TEST_F(Conv2dOpTest, Combined) {
// Add args
AddIntsArg("strides", {2, 2});
AddIntArg("padding", static_cast<int>(Conv2dOp<DeviceType::CPU, float>::Padding::SAME));
AddIntArg("padding", Padding::SAME);
AddIntsArg("dilations", {1, 1});
// Add input data
......
......@@ -9,10 +9,16 @@
namespace mace {
// Padding policy selected by the integer "padding" operator argument and
// consumed by ConvPool2dOpBase::CalcPaddingAndOutputSize.
// NOTE(review): the visible CalcPaddingAndOutputSize derives one total
// padding value per dimension from the computed output size, so the
// "on both sides" wording below may not be exactly symmetric - confirm.
enum Padding {
VALID = 0, // No padding
SAME = 1, // Pads with half the filter size (rounded down) on both sides
FULL = 2, // Pads with one less than the filter size on both sides
};
template<DeviceType D, class T>
class ConvPool2dOpBase : public Operator<D, T> {
public:
ConvPool2dOpBase(const OperatorDef &op_def, Workspace *ws)
ConvPool2dOpBase(const OperatorDef& op_def, Workspace* ws)
: Operator<D, T>(op_def, ws),
strides_(OperatorBase::GetRepeatedArgument<int>("strides")),
padding_(static_cast<Padding>(
......@@ -20,58 +26,65 @@ class ConvPool2dOpBase : public Operator<D, T> {
static_cast<int>(SAME)))),
dilations_(OperatorBase::GetRepeatedArgument<int>("dilations")) {}
void CalcPaddingAndOutputSize(const Tensor* input,
const Tensor* filter,
void CalcPaddingAndOutputSize(const index_t* input_shape, // NCHW
const index_t* filter_shape, // HWIO
std::vector<index_t>* output_shape,
std::vector<int>* padding_size) {
MACE_CHECK(dilations_[0] > 0 && dilations_[1] > 0,
"Invalid dilations, must >= 1");
"Invalid dilations, must >= 1");
MACE_CHECK((dilations_[0] == 1 || strides_[0] == 1) &&
(dilations_[1] == 1 || strides_[1] == 1),
"If dilations > 1, strides should be 1");
/*
* Convlution/pooling arithmetic:
* o = (i + 2 * p - k - (k - 1) * (d - 1)) / s + 1
* For details, see https://arxiv.org/pdf/1603.07285.pdf or
* http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html
*/
auto& input_shape = input->shape();
auto& filter_shape = filter->shape(); // HWIO
int kernel_h = filter_shape[0];
int kernel_w = filter_shape[1];
int output_channel = filter_shape[3];
MACE_CHECK(input_shape[1] == filter_shape[2],
input_shape[1], " != ", filter_shape[2]);
* Convolution/pooling arithmetic:
* o = (i + 2 * p - k - (k - 1) * (d - 1)) / s + 1
* For details, see https://arxiv.org/pdf/1603.07285.pdf or
* http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html
*/
*padding_size = {0, 0};
index_t output_height, output_width;
index_t kernel_height = filter_shape[0];
index_t kernel_width = filter_shape[1];
index_t output_channels = filter_shape[3];
int k_extent_height = (kernel_height - 1) * dilations_[0] + 1;
int k_extent_width = (kernel_width - 1) * dilations_[1] + 1;
switch (padding_) {
case VALID:
output_height = (input_shape[2] - k_extent_height) / strides_[0] + 1;
output_width = (input_shape[3] - k_extent_width) / strides_[1] + 1;
break;
case SAME:
(*padding_size)[0] = kernel_h / 2;
(*padding_size)[1] = kernel_w / 2;
output_height = (input_shape[2] - 1) / strides_[0] + 1;
output_width = (input_shape[3] - 1) / strides_[1] + 1;
break;
case FULL:
(*padding_size)[0] = kernel_h - 1;
(*padding_size)[1] = kernel_w - 1;
output_height = (input_shape[2] + k_extent_height - 2) / strides_[0] + 1;
output_width = (input_shape[3] + k_extent_width - 2) / strides_[1] + 1;
break;
default:
MACE_CHECK(false, "Unsupported padding type: ", padding_);
MACE_CHECK(false, "Unsupported padding type: ", this->padding_);
}
// Note: TensorFlow may pad one more element on the right/bottom side.
// TODO: maybe it is better to also truncate the left/top to
// utilize the more centered features. We need to benchmark
// based on the model accuracy.
(*padding_size)[0] = (output_height - 1) * strides_[0] +
k_extent_height - input_shape[2];
(*padding_size)[1] = (output_width - 1) * strides_[1] +
k_extent_width - input_shape[3];
*output_shape = std::vector<index_t>(4); // NCHW
(*output_shape)[0] = input_shape[0];
(*output_shape)[1] = output_channel;
(*output_shape)[2] = (input_shape[2] + 2 * (*padding_size)[0] - kernel_h -
(kernel_h - 1) * (dilations_[0] - 1)) /
strides_[0] + 1;
(*output_shape)[3] = (input_shape[3] + 2 * (*padding_size)[1] - kernel_w -
(kernel_w - 1) * (dilations_[1] - 1)) /
strides_[1] + 1;
(*output_shape)[1] = output_channels;
(*output_shape)[2] = output_height;
(*output_shape)[3] = output_width;
}
enum Padding {
VALID = 0, // No padding
SAME = 1, // Pads with half the filter size (rounded down) on both sides
FULL = 2, // Pads with one less than the filter size on both sides
};
protected:
std::vector<int> strides_;
Padding padding_;
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/pooling.h"
#include "mace/proto/mace.pb.h"
#include "mace/kernels/pooling.h"
namespace mace {
// Registers the float CPU instantiation of PoolingOp under the operator
// type name "Pooling".
REGISTER_CPU_OPERATOR(Pooling, PoolingOp<DeviceType::CPU, float>);
} // namespace mace
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_OPS_POOLING_H_
#define MACE_OPS_POOLING_H_
#include "mace/core/operator.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/kernels/pooling.h"
namespace mace {
/**
 * Pooling operator: computes the output shape and padding through
 * ConvPool2dOpBase and delegates the arithmetic to kernels::PoolingFunctor.
 *
 * Operator arguments read here:
 *   "kernels"      - repeated int, {kernel_h, kernel_w}
 *   "pooling_type" - int value of PoolingType, defaults to AVG
 * "strides", "padding" and "dilations" are read by the base class.
 */
template<DeviceType D, class T>
class PoolingOp : public ConvPool2dOpBase<D, T> {
 public:
  // Members are initialized in declaration order (pooling_type_, kernels_).
  PoolingOp(const OperatorDef& op_def, Workspace* ws)
      : ConvPool2dOpBase<D, T>(op_def, ws),
        pooling_type_(static_cast<PoolingType>(
            OperatorBase::GetSingleArgument<int>(
                "pooling_type", static_cast<int>(AVG)))),
        kernels_(OperatorBase::GetRepeatedArgument<int>("kernels")) {}

  /**
   * Resizes the output tensor and runs the pooling kernel.
   * @return true once the kernel has run.
   */
  bool Run() override {
    // kernels_[0] and kernels_[1] are read below; fail loudly instead of
    // indexing past the end when the argument is missing or malformed.
    MACE_CHECK(kernels_.size() == 2,
               "Pooling requires 2 kernel sizes (height, width), got ",
               kernels_.size());

    const Tensor* input = this->Input(INPUT);
    Tensor* output = this->Output(OUTPUT);
    const std::vector<index_t>& in_shape = input->shape();
    MACE_CHECK(in_shape.size() == 4,
               "Pooling expects a 4-D input, got rank ", in_shape.size());

    std::vector<index_t> output_shape;
    std::vector<int> paddings;

    // CalcPaddingAndOutputSize expects an HWIO filter shape. Pooling keeps
    // the channel count, so both the I and O slots carry the input channels.
    std::vector<index_t> filter_shape(4);
    filter_shape[0] = kernels_[0];
    filter_shape[1] = kernels_[1];
    filter_shape[2] = in_shape[1];
    filter_shape[3] = in_shape[1];

    this->CalcPaddingAndOutputSize(in_shape.data(), filter_shape.data(),
                                   &output_shape, &paddings);
    output->Resize(output_shape);

    auto pooling_func = kernels::PoolingFunctor<D, T>(pooling_type_,
                                                      kernels_.data(),
                                                      this->strides_.data(),
                                                      paddings.data(),
                                                      this->dilations_.data());
    // Use the operator's element type rather than a hard-coded float so the
    // template also compiles for other T.
    pooling_func(input->data<T>(),
                 in_shape.data(),
                 output->mutable_data<T>(),
                 output->shape().data());
    return true;
  }

 protected:
  PoolingType pooling_type_;
  std::vector<int> kernels_;

  OP_INPUT_TAGS(INPUT);
  OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace mace
#endif //MACE_OPS_POOLING_H_
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "gtest/gtest.h"
#include "mace/core/operator.h"
#include "mace/core/net.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/kernels/pooling.h"
using namespace mace;
// Test fixture for the "Pooling" operator; adds nothing on top of OpsTestBase.
class PoolingOpTest : public OpsTestBase {};
// Max pooling, 2x2 window, stride 2, VALID padding, on a {1, 2, 4, 4} input.
// The windows tile each 4x4 plane without overlap, so every output is the
// bottom-right (largest) element of its 2x2 tile.
TEST_F(PoolingOpTest, MAX_VALID) {
// Construct graph
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(operator_def());
// Add args
AddIntsArg("kernels", {2, 2});
AddIntsArg("strides", {2, 2});
AddIntArg("padding", Padding::VALID);
AddIntsArg("dilations", {1, 1});
AddIntArg("pooling_type", PoolingType::MAX);
// Add input data
// Two 4x4 planes filled with 0..15 and 16..31 in row-major order.
AddInputFromArray<float>("Input", {1, 2, 4, 4},
{0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15,
16, 17, 18, 19,
20, 21, 22, 23,
24, 25, 26, 27,
28, 29, 30, 31});
// Run
RunOp();
// Check
// Plane 0 tiles give 5, 7, 13, 15; plane 1 is the same plus 16.
Tensor expected = CreateTensor<float>({1, 2, 2, 2},
{5, 7, 13, 15, 21, 23, 29, 31});
ExpectTensorNear<float>(expected, *GetOutput("Output"), 0.001);
}
// Average pooling, 2x2 window, stride 2, VALID padding, on a {1, 2, 4, 4}
// input. Every output is the mean of one non-overlapping 2x2 tile.
TEST_F(PoolingOpTest, AVG_VALID) {
// Construct graph
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(operator_def());
// Add args
AddIntsArg("kernels", {2, 2});
AddIntsArg("strides", {2, 2});
AddIntArg("padding", Padding::VALID);
AddIntsArg("dilations", {1, 1});
AddIntArg("pooling_type", PoolingType::AVG);
// Add input data
// Two 4x4 planes filled with 0..15 and 16..31 in row-major order.
AddInputFromArray<float>("Input", {1, 2, 4, 4},
{0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15,
16, 17, 18, 19,
20, 21, 22, 23,
24, 25, 26, 27,
28, 29, 30, 31});
// Run
RunOp();
// Check
// e.g. first tile: (0 + 1 + 4 + 5) / 4 = 2.5; plane 1 is plane 0 plus 16.
Tensor expected = CreateTensor<float>({1, 2, 2, 2},
{2.5, 4.5, 10.5, 12.5, 18.5, 20.5, 26.5, 28.5});
ExpectTensorNear<float>(expected, *GetOutput("Output"), 0.001);
}
// Max pooling, 2x2 window, stride 2, SAME padding, on a {1, 1, 3, 3} input.
// The expected values correspond to windows anchored at rows/cols 0 and 2,
// i.e. the windows in the last row/column hang over the bottom/right edge
// and only see the in-bounds elements.
TEST_F(PoolingOpTest, MAX_SAME) {
// Construct graph
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(operator_def());
// Add args
AddIntsArg("kernels", {2, 2});
AddIntsArg("strides", {2, 2});
AddIntArg("padding", Padding::SAME);
AddIntsArg("dilations", {1, 1});
AddIntArg("pooling_type", PoolingType::MAX);
// Add input data
AddInputFromArray<float>("Input", {1, 1, 3, 3},
{0, 1, 2,
3, 4, 5,
6, 7, 8});
// Run
RunOp();
// Check
// max{0,1,3,4} = 4, max{2,5} = 5, max{6,7} = 7, max{8} = 8.
Tensor expected = CreateTensor<float>({1, 1, 2, 2},
{4, 5, 7, 8});
ExpectTensorNear<float>(expected, *GetOutput("Output"), 0.001);
}
// Max pooling with a dilated 2x2 window (dilation 2, stride 1, VALID
// padding) on a {1, 1, 4, 4} input. Each window samples rows/cols r and
// r + 2, so it spans 3 input elements per dimension and yields a 2x2 output.
TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
// Construct graph
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(operator_def());
// Add args
AddIntsArg("kernels", {2, 2});
AddIntsArg("strides", {1, 1});
AddIntArg("padding", Padding::VALID);
AddIntsArg("dilations", {2, 2});
AddIntArg("pooling_type", PoolingType::MAX);
// Add input data
AddInputFromArray<float>("Input", {1, 1, 4, 4},
{0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15});
// Run
RunOp();
// Check
// e.g. output (0, 0) = max{0, 2, 8, 10} = 10; output (1, 1) = max{5, 7, 13, 15} = 15.
Tensor expected = CreateTensor<float>({1, 1, 2, 2},
{10, 11, 14, 15});
ExpectTensorNear<float>(expected, *GetOutput("Output"), 0.001);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册