提交 1e51497a 编写于 作者: 吴承辉

Merge branch 'feature_wuch' into 'master'

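Fix pooling op: in the NEON max-pooling kernels, move the `w += num_vectors << 2` update ahead of the vectorized section (the 2x2 loop counts `num_vectors` down to zero, so the update after it added nothing), and split the generic pooling functor into separate MAX and AVG loops.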

See merge request !45
......@@ -61,6 +61,8 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
}
}
w += num_vectors << 2;
for (; num_vectors > 0; --num_vectors) {
float32x4_t r00 = vld1q_f32(r0);
float32x4_t r10 = vld1q_f32(r1);
......@@ -79,7 +81,6 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
outptr += 4;
}
w += num_vectors << 2;
for (; w < out_width; ++w) {
float max = std::numeric_limits<float>::lowest();
for (int kh = 0; kh < 2; ++kh) {
......
......@@ -71,6 +71,7 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
}
}
w += num_vectors << 2;
float32x4x2_t row0 = vld2q_f32(r0);
float32x4x2_t row1 = vld2q_f32(r1);
float32x4x2_t row2 = vld2q_f32(r2);
......@@ -105,7 +106,6 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
outptr += 4;
}
w += num_vectors << 2;
for (; w < out_width; ++w) {
float max = std::numeric_limits<float>::lowest();
for (int kh = 0; kh < 3; ++kh) {
......
......@@ -39,10 +39,12 @@ class PoolingFunctor {
index_t channels = output_shape[1];
index_t height = output_shape[2];
index_t width = output_shape[3];
index_t out_image_size = height * width;
index_t input_channels = input_shape[1];
index_t input_height = input_shape[2];
index_t input_width = input_shape[3];
index_t in_image_size = input_height * input_width;
int kernel_h = kernels_[0];
int kernel_w = kernels_[1];
......@@ -57,56 +59,55 @@ class PoolingFunctor {
int padded_h_start = 0 - paddings_[0] / 2;
int padded_w_start = 0 - paddings_[1] / 2;
if (pooling_type_ == MAX) {
#pragma omp parallel for collapse(2)
for (int n = 0; n < batch; ++n) {
for (int c = 0; c < channels; ++c) {
index_t out_offset = n * channels * height * width + c * height * width;
index_t in_offset = n * input_channels * input_height * input_width +
c * input_height * input_width;
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
T sum_or_max = 0;
switch (pooling_type_) {
case AVG:
break;
case MAX:
sum_or_max = std::numeric_limits<T>::lowest();
break;
default:
MACE_CHECK(false, "Unsupported pooling type: ", pooling_type_);
}
for (int kh = 0; kh < kernel_h; ++kh) {
for (int kw = 0; kw < kernel_w; ++kw) {
int inh = padded_h_start + h * stride_h + dilation_h * kh;
int inw = padded_w_start + w * stride_w + dilation_w * kw;
if (inh >= 0 && inh < input_height && inw >= 0 &&
inw < input_width) {
index_t input_offset = in_offset + inh * input_width + inw;
switch (pooling_type_) {
case AVG:
sum_or_max += input[input_offset];
break;
case MAX:
sum_or_max = std::max(sum_or_max, input[input_offset]);
break;
default:
MACE_CHECK(false, "Unsupported pooling type: ",
pooling_type_);
for (int b = 0; b < batch; ++b) {
for (int c = 0; c < channels; ++c) {
index_t out_offset = (b * channels + c) * out_image_size;
index_t in_offset = (b * input_channels + c) * in_image_size;
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
T max = std::numeric_limits<T>::lowest();
for (int kh = 0; kh < kernel_h; ++kh) {
for (int kw = 0; kw < kernel_w; ++kw) {
int inh = padded_h_start + h * stride_h + dilation_h * kh;
int inw = padded_w_start + w * stride_w + dilation_w * kw;
if (inh >= 0 && inh < input_height && inw >= 0 &&
inw < input_width) {
index_t input_offset = in_offset + inh * input_width + inw;
max = std::max(max, input[input_offset]);
}
}
}
output[out_offset] = max;
out_offset += 1;
}
switch (pooling_type_) {
case AVG:
output[out_offset] = sum_or_max / (kernel_h * kernel_w);
break;
case MAX:
output[out_offset] = sum_or_max;
break;
default:
MACE_CHECK(false, "Unsupported pooling type: ", pooling_type_);
}
}
}
} else if (pooling_type_ == AVG) {
#pragma omp parallel for collapse(2)
for (int b = 0; b < batch; ++b) {
for (int c = 0; c < channels; ++c) {
index_t out_offset = (b * channels + c) * out_image_size;
index_t in_offset = (b * input_channels + c) * in_image_size;
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
T sum = 0;
for (int kh = 0; kh < kernel_h; ++kh) {
for (int kw = 0; kw < kernel_w; ++kw) {
int inh = padded_h_start + h * stride_h + dilation_h * kh;
int inw = padded_w_start + w * stride_w + dilation_w * kw;
if (inh >= 0 && inh < input_height && inw >= 0 &&
inw < input_width) {
index_t input_offset = in_offset + inh * input_width + inw;
sum += input[input_offset];
}
}
}
output[out_offset] = sum / (kernel_h * kernel_w);
out_offset += 1;
}
out_offset += 1;
}
}
}
......
......@@ -24,16 +24,17 @@ class PoolingOp : public ConvPool2dOpBase<D, T> {
bool Run() override {
const Tensor* input = this->Input(INPUT);
Tensor* output = this->Output(OUTPUT);
std::vector<index_t> in_shape = input->shape();
std::vector<index_t> output_shape(4);
std::vector<int> paddings(2);
std::vector<index_t> filter_shape = std::vector<index_t>(4);
filter_shape[0] = in_shape[1];
filter_shape[1] = in_shape[0];
std::vector<index_t> filter_shape(4);
filter_shape[0] = input->shape()[1];
filter_shape[1] = input->shape()[0];
filter_shape[2] = kernels_[0];
filter_shape[3] = kernels_[1];
kernels::CalcPaddingAndOutputSize(in_shape.data(), filter_shape.data(),
kernels::CalcPaddingAndOutputSize(input->shape().data(),
filter_shape.data(),
this->dilations_.data(),
this->strides_.data(), this->padding_,
output_shape.data(), paddings.data());
......@@ -42,7 +43,7 @@ class PoolingOp : public ConvPool2dOpBase<D, T> {
auto pooling_func = kernels::PoolingFunctor<D, T>(
pooling_type_, kernels_.data(), this->strides_.data(), paddings.data(),
this->dilations_.data());
pooling_func(input->data<float>(), in_shape.data(),
pooling_func(input->data<float>(), input->shape().data(),
output->mutable_data<float>(), output->shape().data());
return true;
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册