// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/pool_compute.h"
#include <gtest/gtest.h>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

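// Expected output spatial size for one dimension: floor (default) or ceil
// (ceil_mode) of (input - filter + pad_left + pad_right) / stride, plus 1.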
int PoolOutputSize(int input_size,
                   int filter_size,
                   int pad_left,
                   int pad_right,
                   int stride,
                   bool ceil_mode) {
  int output_size;
  if (!ceil_mode) {
    output_size =
        (input_size - filter_size + pad_left + pad_right) / stride + 1;
  } else {
    output_size =
        (input_size - filter_size + pad_left + pad_right + stride - 1) /
            stride +
        1;
  }
  return output_size;
}

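// Mirrors the pool2d InferShape rule: global pooling covers the whole spatial
// extent with zero padding; adaptive pooling outputs exactly ksize elements.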
std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
  const auto x_dims = param_->x->dims();
  std::vector<int>& ksize = param_->ksize;
  auto paddings = *param_->paddings;
  if (param_->global_pooling) {
    ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
    for (size_t i = 0; i < ksize.size(); ++i) {
      paddings[2 * i] = 0;
      paddings[2 * i + 1] = 0;
      ksize[i] = static_cast<int>(x_dims[i + 2]);
    }
  }

  std::vector<int64_t> output_shape({x_dims[0], x_dims[1]});
  if (param_->adaptive) {
    output_shape.insert(
        output_shape.end(), param_->ksize.begin(), param_->ksize.end());
  } else {
    for (size_t i = 0; i < param_->ksize.size(); ++i) {
      output_shape.push_back(PoolOutputSize(x_dims[i + 2],
                                            param_->ksize[i],
                                            paddings[2 * i],
                                            paddings[2 * i + 1],
                                            param_->strides[i],
                                            param_->ceil_mode));
    }
  }
  return output_shape;
}

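// Naive NCHW float reference implementation used to validate the ARM kernel.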
void pool_compute_ref(const operators::PoolParam& param) {
  auto& in_dims = param.x->dims();
  auto& out_dims = param.output->dims();

  const float* din = param.x->data<const float>();
  float* dout = param.output->mutable_data<float>();

  std::vector<int> ksize = param.ksize;
  std::vector<int> strides = param.strides;
  std::vector<int> paddings = *param.paddings;

  std::string pooling_type = param.pooling_type;
  bool global_pooling = param.global_pooling;
  bool exclusive = param.exclusive;
  bool adaptive = param.adaptive;
  bool ceil_mode = param.ceil_mode;
  bool use_quantizer = param.use_quantizer;
  std::string data_format = param.data_format;

  int num = in_dims[0];
  int chin = in_dims[1];
  int hin = in_dims[2];
  int win = in_dims[3];

  int chout = out_dims[1];
  int hout = out_dims[2];
  int wout = out_dims[3];

  // no need to pad the input tensor; border padding is handled inside this
  // function via window clipping and divisor adjustment
  memset(dout, 0, num * chout * hout * wout * sizeof(float));
  int kernel_h = ksize[0];
  int kernel_w = ksize[1];
  int stride_h = strides[0];
  int stride_w = strides[1];
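  // paddings layout is {top, bottom, left, right}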
  int pad_h = paddings[0];
  int pad_w = paddings[2];
  int size_channel_in = win * hin;
  int size_channel_out = wout * hout;
  if (global_pooling) {
    if (pooling_type == "max") {  // Pooling_max
      for (int n = 0; n < num; ++n) {
        float* dout_batch = dout + n * chout * size_channel_out;
        const float* din_batch = din + n * chin * size_channel_in;
#pragma omp parallel for
        for (int c = 0; c < chout; ++c) {
          const float* din_ch = din_batch + c * size_channel_in;  // in address
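          // running max over the entire channel plane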
          float tmp1 = din_ch[0];
          for (int i = 0; i < size_channel_in; ++i) {
            float tmp2 = din_ch[i];
            tmp1 = tmp1 > tmp2 ? tmp1 : tmp2;
          }
          dout_batch[c] = tmp1;
        }
      }
    } else if (pooling_type == "avg") {
      // Pooling_average_include_padding
      // Pooling_average_exclude_padding
      for (int n = 0; n < num; ++n) {
        float* dout_batch = dout + n * chout * size_channel_out;
        const float* din_batch = din + n * chin * size_channel_in;
#pragma omp parallel for
        for (int c = 0; c < chout; ++c) {
          const float* din_ch = din_batch + c * size_channel_in;  // in address
          float sum = 0.f;
          for (int i = 0; i < size_channel_in; ++i) {
            sum += din_ch[i];
          }
          dout_batch[c] = sum / size_channel_in;
        }
      }
    } else {
      LOG(FATAL) << "unsupported pooling type: " << pooling_type;
    }
  } else {
    for (int ind_n = 0; ind_n < num; ++ind_n) {
#pragma omp parallel for
      for (int ind_c = 0; ind_c < chin; ++ind_c) {
        for (int ind_h = 0; ind_h < hout; ++ind_h) {
          int sh = ind_h * stride_h;
          int eh = sh + kernel_h;
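          // clip the window to the valid input range after removing padding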
          sh = (sh - pad_h) < 0 ? 0 : sh - pad_h;
          eh = (eh - pad_h) > hin ? hin : eh - pad_h;
          for (int ind_w = 0; ind_w < wout; ++ind_w) {
            int sw = ind_w * stride_w;
            int ew = sw + kernel_w;
            sw = (sw - pad_w) < 0 ? 0 : sw - pad_w;
            ew = (ew - pad_w) > win ? win : ew - pad_w;
            float result = static_cast<float>(0);
            int dst_ind = (ind_n * chout + ind_c) * size_channel_out +
                          ind_h * wout + ind_w;
            for (int kh = sh; kh < eh; ++kh) {
              for (int kw = sw; kw < ew; ++kw) {
                int src_ind =
                    (ind_n * chin + ind_c) * size_channel_in + kh * win + kw;
                if (kh == sh && kw == sw) {
                  result = din[src_ind];
                } else {
                  if (pooling_type == "max") {
                    result = result >= din[src_ind] ? result : din[src_ind];
                  } else if (pooling_type == "avg") {
                    result += din[src_ind];
                  }
                }
              }
            }
            if (pooling_type == "avg") {
              if (exclusive) {
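                // exclusive mode divides by the count of valid (un-padded)
                // elements actually covered by the window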
                int div = (ew - sw) * (eh - sh);
                div = div > 0 ? div : 1;
                result /= div;
              } else {
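                // inclusive mode divides by the full kernel area, clipped so
                // the window never reaches past the padded border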
                int bh = kernel_h;
                int bw = kernel_w;
                if (ew == win) {
                  bw = (sw + kernel_w) >= (win + paddings[3])
                           ? (win + paddings[3])
                           : (sw + kernel_w);
                  bw -= sw;
                  if ((sw - pad_w) < 0 &&
                      (sw + kernel_w) > (win + paddings[3])) {
                    bw += pad_w;
                  }
                }
                if (eh == hin) {
                  bh = (sh + kernel_h) >= (hin + paddings[1])
                           ? (hin + paddings[1])
                           : (sh + kernel_h);
                  bh -= sh;
                  if ((sh - pad_h) < 0 &&
                      (sh + kernel_h) > (hin + paddings[1])) {
                    bh += pad_h;
                  }
                }
                result /= bh * bw;
              }
            }
            dout[dst_ind] = result;
          }
        }
      }
    }
  }
}

TEST(pool_arm, init) {
  PoolCompute pool;
  ASSERT_EQ(pool.precision(), PRECISION(kFloat));
  ASSERT_EQ(pool.target(), TARGET(kARM));
}

TEST(pool_arm, compute) {
  PoolCompute pool;
  operators::PoolParam param;

  lite::Tensor x;
  lite::Tensor output;
  lite::Tensor output_ref;

  // speedup for ci
  for (auto pooling_type : {"max", "avg"}) {
    for (auto ceil_mode : {true, false}) {
      for (auto global_pooling : {true, false}) {
        for (auto exclusive : {true, false}) {
          for (auto ksize : {2, 3}) {
            for (auto stride : {1, 2}) {
              for (auto pad_left : {0, 1}) {
                for (auto pad_right : {0, 1}) {
                  for (auto pad_top : {0, 1}) {
                    for (auto pad_bottom : {0, 1}) {
                      for (auto n : {1, 2}) {
                        for (auto c : {1, 3}) {
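// set to 0 below to sweep h/w exhaustively instead of the CI-friendly subset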
#if 1
                          for (auto h : {2, 3, 4, 11}) {
                            for (auto w : {2, 3, 4, 11}) {
#else
                          for (int h = 2; h < 25; h++) {
                            for (int w = 2; w < 25; w++) {
#endif
                              VLOG(3) << "n:" << n << " c:" << c << " h:" << h
                                      << " w:" << w << " ksize:" << ksize
                                      << " stride:" << stride
                                      << " pad_left:" << pad_left
                                      << " pad_right:" << pad_right
                                      << " pad_top:" << pad_top
                                      << " pad_bottom:" << pad_bottom
                                      << " exclusive:" << exclusive
                                      << " global_pooling:" << global_pooling
                                      << " ceil_mode: " << ceil_mode
                                      << " pooling_type:" << pooling_type;

                              // init x, output
                              x.Resize(
                                  DDim(std::vector<int64_t>({n, c, h, w})));
                              auto* x_data = x.mutable_data<float>();
                              for (int i = 0; i < x.dims().production(); ++i) {
                                float sign = i % 3 == 0 ? -0.03f : 0.05f;
                                x_data[i] = sign * (i % 128);
                              }

                              // fill param
                              param.x = &x;
                              param.output = &output;
                              param.pooling_type = pooling_type;
                              if (global_pooling) {
                                param.ksize = {h, w};
                              } else {
                                param.ksize = {ksize, ksize};
                              }
                              param.global_pooling = global_pooling;
                              param.strides = {stride, stride};
                              std::vector<int> paddings = {
                                  pad_top, pad_bottom, pad_left, pad_right};
                              param.exclusive = exclusive;
                              param.paddings =
                                  std::make_shared<std::vector<int>>(paddings);
                              param.ceil_mode = ceil_mode;
                              param.adaptive = false;
                              param.use_quantizer = false;

                              const std::vector<int64_t>& output_shape =
                                  compute_output_shape(&param);
                              output.Resize(DDim(output_shape));
                              output_ref.Resize(DDim(output_shape));

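                              // poison the outputs so stale elements would
                              // fail the comparison below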
                              auto* output_data = output.mutable_data<float>();
                              auto* output_ref_data =
                                  output_ref.mutable_data<float>();
                              for (int i = 0; i < output.dims().production();
                                   ++i) {
                                output_data[i] = -2;
                                output_ref_data[i] = -2;
                              }

                              // compute
                              pool.SetParam(param);
                              pool.Run();

                              // compute ref
                              param.output = &output_ref;
                              pool_compute_ref(param);

                              // compare
                              for (int i = 0; i < output.dims().production();
                                   i++) {
                                EXPECT_NEAR(
                                    output_data[i], output_ref_data[i], 1e-4);
                              }
                              VLOG(3) << "compare pass";
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

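// The kernel must be retrievable from the registry by its op type.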
TEST(pool_arm, retrieve_op) {
  auto pool = KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>(
      "pool2d");
  ASSERT_FALSE(pool.empty());
  ASSERT_TRUE(pool.front());
}

}  // namespace arm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);