// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/arm/pool_compute.h"
#include <gtest/gtest.h>
#include <cstring>  // memset
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace arm {

int PoolOutputSize(int input_size,
                   int filter_size,
                   int pad_left,
                   int pad_right,
                   int stride,
                   bool ceil_mode) {
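  // floor mode: out = (in - k + pad_left + pad_right) / stride + 1
  // ceil mode:  out = (in - k + pad_left + pad_right + stride - 1) / stride + 1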
  int output_size;
  if (!ceil_mode) {
    output_size =
        (input_size - filter_size + pad_left + pad_right) / stride + 1;
  } else {
    output_size =
        (input_size - filter_size + pad_left + pad_right + stride - 1) /
            stride +
        1;
  }
  return output_size;
}

std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
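  // paddings are laid out as {pad_top, pad_bottom, pad_left, pad_right};
  // global pooling zeroes them and pools over the full spatial dims, while
  // adaptive pooling uses ksize directly as the output spatial shape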
  const auto x_dims = param_->x->dims();
  std::vector<int>& ksize = param_->ksize;
  auto paddings = *param_->paddings;
  if (param_->global_pooling) {
    ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
    for (size_t i = 0; i < ksize.size(); ++i) {
      paddings[2 * i] = 0;
      paddings[2 * i + 1] = 0;
      ksize[i] = static_cast<int>(x_dims[i + 2]);
    }
  }

  std::vector<int64_t> output_shape({x_dims[0], x_dims[1]});
  if (param_->adaptive) {
    output_shape.insert(
        output_shape.end(), param_->ksize.begin(), param_->ksize.end());
  } else {
    for (size_t i = 0; i < param_->ksize.size(); ++i) {
      output_shape.push_back(PoolOutputSize(x_dims[i + 2],
                                            param_->ksize[i],
                                            paddings[2 * i],
                                            paddings[2 * i + 1],
                                            param_->strides[i],
                                            param_->ceil_mode));
    }
  }
  return output_shape;
}

void pool_compute_ref(const operators::PoolParam& param) {
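  // naive NCHW reference implementation; the ARM kernel is validated against
  // it element-wise in the compute test below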
  auto& in_dims = param.x->dims();
  auto& out_dims = param.output->dims();

  const float* din = param.x->data<const float>();
  float* dout = param.output->mutable_data<float>();

  std::vector<int> ksize = param.ksize;
  std::vector<int> strides = param.strides;
  std::vector<int> paddings = *param.paddings;

  std::string pooling_type = param.pooling_type;
  bool global_pooling = param.global_pooling;
  bool exclusive = param.exclusive;
  bool adaptive = param.adaptive;
  bool ceil_mode = param.ceil_mode;
  bool use_quantizer = param.use_quantizer;
  std::string data_format = param.data_format;

  int num = in_dims[0];
  int chin = in_dims[1];
  int hin = in_dims[2];
  int win = in_dims[3];

  int chout = out_dims[1];
  int hout = out_dims[2];
  int wout = out_dims[3];

  // no need to pad the input tensor; border zero-padding is handled inside
  // this function
  memset(dout, 0, num * chout * hout * wout * sizeof(float));
  int kernel_h = ksize[0];
  int kernel_w = ksize[1];
  int stride_h = strides[0];
  int stride_w = strides[1];
  int pad_h = paddings[0];
  int pad_w = paddings[2];
  int size_channel_in = win * hin;
  int size_channel_out = wout * hout;
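  // global pooling reduces each channel's whole h * w plane to a single value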
  if (global_pooling) {
    if (pooling_type == "max") {  // Pooling_max
      for (int n = 0; n < num; ++n) {
        float* dout_batch = dout + n * chout * size_channel_out;
        const float* din_batch = din + n * chin * size_channel_in;
#pragma omp parallel for
        for (int c = 0; c < chout; ++c) {
          const float* din_ch = din_batch + c * size_channel_in;  // in address
          float tmp1 = din_ch[0];
          for (int i = 0; i < size_channel_in; ++i) {
            float tmp2 = din_ch[i];
            tmp1 = tmp1 > tmp2 ? tmp1 : tmp2;
          }
          dout_batch[c] = tmp1;
        }
      }
    } else if (pooling_type == "avg") {
      // Pooling_average_include_padding
      // Pooling_average_exclude_padding
      for (int n = 0; n < num; ++n) {
        float* dout_batch = dout + n * chout * size_channel_out;
        const float* din_batch = din + n * chin * size_channel_in;
#pragma omp parallel for
        for (int c = 0; c < chout; ++c) {
          const float* din_ch = din_batch + c * size_channel_in;  // in address
          float sum = 0.f;
          for (int i = 0; i < size_channel_in; ++i) {
            sum += din_ch[i];
          }
          dout_batch[c] = sum / size_channel_in;
        }
      }
    } else {
      LOG(FATAL) << "unsupported pooling type: " << pooling_type;
    }
  } else {
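    // generic path: each output element reduces a kernel_h x kernel_w window
    // clipped to the input borders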
    for (int ind_n = 0; ind_n < num; ++ind_n) {
#pragma omp parallel for
      for (int ind_c = 0; ind_c < chin; ++ind_c) {
        for (int ind_h = 0; ind_h < hout; ++ind_h) {
          int sh = ind_h * stride_h;
          int eh = sh + kernel_h;
          sh = (sh - pad_h) < 0 ? 0 : sh - pad_h;
          eh = (eh - pad_h) > hin ? hin : eh - pad_h;
          for (int ind_w = 0; ind_w < wout; ++ind_w) {
            int sw = ind_w * stride_w;
            int ew = sw + kernel_w;
            sw = (sw - pad_w) < 0 ? 0 : sw - pad_w;
            ew = (ew - pad_w) > win ? win : ew - pad_w;
            float result = static_cast<float>(0);
            int dst_ind = (ind_n * chout + ind_c) * size_channel_out +
                          ind_h * wout + ind_w;
            for (int kh = sh; kh < eh; ++kh) {
              for (int kw = sw; kw < ew; ++kw) {
                int src_ind =
                    (ind_n * chin + ind_c) * size_channel_in + kh * win + kw;
                if (kh == sh && kw == sw) {
                  result = din[src_ind];
                } else {
                  if (pooling_type == "max") {
                    result = result >= din[src_ind] ? result : din[src_ind];
                  } else if (pooling_type == "avg") {
                    result += din[src_ind];
                  }
                }
              }
            }
            if (pooling_type == "avg") {
              if (exclusive) {
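                // exclusive: divide by the number of in-bounds elements
                // actually summed in this window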
                int div = (ew - sw) * (eh - sh);
                div = div > 0 ? div : 1;
                result /= div;
              } else {
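                // inclusive: divide by the kernel area, clipped so the window
                // never counts beyond input + padding at the right/bottom edge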
                int bh = kernel_h;
                int bw = kernel_w;
                if (ew == win) {
                  bw = (sw + kernel_w) >= (win + paddings[3])
                           ? (win + paddings[3])
                           : (sw + kernel_w);
                  bw -= sw;
                  if ((sw - pad_w) < 0 &&
                      (sw + kernel_w) > (win + paddings[3])) {
                    bw += pad_w;
                  }
                }
                if (eh == hin) {
                  bh = (sh + kernel_h) >= (hin + paddings[1])
                           ? (hin + paddings[1])
                           : (sh + kernel_h);
                  bh -= sh;
                  if ((sh - pad_h) < 0 &&
                      (sh + kernel_h) > (hin + paddings[1])) {
                    bh += pad_h;
                  }
                }
                result /= bh * bw;
              }
            }
            dout[dst_ind] = result;
          }
        }
      }
    }
  }
}

TEST(pool_arm, init) {
  PoolCompute pool;
  ASSERT_EQ(pool.precision(), PRECISION(kFloat));
  ASSERT_EQ(pool.target(), TARGET(kARM));
}

TEST(pool_arm, compute) {
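  // sweep pooling configurations, run the ARM kernel and the reference
  // implementation, and require element-wise agreement within 1e-4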
  PoolCompute pool;
  operators::PoolParam param;

  lite::Tensor x;
  lite::Tensor output;
  lite::Tensor output_ref;
#if 0
  // exhaustive parameter sweep, compiled out with "#if 0" to speed up CI;
  // switch to "#if 1" to run the full comparison locally
  for (auto pooling_type : {"max", "avg"}) {
    for (auto ceil_mode : {true, false}) {
      for (auto global_pooling : {true, false}) {
        for (auto exclusive : {true, false}) {
          for (auto ksize : {2, 3}) {
            for (auto stride : {1, 2}) {
              for (auto pad_left : {0, 1}) {
                for (auto pad_right : {0, 1}) {
                  for (auto pad_top : {0, 1}) {
                    for (auto pad_bottom : {0, 1}) {
                      for (auto n : {1, 2}) {
                        for (auto c : {1, 3}) {
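                          // "#if 1" keeps a few representative sizes for CI;
                          // the "#else" branch sweeps h, w over [2, 25)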
#if 1
                          for (auto h : {2, 3, 4, 11}) {
                            for (auto w : {2, 3, 4, 11}) {
#else
                          for (int h = 2; h < 25; h++) {
                            for (int w = 2; w < 25; w++) {
#endif
                              VLOG(3) << "n:" << n << " c:" << c << " h:" << h
                                      << " w:" << w << " ksize:" << ksize
                                      << " stride:" << stride
                                      << " pad_left:" << pad_left
                                      << " pad_right:" << pad_right
                                      << " pad_top:" << pad_top
                                      << " pad_bottom:" << pad_bottom
                                      << " exclusive:" << exclusive
                                      << " global_pooling:" << global_pooling
                                      << " ceil_mode: " << ceil_mode
                                      << " pooling_type:" << pooling_type;

                              // init x, output
                              x.Resize(
                                  DDim(std::vector<int64_t>({n, c, h, w})));
                              auto* x_data = x.mutable_data<float>();
                              for (int i = 0; i < x.dims().production(); ++i) {
                                float sign = (i % 3 == 0) ? -0.03f : 0.05f;
                                x_data[i] = sign * (i % 128);
                              }

                              // fill param
                              param.x = &x;
                              param.output = &output;
                              param.pooling_type = pooling_type;
                              if (global_pooling) {
                                param.ksize = {h, w};
                              } else {
                                param.ksize = {ksize, ksize};
                              }
                              param.global_pooling = global_pooling;
                              param.strides = {stride, stride};
                              std::vector<int> paddings = {
                                  pad_top, pad_bottom, pad_left, pad_right};
                              param.exclusive = exclusive;
                              param.paddings =
                                  std::make_shared<std::vector<int>>(paddings);
                              param.ceil_mode = ceil_mode;
                              param.adaptive = false;
                              param.use_quantizer = false;

                              const std::vector<int64_t>& output_shape =
                                  compute_output_shape(&param);
                              output.Resize(DDim(output_shape));
                              output_ref.Resize(DDim(output_shape));

                              auto* output_data = output.mutable_data<float>();
                              auto* output_ref_data =
                                  output_ref.mutable_data<float>();
                              for (int i = 0; i < output.dims().production();
                                   ++i) {
                                output_data[i] = -2;
                                output_ref_data[i] = -2;
                              }

                              // compute
                              pool.SetParam(param);
                              pool.Run();

                              // compute ref
                              param.output = &output_ref;
                              pool_compute_ref(param);

                              // compare
                              for (int i = 0; i < output.dims().production();
                                   i++) {
                                EXPECT_NEAR(
                                    output_data[i], output_ref_data[i], 1e-4);
                              }
                              VLOG(3) << "compare pass";
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
#endif
}

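// check that a pool2d kernel is registered for (kARM, kFloat)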
TEST(pool_arm, retrieve_op) {
  auto pool = KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>(
      "pool2d");
  ASSERT_FALSE(pool.empty());
  ASSERT_TRUE(pool.front());
}

}  // namespace arm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

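// reference the registered pool2d kernel so the linker keeps its registration
// in this test binary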
USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);