/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/pooling.h"
#include <algorithm>
#include <vector>

namespace paddle {
namespace operators {
namespace math {

static inline int ADAPT_START_INDEX(int ph, int input_size, int output_size) {
  return static_cast<int>(
      floor(static_cast<float>(ph * input_size) / output_size));
}

static inline int ADAPT_END_INDEX(int ph, int input_size, int output_size) {
  return static_cast<int>(
      ceil(static_cast<float>((ph + 1) * input_size) / output_size));
}
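
// For example, mapping an input of size 5 onto an adaptive output of size 3
// yields the overlapping windows [0, 2), [1, 4), and [3, 5): the start index
// is floor(ph * 5 / 3) and the end index is ceil((ph + 1) * 5 / 3) for
// ph = 0, 1, 2.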

/*
 * All tensors are in NCHW format.
 * Ksize, strides, paddings are two elements. These two elements represent
 * height and width, respectively.
 */
template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::Tensor& input, const std::vector<int>& ksize,
                  const std::vector<int>& strides,
                  const std::vector<int>& paddings, PoolProcess pool_process,
                  bool exclusive, bool adaptive, framework::Tensor* output) {
    const int batch_size = input.dims()[0];
    const int input_height = input.dims()[2];
    const int input_width = input.dims()[3];
    const int output_channels = output->dims()[1];
    const int output_height = output->dims()[2];
    const int output_width = output->dims()[3];
    const int ksize_height = ksize[0];
    const int ksize_width = ksize[1];
    const int stride_height = strides[0];
    const int stride_width = strides[1];
    const int padding_height = paddings[0];
    const int padding_width = paddings[1];

    const int input_stride = input_height * input_width;
    const int output_stride = output_height * output_width;

    const T* input_data = input.data<T>();
    T* output_data = output->mutable_data<T>(context.GetPlace());

    int hstart, hend;
    int wstart, wend;
    for (int i = 0; i < batch_size; i++) {
      for (int c = 0; c < output_channels; ++c) {
        for (int ph = 0; ph < output_height; ++ph) {
          if (adaptive) {
            hstart = ADAPT_START_INDEX(ph, input_height, output_height);
            hend = ADAPT_END_INDEX(ph, input_height, output_height);
          } else {
            hstart = ph * stride_height - padding_height;
            hend = std::min(hstart + ksize_height, input_height);
            hstart = std::max(hstart, 0);
          }
          for (int pw = 0; pw < output_width; ++pw) {
            if (adaptive) {
              wstart = ADAPT_START_INDEX(pw, input_width, output_width);
              wend = ADAPT_END_INDEX(pw, input_width, output_width);
            } else {
              wstart = pw * stride_width - padding_width;
              wend = std::min(wstart + ksize_width, input_width);
              wstart = std::max(wstart, 0);
            }

            T ele = pool_process.initial();
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                pool_process.compute(input_data[h * input_width + w], &ele);
              }
            }
            int pool_size = (exclusive || adaptive)
                                ? (hend - hstart) * (wend - wstart)
                                : ksize_height * ksize_width;
            pool_process.finalize(static_cast<T>(pool_size), &ele);
            output_data[ph * output_width + pw] = ele;
          }
        }
        input_data += input_stride;
        output_data += output_stride;
      }
    }
  }
};
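
// A minimal usage sketch (not part of the original file; `cpu_ctx`, `input`,
// and `output` are assumed to be set up by the caller, with `output` already
// shaped to N x C x H/2 x W/2): 2x2 average pooling with stride 2,
//
//   Pool2dFunctor<platform::CPUDeviceContext, AvgPool<float>, float> pool2d;
//   pool2d(cpu_ctx, input, /*ksize=*/{2, 2}, /*strides=*/{2, 2},
//          /*paddings=*/{0, 0}, AvgPool<float>(), /*exclusive=*/true,
//          /*adaptive=*/false, &output);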

/*
 * All tensors are in NCHW format.
 * Ksize, strides, paddings are two elements. These two elements represent
 * height and width, respectively.
 */
template <typename PoolProcess, class T>
class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
 public:
  void operator()(
      const platform::CPUDeviceContext& context, const framework::Tensor& input,
      const framework::Tensor& output, const framework::Tensor& output_grad,
      const std::vector<int>& ksize, const std::vector<int>& strides,
      const std::vector<int>& paddings, PoolProcess pool_grad_process,
      bool exclusive, bool adaptive, framework::Tensor* input_grad) {
    const int batch_size = input.dims()[0];
    const int input_height = input.dims()[2];
    const int input_width = input.dims()[3];
    const int output_channels = output.dims()[1];
    const int output_height = output.dims()[2];
    const int output_width = output.dims()[3];
    const int ksize_height = ksize[0];
    const int ksize_width = ksize[1];
    const int stride_height = strides[0];
    const int stride_width = strides[1];
    const int padding_height = paddings[0];
    const int padding_width = paddings[1];
    const int input_stride = input_height * input_width;
    const int output_stride = output_height * output_width;

    const T* input_data = input.data<T>();
    const T* output_data = output.data<T>();
    const T* output_grad_data = output_grad.data<T>();
    T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());

    int hstart, hend;
    int wstart, wend;
    for (int i = 0; i < batch_size; i++) {
      for (int c = 0; c < output_channels; ++c) {
        for (int ph = 0; ph < output_height; ++ph) {
          if (adaptive) {
            hstart = ADAPT_START_INDEX(ph, input_height, output_height);
            hend = ADAPT_END_INDEX(ph, input_height, output_height);
          } else {
            hstart = ph * stride_height - padding_height;
            hend = std::min(hstart + ksize_height, input_height);
            hstart = std::max(hstart, 0);
          }
          for (int pw = 0; pw < output_width; ++pw) {
            if (adaptive) {
              wstart = ADAPT_START_INDEX(pw, input_width, output_width);
              wend = ADAPT_END_INDEX(pw, input_width, output_width);
            } else {
              wstart = pw * stride_width - padding_width;
              wend = std::min(wstart + ksize_width, input_width);
              wstart = std::max(wstart, 0);
            }
            int pool_size = (exclusive || adaptive)
                                ? (hend - hstart) * (wend - wstart)
                                : ksize_height * ksize_width;
            float scale = 1.0 / pool_size;
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                pool_grad_process.compute(
                    input_data[h * input_width + w],
                    output_data[ph * output_width + pw],
                    output_grad_data[ph * output_width + pw],
                    static_cast<T>(scale),
                    input_grad_data + h * input_width + w);
              }
            }
          }
        }
        input_data += input_stride;
        output_data += output_stride;
        input_grad_data += input_stride;
        output_grad_data += output_stride;
      }
    }
  }
};
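
// Note on the divisor used by the two average-pooling functors above: with
// exclusive=true the divisor is the number of valid (unpadded) elements the
// window actually covers, so a 3x3 window clipped to 2x3 at a border divides
// by 6; with exclusive=false it always divides by ksize_height * ksize_width
// (9 in this example). Adaptive mode always uses the true window size, since
// its windows never cross the input boundary.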

/*
 * All tensors are in NCHW format.
 * Ksize, strides, paddings are two elements. These two elements represent
 * height and width, respectively.
 */
template <class T>
class MaxPool2dGradFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(
      const platform::CPUDeviceContext& context, const framework::Tensor& input,
      const framework::Tensor& output, const framework::Tensor& output_grad,
      const std::vector<int>& ksize, const std::vector<int>& strides,
      const std::vector<int>& paddings, framework::Tensor* input_grad) {
    const int batch_size = input.dims()[0];
    const int input_height = input.dims()[2];
    const int input_width = input.dims()[3];
    const int output_channels = output.dims()[1];
    const int output_height = output.dims()[2];
    const int output_width = output.dims()[3];
    const int ksize_height = ksize[0];
    const int ksize_width = ksize[1];
    const int stride_height = strides[0];
    const int stride_width = strides[1];
    const int padding_height = paddings[0];
    const int padding_width = paddings[1];
    const int input_stride = input_height * input_width;
    const int output_stride = output_height * output_width;

    const T* input_data = input.data<T>();
    const T* output_data = output.data<T>();
    const T* output_grad_data = output_grad.data<T>();
    T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());

    for (int i = 0; i < batch_size; i++) {
      for (int c = 0; c < output_channels; ++c) {
        for (int ph = 0; ph < output_height; ++ph) {
          int hstart = ph * stride_height - padding_height;
          int hend = std::min(hstart + ksize_height, input_height);
          hstart = std::max(hstart, 0);
          for (int pw = 0; pw < output_width; ++pw) {
            int wstart = pw * stride_width - padding_width;
            int wend = std::min(wstart + ksize_width, input_width);
            wstart = std::max(wstart, 0);

            bool stop = false;
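            // Route the whole gradient to the first element in the window
            // that equals the pooled max; `stop` skips any later duplicates.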
            for (int h = hstart; h < hend && !stop; ++h) {
              for (int w = wstart; w < wend && !stop; ++w) {
                int input_idx = h * input_width + w;
                int output_idx = ph * output_width + pw;
                if (input_data[input_idx] == output_data[output_idx]) {
                  input_grad_data[input_idx] += output_grad_data[output_idx];
                  stop = true;
                }
              }
            }
          }
        }
        input_data += input_stride;
        output_data += output_stride;
        input_grad_data += input_stride;
        output_grad_data += output_stride;
      }
    }
  }
};

template class MaxPool2dGradFunctor<platform::CPUDeviceContext, float>;
template class MaxPool2dGradFunctor<platform::CPUDeviceContext, double>;

template class Pool2dFunctor<platform::CPUDeviceContext,
                             paddle::operators::math::MaxPool<float>, float>;
template class Pool2dFunctor<platform::CPUDeviceContext,
                             paddle::operators::math::AvgPool<float>, float>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
                                 paddle::operators::math::MaxPoolGrad<float>,
                                 float>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
                                 paddle::operators::math::AvgPoolGrad<float>,
                                 float>;
template class Pool2dFunctor<platform::CPUDeviceContext,
                             paddle::operators::math::MaxPool<double>, double>;
template class Pool2dFunctor<platform::CPUDeviceContext,
                             paddle::operators::math::AvgPool<double>, double>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
                                 paddle::operators::math::MaxPoolGrad<double>,
                                 double>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
                                 paddle::operators::math::AvgPoolGrad<double>,
                                 double>;

/*
 * All tensors are in NCDHW format.
 * Ksize, strides, paddings are three elements. These three elements represent
 * depth, height and width, respectively.
 */
template <typename PoolProcess, class T>
class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::Tensor& input, const std::vector<int>& ksize,
                  const std::vector<int>& strides,
                  const std::vector<int>& paddings, PoolProcess pool_process,
                  bool exclusive, bool adaptive, framework::Tensor* output) {
    const int batch_size = input.dims()[0];
    const int input_depth = input.dims()[2];
    const int input_height = input.dims()[3];
    const int input_width = input.dims()[4];
    const int output_channels = output->dims()[1];
    const int output_depth = output->dims()[2];
    const int output_height = output->dims()[3];
    const int output_width = output->dims()[4];
    const int ksize_depth = ksize[0];
    const int ksize_height = ksize[1];
    const int ksize_width = ksize[2];
    const int stride_depth = strides[0];
    const int stride_height = strides[1];
    const int stride_width = strides[2];
    const int padding_depth = paddings[0];
    const int padding_height = paddings[1];
    const int padding_width = paddings[2];

    const int input_stride = input_depth * input_height * input_width;
    const int output_stride = output_depth * output_height * output_width;

    const T* input_data = input.data<T>();
    T* output_data = output->mutable_data<T>(context.GetPlace());

    int dstart, dend;
    int hstart, hend;
    int wstart, wend;
    for (int i = 0; i < batch_size; i++) {
      for (int c = 0; c < output_channels; ++c) {
        for (int pd = 0; pd < output_depth; ++pd) {
          if (adaptive) {
            dstart = ADAPT_START_INDEX(pd, input_depth, output_depth);
            dend = ADAPT_END_INDEX(pd, input_depth, output_depth);
          } else {
            dstart = pd * stride_depth - padding_depth;
            dend = std::min(dstart + ksize_depth, input_depth);
            dstart = std::max(dstart, 0);
          }
          for (int ph = 0; ph < output_height; ++ph) {
            if (adaptive) {
              hstart = ADAPT_START_INDEX(ph, input_height, output_height);
              hend = ADAPT_END_INDEX(ph, input_height, output_height);
            } else {
              hstart = ph * stride_height - padding_height;
              hend = std::min(hstart + ksize_height, input_height);
              hstart = std::max(hstart, 0);
            }
            for (int pw = 0; pw < output_width; ++pw) {
              if (adaptive) {
                wstart = ADAPT_START_INDEX(pw, input_width, output_width);
                wend = ADAPT_END_INDEX(pw, input_width, output_width);
              } else {
                wstart = pw * stride_width - padding_width;
                wend = std::min(wstart + ksize_width, input_width);
                wstart = std::max(wstart, 0);
              }
              int output_idx = (pd * output_height + ph) * output_width + pw;
              T ele = pool_process.initial();
              for (int d = dstart; d < dend; ++d) {
                for (int h = hstart; h < hend; ++h) {
                  for (int w = wstart; w < wend; ++w) {
                    pool_process.compute(
                        input_data[(d * input_height + h) * input_width + w],
                        &ele);
                  }
                }
              }
              int pool_size =
                  (exclusive || adaptive)
                      ? (dend - dstart) * (hend - hstart) * (wend - wstart)
                      : ksize_depth * ksize_height * ksize_width;
              pool_process.finalize(static_cast<T>(pool_size), &ele);
              output_data[output_idx] = ele;
            }
          }
        }
        input_data += input_stride;
        output_data += output_stride;
      }
    }
  }
};
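
// A sketch of adaptive use (names assumed, as in the 2D example above): with
// adaptive=true the window bounds come from the output tensor's shape and
// ksize/strides/paddings are ignored, so global average pooling to a 1x1x1
// spatial output is
//
//   Pool3dFunctor<platform::CPUDeviceContext, AvgPool<float>, float> pool3d;
//   pool3d(cpu_ctx, input, /*ksize=*/{1, 1, 1}, /*strides=*/{1, 1, 1},
//          /*paddings=*/{0, 0, 0}, AvgPool<float>(), /*exclusive=*/true,
//          /*adaptive=*/true, &output);  // output shaped N x C x 1 x 1 x 1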

/*
 * All tensors are in NCDHW format.
 * Ksize, strides, paddings are three elements. These three elements represent
 * depth, height and width, respectively.
 */
template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
 public:
  void operator()(
      const platform::CPUDeviceContext& context, const framework::Tensor& input,
      const framework::Tensor& output, const framework::Tensor& output_grad,
      const std::vector<int>& ksize, const std::vector<int>& strides,
      const std::vector<int>& paddings, PoolProcess pool_grad_process,
      bool exclusive, bool adaptive, framework::Tensor* input_grad) {
    const int batch_size = input.dims()[0];
    const int input_depth = input.dims()[2];
    const int input_height = input.dims()[3];
    const int input_width = input.dims()[4];
    const int output_channels = output.dims()[1];
    const int output_depth = output.dims()[2];
    const int output_height = output.dims()[3];
    const int output_width = output.dims()[4];
    const int ksize_depth = ksize[0];
    const int ksize_height = ksize[1];
    const int ksize_width = ksize[2];
    const int stride_depth = strides[0];
    const int stride_height = strides[1];
    const int stride_width = strides[2];
    const int padding_depth = paddings[0];
    const int padding_height = paddings[1];
    const int padding_width = paddings[2];
    const int input_stride = input_depth * input_height * input_width;
    const int output_stride = output_depth * output_height * output_width;

    const T* input_data = input.data<T>();
    const T* output_data = output.data<T>();
    const T* output_grad_data = output_grad.data<T>();
    T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());

    int dstart, dend;
    int hstart, hend;
    int wstart, wend;
    for (int i = 0; i < batch_size; i++) {
      for (int c = 0; c < output_channels; ++c) {
        for (int pd = 0; pd < output_depth; ++pd) {
          if (adaptive) {
            dstart = ADAPT_START_INDEX(pd, input_depth, output_depth);
            dend = ADAPT_END_INDEX(pd, input_depth, output_depth);
          } else {
            dstart = pd * stride_depth - padding_depth;
            dend = std::min(dstart + ksize_depth, input_depth);
            dstart = std::max(dstart, 0);
          }
          for (int ph = 0; ph < output_height; ++ph) {
            if (adaptive) {
              hstart = ADAPT_START_INDEX(ph, input_height, output_height);
              hend = ADAPT_END_INDEX(ph, input_height, output_height);
            } else {
              hstart = ph * stride_height - padding_height;
              hend = std::min(hstart + ksize_height, input_height);
              hstart = std::max(hstart, 0);
            }
            for (int pw = 0; pw < output_width; ++pw) {
              if (adaptive) {
                wstart = ADAPT_START_INDEX(pw, input_width, output_width);
                wend = ADAPT_END_INDEX(pw, input_width, output_width);
              } else {
                wstart = pw * stride_width - padding_width;
                wend = std::min(wstart + ksize_width, input_width);
                wstart = std::max(wstart, 0);
              }

              int pool_size =
                  (exclusive || adaptive)
                      ? (dend - dstart) * (hend - hstart) * (wend - wstart)
                      : ksize_depth * ksize_height * ksize_width;
              float scale = 1.0 / pool_size;
              for (int d = dstart; d < dend; ++d) {
                for (int h = hstart; h < hend; ++h) {
                  for (int w = wstart; w < wend; ++w) {
                    int input_idx = (d * input_height + h) * input_width + w;
                    int output_idx =
                        (pd * output_height + ph) * output_width + pw;
                    pool_grad_process.compute(
                        input_data[input_idx], output_data[output_idx],
                        output_grad_data[output_idx], static_cast<T>(scale),
                        input_grad_data + input_idx);
                  }
                }
              }
            }
          }
        }
        input_data += input_stride;
        output_data += output_stride;
        input_grad_data += input_stride;
        output_grad_data += output_stride;
      }
    }
  }
};

/*
 * All tensors are in NCDHW format.
 * Ksize, strides, paddings are three elements. These three elements represent
 * depth, height and width, respectively.
 */
template <class T>
class MaxPool3dGradFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(
      const platform::CPUDeviceContext& context, const framework::Tensor& input,
      const framework::Tensor& output, const framework::Tensor& output_grad,
      const std::vector<int>& ksize, const std::vector<int>& strides,
      const std::vector<int>& paddings, framework::Tensor* input_grad) {
    const int batch_size = input.dims()[0];
    const int input_depth = input.dims()[2];
    const int input_height = input.dims()[3];
    const int input_width = input.dims()[4];
    const int output_channels = output.dims()[1];
    const int output_depth = output.dims()[2];
    const int output_height = output.dims()[3];
    const int output_width = output.dims()[4];
    const int ksize_depth = ksize[0];
    const int ksize_height = ksize[1];
    const int ksize_width = ksize[2];
    const int stride_depth = strides[0];
    const int stride_height = strides[1];
    const int stride_width = strides[2];
    const int padding_depth = paddings[0];
    const int padding_height = paddings[1];
    const int padding_width = paddings[2];
    const int input_stride = input_depth * input_height * input_width;
    const int output_stride = output_depth * output_height * output_width;

    const T* input_data = input.data<T>();
    const T* output_data = output.data<T>();
    const T* output_grad_data = output_grad.data<T>();
    T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());

    for (int i = 0; i < batch_size; i++) {
      for (int c = 0; c < output_channels; ++c) {
        for (int pd = 0; pd < output_depth; ++pd) {
          int dstart = pd * stride_depth - padding_depth;
          int dend = std::min(dstart + ksize_depth, input_depth);
          dstart = std::max(dstart, 0);
          for (int ph = 0; ph < output_height; ++ph) {
            int hstart = ph * stride_height - padding_height;
            int hend = std::min(hstart + ksize_height, input_height);
            hstart = std::max(hstart, 0);
            for (int pw = 0; pw < output_width; ++pw) {
              int wstart = pw * stride_width - padding_width;
              int wend = std::min(wstart + ksize_width, input_width);
              wstart = std::max(wstart, 0);
              bool stop = false;
              for (int d = dstart; d < dend && !stop; ++d) {
                for (int h = hstart; h < hend && !stop; ++h) {
                  for (int w = wstart; w < wend && !stop; ++w) {
                    int input_idx = (d * input_height + h) * input_width + w;
                    int output_idx =
                        (pd * output_height + ph) * output_width + pw;

                    if (input_data[input_idx] == output_data[output_idx]) {
                      input_grad_data[input_idx] +=
                          output_grad_data[output_idx];
                      stop = true;
                    }
                  }
                }
              }
            }
          }
        }
        input_data += input_stride;
        output_data += output_stride;
        input_grad_data += input_stride;
        output_grad_data += output_stride;
      }
    }
  }
};

template class MaxPool3dGradFunctor<platform::CPUDeviceContext, float>;
template class MaxPool3dGradFunctor<platform::CPUDeviceContext, double>;

template class Pool3dFunctor<platform::CPUDeviceContext,
                             paddle::operators::math::MaxPool<float>, float>;
template class Pool3dFunctor<platform::CPUDeviceContext,
                             paddle::operators::math::AvgPool<float>, float>;
template class Pool3dGradFunctor<platform::CPUDeviceContext,
                                 paddle::operators::math::MaxPoolGrad<float>,
                                 float>;
template class Pool3dGradFunctor<platform::CPUDeviceContext,
                                 paddle::operators::math::AvgPoolGrad<float>,
                                 float>;
template class Pool3dFunctor<platform::CPUDeviceContext,
                             paddle::operators::math::MaxPool<double>, double>;
template class Pool3dFunctor<platform::CPUDeviceContext,
                             paddle::operators::math::AvgPool<double>, double>;
template class Pool3dGradFunctor<platform::CPUDeviceContext,
                                 paddle::operators::math::MaxPoolGrad<double>,
                                 double>;
template class Pool3dGradFunctor<platform::CPUDeviceContext,
                                 paddle::operators::math::AvgPoolGrad<double>,
                                 double>;

/*
 * All tensors are in NCHW format.
 * Ksize, strides, paddings are two elements. These two elements represent
 * height and width, respectively.
 */
template <typename T1, typename T2>
class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::Tensor& input, const std::vector<int>& ksize,
                  const std::vector<int>& strides,
                  const std::vector<int>& paddings, bool adaptive,
                  framework::Tensor* output, framework::Tensor* mask) {
    const int batch_size = input.dims()[0];
    const int input_height = input.dims()[2];
    const int input_width = input.dims()[3];
    const int output_channels = output->dims()[1];
    const int output_height = output->dims()[2];
    const int output_width = output->dims()[3];
    const int ksize_height = ksize[0];
    const int ksize_width = ksize[1];
    const int stride_height = strides[0];
    const int stride_width = strides[1];
    const int padding_height = paddings[0];
    const int padding_width = paddings[1];
    const int input_stride = input_height * input_width;
    const int output_stride = output_height * output_width;

    const T1* input_data = input.data<T1>();
    T1* output_data = output->mutable_data<T1>(context.GetPlace());
    T2* mask_data = mask->mutable_data<T2>(context.GetPlace());

    int hstart, hend;
    int wstart, wend;
    for (int i = 0; i < batch_size; i++) {
      for (int c = 0; c < output_channels; ++c) {
        for (int ph = 0; ph < output_height; ++ph) {
          if (adaptive) {
            hstart = ADAPT_START_INDEX(ph, input_height, output_height);
            hend = ADAPT_END_INDEX(ph, input_height, output_height);
          } else {
            hstart = ph * stride_height - padding_height;
            hend = std::min(hstart + ksize_height, input_height);
            hstart = std::max(hstart, 0);
          }
          for (int pw = 0; pw < output_width; ++pw) {
            if (adaptive) {
              wstart = ADAPT_START_INDEX(pw, input_width, output_width);
              wend = ADAPT_END_INDEX(pw, input_width, output_width);
            } else {
              wstart = pw * stride_width - padding_width;
              wend = std::min(wstart + ksize_width, input_width);
              wstart = std::max(wstart, 0);
            }

            T1 ele = static_cast<T1>(-FLT_MAX);
            int index = -1;
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                if (ele < input_data[h * input_width + w]) {
                  ele = input_data[h * input_width + w];
                  index = h * input_width + w;
                }
              }
            }
            output_data[ph * output_width + pw] = ele;
            mask_data[ph * output_width + pw] = index;
          }
        }
        // offset
        input_data += input_stride;
        output_data += output_stride;
        mask_data += output_stride;
      }
    }
  }
};

/*
 * All tensors are in NCHW format.
 * Ksize, strides, paddings are two elements. These two elements represent
 * height and width, respectively.
 */
template <typename T1, typename T2>
class MaxPool2dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::Tensor& output_grad,
                  const framework::Tensor& mask, const std::vector<int>& ksize,
                  const std::vector<int>& strides,
                  const std::vector<int>& paddings, bool adaptive,
                  framework::Tensor* input_grad) {
    const int batch_size = input_grad->dims()[0];
    const int input_height = input_grad->dims()[2];
    const int input_width = input_grad->dims()[3];
    const int output_channels = output_grad.dims()[1];
    const int output_height = output_grad.dims()[2];
    const int output_width = output_grad.dims()[3];
    const int input_stride = input_height * input_width;
    const int output_stride = output_height * output_width;

    const T2* mask_data = mask.data<T2>();
    const T1* output_grad_data = output_grad.data<T1>();
    T1* input_grad_data = input_grad->mutable_data<T1>(context.GetPlace());
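
    // Scatter each output gradient back to the input position recorded in
    // the mask. Note the +=: input_grad is assumed to be zero-initialized by
    // the caller before this functor runs.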

    for (int n = 0; n < batch_size; ++n) {
      for (int c = 0; c < output_channels; ++c) {
        for (int ph = 0; ph < output_height; ++ph) {
          for (int pw = 0; pw < output_width; ++pw) {
            const int output_idx = ph * output_width + pw;
            const int input_idx = static_cast<int>(mask_data[output_idx]);
            input_grad_data[input_idx] += output_grad_data[output_idx];
          }
        }
        // offset
        input_grad_data += input_stride;
        output_grad_data += output_stride;
        mask_data += output_stride;
      }
    }
  }
};

template class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, float,
                                         int>;
template class MaxPool2dWithIndexGradFunctor<platform::CPUDeviceContext, float,
                                             int>;
template class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, double,
                                         int>;
template class MaxPool2dWithIndexGradFunctor<platform::CPUDeviceContext, double,
                                             int>;

/*
 * All tensors are in NCDHW format.
 * Ksize, strides, paddings are three elements. These three elements represent
 * depth, height and width, respectively.
 */
template <typename T1, typename T2>
class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::Tensor& input, const std::vector<int>& ksize,
                  const std::vector<int>& strides,
                  const std::vector<int>& paddings, bool adaptive,
                  framework::Tensor* output, framework::Tensor* mask) {
    const int batch_size = input.dims()[0];
    const int input_depth = input.dims()[2];
    const int input_height = input.dims()[3];
    const int input_width = input.dims()[4];
    const int output_channels = output->dims()[1];
    const int output_depth = output->dims()[2];
    const int output_height = output->dims()[3];
    const int output_width = output->dims()[4];
    const int ksize_depth = ksize[0];
    const int ksize_height = ksize[1];
    const int ksize_width = ksize[2];
    const int stride_depth = strides[0];
    const int stride_height = strides[1];
    const int stride_width = strides[2];
    const int padding_depth = paddings[0];
    const int padding_height = paddings[1];
    const int padding_width = paddings[2];
    const int input_stride = input_depth * input_height * input_width;
    const int output_stride = output_depth * output_height * output_width;

    const T1* input_data = input.data<T1>();
    T1* output_data = output->mutable_data<T1>(context.GetPlace());
    T2* mask_data = mask->mutable_data<T2>(context.GetPlace());

    int dstart, dend;
    int hstart, hend;
    int wstart, wend;
    for (int i = 0; i < batch_size; i++) {
      for (int c = 0; c < output_channels; ++c) {
        for (int pd = 0; pd < output_depth; ++pd) {
          if (adaptive) {
            dstart = ADAPT_START_INDEX(pd, input_depth, output_depth);
            dend = ADAPT_END_INDEX(pd, input_depth, output_depth);
          } else {
            dstart = pd * stride_depth - padding_depth;
            dend = std::min(dstart + ksize_depth, input_depth);
            dstart = std::max(dstart, 0);
          }
          for (int ph = 0; ph < output_height; ++ph) {
            if (adaptive) {
              hstart = ADAPT_START_INDEX(ph, input_height, output_height);
              hend = ADAPT_END_INDEX(ph, input_height, output_height);
            } else {
              hstart = ph * stride_height - padding_height;
              hend = std::min(hstart + ksize_height, input_height);
              hstart = std::max(hstart, 0);
            }
            for (int pw = 0; pw < output_width; ++pw) {
              if (adaptive) {
                wstart = ADAPT_START_INDEX(pw, input_width, output_width);
                wend = ADAPT_END_INDEX(pw, input_width, output_width);
              } else {
                wstart = pw * stride_width - padding_width;
                wend = std::min(wstart + ksize_width, input_width);
                wstart = std::max(wstart, 0);
              }

              int output_idx = (pd * output_height + ph) * output_width + pw;
              T1 ele = static_cast<T1>(-FLT_MAX);
              int index = -1;
              for (int d = dstart; d < dend; ++d) {
                for (int h = hstart; h < hend; ++h) {
                  for (int w = wstart; w < wend; ++w) {
                    int input_idx = (d * input_height + h) * input_width + w;
                    if (ele < input_data[input_idx]) {
                      index = input_idx;
                      ele = input_data[input_idx];
                    }
                  }
                }
              }
              output_data[output_idx] = ele;
              mask_data[output_idx] = index;
            }
          }
        }
        // offset
        input_data += input_stride;
        output_data += output_stride;
        mask_data += output_stride;
      }
    }
  }
};

/*
 * All tensors are in NCDHW format.
 * Ksize, strides, paddings are three elements. These three elements represent
 * depth, height and width, respectively.
 */
template <typename T1, typename T2>
class MaxPool3dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::Tensor& output_grad,
                  const framework::Tensor& mask, const std::vector<int>& ksize,
                  const std::vector<int>& strides,
                  const std::vector<int>& paddings, bool adaptive,
                  framework::Tensor* input_grad) {
    const int batch_size = input_grad->dims()[0];
    const int input_depth = input_grad->dims()[2];
    const int input_height = input_grad->dims()[3];
    const int input_width = input_grad->dims()[4];
    const int output_channels = output_grad.dims()[1];
    const int output_depth = output_grad.dims()[2];
    const int output_height = output_grad.dims()[3];
    const int output_width = output_grad.dims()[4];
    const int input_stride = input_depth * input_height * input_width;
    const int output_stride = output_depth * output_height * output_width;

    const T2* mask_data = mask.data<T2>();
    const T1* output_grad_data = output_grad.data<T1>();
    T1* input_grad_data = input_grad->mutable_data<T1>(context.GetPlace());

    for (int n = 0; n < batch_size; ++n) {
      for (int c = 0; c < output_channels; ++c) {
        for (int pd = 0; pd < output_depth; ++pd) {
          for (int ph = 0; ph < output_height; ++ph) {
            for (int pw = 0; pw < output_width; ++pw) {
              const int output_idx =
                  (pd * output_height + ph) * output_width + pw;
              const int input_idx = static_cast<int>(mask_data[output_idx]);
              input_grad_data[input_idx] += output_grad_data[output_idx];
            }
          }
        }
        // offset
        input_grad_data += input_stride;
        output_grad_data += output_stride;
        mask_data += output_stride;
      }
    }
  }
};

template class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, float,
                                         int>;
template class MaxPool3dWithIndexGradFunctor<platform::CPUDeviceContext, float,
                                             int>;
template class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, double,
                                         int>;
template class MaxPool3dWithIndexGradFunctor<platform::CPUDeviceContext, double,
                                             int>;
}  // namespace math
}  // namespace operators
}  // namespace paddle